Skip to content

Commit 22c3be4

Browse files
committed
report: strip deduplicated extabindex/extab entries from base object
When --deduplicate is active, weak/global functions already seen in earlier translation units are skipped for code accounting. However, their associated extabindex and extab section entries remained in the base object, causing section layout mismatches and incorrect data match percentages. This patch physically removes extabindex entries (and their corresponding extab entries) from the base object before diffing, when the entry's function relocation targets a deduplicated symbol. Section data, relocations, sizes, and symbol addresses are all adjusted to maintain consistency.
1 parent 3cebee6 commit 22c3be4

1 file changed

Lines changed: 112 additions & 1 deletion

File tree

objdiff-cli/src/cmd/report.rs

Lines changed: 112 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -219,14 +219,125 @@ fn report_object(
219219
.with_context(|| format!("Failed to open {p}"))
220220
})
221221
.transpose()?;
222-
let base = object
222+
let mut base = object
223223
.base_path
224224
.as_ref()
225225
.map(|p| {
226226
obj::read::read(p.as_ref(), diff_config, diff::DiffSide::Base)
227227
.with_context(|| format!("Failed to open {p}"))
228228
})
229229
.transpose()?;
230+
231+
// When deduplicating, remove extabindex/extab entries from the base object
232+
// that correspond to weak/global functions already seen in earlier units.
233+
// The linker would have stripped these, but our compiled objects still have them.
234+
// We must physically remove them so the diff engine sees matching section layouts.
235+
if let Some(existing) = &existing_functions
236+
&& let Some(base_obj) = &mut base
237+
{
238+
// Find weak/global code symbols that are duplicates
239+
let dedup_func_indices: HashSet<usize> = base_obj
240+
.symbols
241+
.iter()
242+
.enumerate()
243+
.filter(|(_, s)| {
244+
s.size > 0
245+
&& (s.flags.contains(SymbolFlag::Global)
246+
|| s.flags.contains(SymbolFlag::Weak))
247+
&& existing.contains(&s.name)
248+
})
249+
.map(|(i, _)| i)
250+
.collect();
251+
252+
if !dedup_func_indices.is_empty() {
253+
// (extabindex_section_idx, entry_addr, entry_size, extab_info)
254+
type Removal = (usize, u64, u64, Option<(usize, usize)>);
255+
let mut removals: Vec<Removal> = vec![];
256+
257+
for (section_idx, section) in base_obj.sections.iter().enumerate() {
258+
if section.name != "extabindex" {
259+
continue;
260+
}
261+
for sym in base_obj.symbols.iter() {
262+
if sym.section != Some(section_idx) || sym.size == 0 {
263+
continue;
264+
}
265+
// Check function relocation at sym.address
266+
let is_dedup = section.relocations.iter().any(|r| {
267+
r.address == sym.address
268+
&& dedup_func_indices.contains(&r.target_symbol)
269+
});
270+
if !is_dedup {
271+
continue;
272+
}
273+
// Find corresponding extab entry via relocation at sym.address + 8
274+
let extab_info = section
275+
.relocations
276+
.iter()
277+
.find(|r| r.address == sym.address + 8)
278+
.and_then(|r| {
279+
let extab_sym = &base_obj.symbols[r.target_symbol];
280+
extab_sym.section.map(|si| (si, r.target_symbol))
281+
});
282+
removals.push((section_idx, sym.address, sym.size, extab_info));
283+
}
284+
}
285+
286+
// Process removals for each affected section
287+
// Collect all (section_idx, address, size) pairs to remove
288+
let mut section_removals: Vec<(usize, u64, u64)> = vec![];
289+
for (ei_sec, ei_addr, ei_size, extab_info) in &removals {
290+
section_removals.push((*ei_sec, *ei_addr, *ei_size));
291+
if let Some((extab_sec, extab_sym_idx)) = extab_info {
292+
let extab_sym = &base_obj.symbols[*extab_sym_idx];
293+
section_removals.push((*extab_sec, extab_sym.address, extab_sym.size));
294+
}
295+
}
296+
297+
// Group by section and sort by address descending (remove from end first)
298+
section_removals.sort_by(|a, b| a.0.cmp(&b.0).then(b.1.cmp(&a.1)));
299+
300+
for (section_idx, addr, size) in &section_removals {
301+
let section = &mut base_obj.sections[*section_idx];
302+
let base_addr = section.address;
303+
let offset = (*addr - base_addr) as usize;
304+
let sz = *size as usize;
305+
306+
// Remove bytes from section data
307+
if offset + sz <= section.data.0.len() {
308+
section.data.0.drain(offset..offset + sz);
309+
}
310+
311+
// Remove relocations in this range, adjust those after
312+
section.relocations.retain(|r| r.address < *addr || r.address >= addr + size);
313+
for r in &mut section.relocations {
314+
if r.address >= addr + size {
315+
r.address -= size;
316+
}
317+
}
318+
319+
// Adjust section size
320+
section.size -= size;
321+
322+
// Adjust symbol addresses in this section
323+
for s in &mut base_obj.symbols {
324+
if s.section == Some(*section_idx) {
325+
if s.address >= *addr && s.address < addr + size {
326+
s.size = 0;
327+
} else if s.address >= addr + size {
328+
s.address -= size;
329+
} else if s.address < *addr
330+
&& s.address + s.size > *addr
331+
{
332+
// Symbol spans the removed region — shrink it
333+
s.size -= size;
334+
}
335+
}
336+
}
337+
}
338+
}
339+
}
340+
230341
let result =
231342
diff::diff_objs(target.as_ref(), base.as_ref(), None, diff_config, &mapping_config)?;
232343

0 commit comments

Comments
 (0)