diff options
author | Alon Zakai <azakai@google.com> | 2020-06-30 16:27:18 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-06-30 16:27:18 -0700 |
commit | ca89a9f2f59e991ea1a3341bd4a567b043699eb8 (patch) | |
tree | 5a326059e2374b5ebc881364e4a528a253bcd5b4 /src | |
parent | 0d5eca6cf4f61798d0854226d2c78d2655e07c30 (diff) | |
download | binaryen-ca89a9f2f59e991ea1a3341bd4a567b043699eb8.tar.gz binaryen-ca89a9f2f59e991ea1a3341bd4a567b043699eb8.tar.bz2 binaryen-ca89a9f2f59e991ea1a3341bd4a567b043699eb8.zip |
DWARF: Always update .debug_loc base offsets (#2936)
.debug_loc entries can have bases: a value that all values after
it in the list are relative to. Previously we used to keep the base
value as it was, to keep things as similar to the original DWARF as
possible. However, if optimizations move code around so that the
values after the base are before the base, then the values could
no longer be emitted, and we skipped them in effect.
This PR makes us always pick a new base for each list. This
allows the base to always work for the values after it, but does
mean we change the lists quite a lot more. If there is any extra
meaning to the original bases here we may lose that, but the
DWARF spec doesn't seem to indicate anything like that
(however, it isn't clear to me why LLVM then doesn't always
choose the maximal base as the code here does - LLVM's values
seem oddly arbitrary).
Also properly note the base of each compile unit, which
previously we just noted the old value, but didn't look at the
new one in the new binary being written.
Diffstat (limited to 'src')
-rw-r--r-- | src/wasm/wasm-debug.cpp | 135 |
1 files changed, 88 insertions, 47 deletions
diff --git a/src/wasm/wasm-debug.cpp b/src/wasm/wasm-debug.cpp index 4181b06f2..e90dc6d48 100644 --- a/src/wasm/wasm-debug.cpp +++ b/src/wasm/wasm-debug.cpp @@ -473,12 +473,18 @@ struct LocationUpdater { AddrExprMap oldExprAddrMap; FuncAddrMap oldFuncAddrMap; + // Map offsets of location list entries in the debug_loc section to the index + // of their compile unit. + std::unordered_map<BinaryLocation, size_t> locToUnitMap; + // Map start of line tables in the debug_line section to their new locations. - std::map<BinaryLocation, BinaryLocation> debugLineMap; + std::unordered_map<BinaryLocation, BinaryLocation> debugLineMap; + + typedef std::pair<BinaryLocation, BinaryLocation> OldToNew; - // Map offset of location list entries in the debug_loc section to the base - // address of the compilation units referencing them. - std::map<BinaryLocation, BinaryLocation> debugLocToUnitMap; + // Map of compile unit index => old and new base offsets (i.e., in the + // original binary and in the new one). + std::unordered_map<size_t, OldToNew> compileUnitBases; // TODO: for memory efficiency, we may want to do this in a streaming manner, // binary to binary, without YAML IR. @@ -618,8 +624,14 @@ struct LocationUpdater { return debugLineMap.at(old); } - BinaryLocation getLocationBaseAddress(BinaryLocation offset) const { - return debugLocToUnitMap.at(offset); + // Given an offset in .debug_loc, get the old and new compile unit bases. + OldToNew getCompileUnitBasesForLoc(size_t offset) const { + auto index = locToUnitMap.at(offset); + auto iter = compileUnitBases.find(index); + if (iter != compileUnitBases.end()) { + return iter->second; + } + return OldToNew{0, 0}; } }; @@ -756,7 +768,7 @@ static void updateDIE(const llvm::DWARFDebugInfoEntry& DIE, llvm::DWARFYAML::Entry& yamlEntry, const llvm::DWARFAbbreviationDeclaration* abbrevDecl, LocationUpdater& locationUpdater, - BinaryLocation compilationUnitBaseAddress) { + size_t compileUnitIndex) { auto tag = DIE.getTag(); // Pairs of low/high_pc require some special handling, as the high // may be an offset relative to the low. First, process everything but @@ -776,8 +788,13 @@ static void updateDIE(const llvm::DWARFDebugInfoEntry& DIE, tag == llvm::dwarf::DW_TAG_lexical_block || tag == llvm::dwarf::DW_TAG_label) { newValue = locationUpdater.getNewExprStart(oldValue); - } else if (tag == llvm::dwarf::DW_TAG_compile_unit || - tag == llvm::dwarf::DW_TAG_subprogram) { + } else if (tag == llvm::dwarf::DW_TAG_compile_unit) { + newValue = locationUpdater.getNewFuncStart(oldValue); + // Per the DWARF spec, "The base address of a compile unit is + // defined as the value of the DW_AT_low_pc attribute, if present." + locationUpdater.compileUnitBases[compileUnitIndex] = + LocationUpdater::OldToNew{oldValue, newValue}; + } else if (tag == llvm::dwarf::DW_TAG_subprogram) { newValue = locationUpdater.getNewFuncStart(oldValue); } else { Fatal() << "unknown tag with low_pc " @@ -793,8 +810,7 @@ static void updateDIE(const llvm::DWARFDebugInfoEntry& DIE, } else if (attr == llvm::dwarf::DW_AT_location && attrSpec.Form == llvm::dwarf::DW_FORM_sec_offset) { BinaryLocation locOffset = yamlValue.Value; - locationUpdater.debugLocToUnitMap[locOffset] = - compilationUnitBaseAddress; + locationUpdater.locToUnitMap[locOffset] = compileUnitIndex; } }); // Next, process the high_pcs. @@ -838,6 +854,7 @@ static void updateCompileUnits(const BinaryenDWARFInfo& info, LocationUpdater& locationUpdater) { // The context has the high-level information we need, and the YAML is where // we write changes. First, iterate over the compile units. + size_t compileUnitIndex = 0; iterContextAndYAML( info.context->compile_units(), yaml.CompileUnits, @@ -854,15 +871,11 @@ static void updateCompileUnits(const BinaryenDWARFInfo& info, auto abbrevDecl = DIE.getAbbreviationDeclarationPtr(); if (abbrevDecl) { // This is relevant; look for things to update. - BinaryLocation compilationUnitBaseAddress = - CU->getBaseAddress() ? CU->getBaseAddress()->Address : 0; - updateDIE(DIE, - yamlEntry, - abbrevDecl, - locationUpdater, - compilationUnitBaseAddress); + updateDIE( + DIE, yamlEntry, abbrevDecl, locationUpdater, compileUnitIndex); } }); + compileUnitIndex++; }); } @@ -907,59 +920,87 @@ static void updateRanges(llvm::DWARFYAML::Data& yaml, // would indicate an end or a base in .debug_loc). static const BinaryLocation IGNOREABLE_LOCATION = 1; +static bool isNewBaseLoc(const llvm::DWARFYAML::Loc& loc) { + return loc.Start == BinaryLocation(-1); +} + +static bool isEndMarkerLoc(const llvm::DWARFYAML::Loc& loc) { + return loc.Start == 0 && loc.End == 0; +} + // Update the .debug_loc section. static void updateLoc(llvm::DWARFYAML::Data& yaml, const LocationUpdater& locationUpdater) { // Similar to ranges, try to update the start and end. Note that here we // can't skip since the location description is a variable number of bytes, // so we mark no longer valid addresses as empty. - // Locations have an optional base. bool atStart = true; - BinaryLocation base = 0; - for (size_t i = 0; i < yaml.Locs.size(); i++) { - auto& loc = yaml.Locs[i]; + // We need to keep positions in the .debug_loc section identical to before + // (or else we'd need to update their positions too) and so we need to keep + // base entries around (a base entry is added to every entry after it in the + // list). However, we may change the base's value as after moving instructions + // around the old base may not be smaller than all the values relative to it. + BinaryLocation oldBase, newBase; + auto& locs = yaml.Locs; + for (size_t i = 0; i < locs.size(); i++) { + auto& loc = locs[i]; if (atStart) { - base = locationUpdater.getLocationBaseAddress(loc.CompileUnitOffset); + std::tie(oldBase, newBase) = + locationUpdater.getCompileUnitBasesForLoc(loc.CompileUnitOffset); atStart = false; } + // By default we copy values over, unless we modify them below. BinaryLocation newStart = loc.Start, newEnd = loc.End; - if (newStart == BinaryLocation(-1)) { + if (isNewBaseLoc(loc)) { // This is a new base. // Note that the base is not the address of an instruction, necessarily - // it's just a number (seems like it could always be an instruction, but // that's not what LLVM emits). - base = newEnd; - } else if (newStart == 0 && newEnd == 0) { - // This is an end marker, this list is done. - base = 0; + // We must look forward at everything relative to this base, so that we + // can emit a new proper base (as mentioned earlier, the original base may + // not be valid if instructions moved to a position before it - they must + // be positive offsets from it). + oldBase = newBase = newEnd; + BinaryLocation smallest = -1; + for (size_t j = i + 1; j < locs.size(); j++) { + auto& futureLoc = locs[j]; + if (isNewBaseLoc(futureLoc) || isEndMarkerLoc(futureLoc)) { + break; + } + auto updatedStart = + locationUpdater.getNewStart(futureLoc.Start + oldBase); + // If we found a valid mapping, this is a relevant value for us. If the + // optimizer removed it, it's a 0, and we can ignore it here - we will + // emit IGNOREABLE_LOCATION for it later anyhow. + if (updatedStart != 0) { + smallest = std::min(smallest, updatedStart); + } + } + // If we found no valid values that will be relativized here, just use 0 + // as the new (never-to-be-used) base, which is less confusing (otherwise + // the value looks like it means something). + if (smallest == BinaryLocation(-1)) { + smallest = 0; + } + newBase = newEnd = smallest; + } else if (isEndMarkerLoc(loc)) { + // This is an end marker, this list is done; reset the base. atStart = true; } else { // This is a normal entry, try to find what it should be updated to. First // de-relativize it to the base to get the absolute address, then look for // a new address for it. - newStart = locationUpdater.getNewStart(loc.Start + base); - newEnd = locationUpdater.getNewEnd(loc.End + base); + newStart = locationUpdater.getNewStart(loc.Start + oldBase); + newEnd = locationUpdater.getNewEnd(loc.End + oldBase); if (newStart == 0 || newEnd == 0) { // This part of the loc no longer has a mapping, so we must ignore it. newStart = newEnd = IGNOREABLE_LOCATION; } else { - // See if we can relativize it against the base. If things moved around - // so that the base is no longer before the start or the end, then we - // must skip this. That is, if we had something like - // - // [base] [start] [end] - // - // (where the X axis is the offset) and transforms led to - // - // [start] [base] [end] - // - // or such, then give up TODO: perhaps remap with a new base? - if (newStart >= base && newEnd >= base) { - newStart -= base; - newEnd -= base; - } else { - newStart = newEnd = IGNOREABLE_LOCATION; - } + // We picked a new base that ensures it is smaller than the values we + // will relativize to it. + assert(newStart >= newBase && newEnd >= newBase); + newStart -= newBase; + newEnd -= newBase; } // The loc start and end markers have been preserved. However, TODO // instructions in the middle may have moved around, making the loc no |