diff options
author | Thomas Lively <tlively@google.com> | 2024-12-03 11:20:36 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-12-03 11:20:36 -0800 |
commit | 87f9dac127b387715d8d96ac7ec8fd469d8c2dab (patch) | |
tree | aa1aec906bf6adc3ea3d93c56616d393850e2249 | |
parent | f331120e4b942a795d4a6b6d0d5a3d781c1e6a4c (diff) | |
download | binaryen-87f9dac127b387715d8d96ac7ec8fd469d8c2dab.tar.gz binaryen-87f9dac127b387715d8d96ac7ec8fd469d8c2dab.tar.bz2 binaryen-87f9dac127b387715d8d96ac7ec8fd469d8c2dab.zip |
[NFC] Encapsulate source map reader state (#7132)
Move all state relevant to reading source maps out of WasmBinaryReader
and into a new utility, SourceMapReader. This is a prerequisite for
parallelizing the parsing of function bodies, since the source map
reader state is different at the beginning of each function.
Also take the opportunity to simplify the way we read source maps, for
example by deferring the reading of anything but the position of a debug
location until it is needed and by using `std::optional` instead of
singleton `std::set`s to store function prologue and epilogue debug
locations.
-rw-r--r-- | src/ir/module-utils.cpp | 36 | ||||
-rw-r--r-- | src/parsing.h | 9 | ||||
-rw-r--r-- | src/passes/DebugLocationPropagation.cpp | 4 | ||||
-rw-r--r-- | src/passes/Print.cpp | 8 | ||||
-rw-r--r-- | src/source-map.h | 101 | ||||
-rw-r--r-- | src/tools/wasm-dis.cpp | 1 | ||||
-rw-r--r-- | src/wasm-binary.h | 49 | ||||
-rw-r--r-- | src/wasm-stack.h | 8 | ||||
-rw-r--r-- | src/wasm.h | 4 | ||||
-rw-r--r-- | src/wasm/CMakeLists.txt | 1 | ||||
-rw-r--r-- | src/wasm/parsing.cpp | 12 | ||||
-rw-r--r-- | src/wasm/source-map.cpp | 212 | ||||
-rw-r--r-- | src/wasm/wasm-binary.cpp | 259 | ||||
-rw-r--r-- | src/wasm/wasm-io.cpp | 19 | ||||
-rw-r--r-- | src/wasm/wasm-ir-builder.cpp | 4 | ||||
-rw-r--r-- | src/wasm/wasm-stack.cpp | 8 | ||||
-rw-r--r-- | src/wasm/wasm.cpp | 4 | ||||
-rw-r--r-- | test/gtest/CMakeLists.txt | 2 | ||||
-rw-r--r-- | test/gtest/source-map.cpp (renamed from test/gtest/binary-reader.cpp) | 36 |
19 files changed, 383 insertions, 394 deletions
diff --git a/src/ir/module-utils.cpp b/src/ir/module-utils.cpp index 5ebd6edef..431e33cc6 100644 --- a/src/ir/module-utils.cpp +++ b/src/ir/module-utils.cpp @@ -26,32 +26,20 @@ namespace wasm::ModuleUtils { // Update the file name indices when moving a set of debug locations from one // module to another. -static void updateLocationSet(std::set<Function::DebugLocation>& locations, - std::vector<Index>& fileIndexMap) { - std::set<Function::DebugLocation> updatedLocations; - - for (auto iter : locations) { - iter.fileIndex = fileIndexMap[iter.fileIndex]; - updatedLocations.insert(iter); +static void updateLocation(std::optional<Function::DebugLocation>& location, + std::vector<Index>& fileIndexMap) { + if (location) { + location->fileIndex = fileIndexMap[location->fileIndex]; } - locations.clear(); - std::swap(locations, updatedLocations); } // Update the symbol name indices when moving a set of debug locations from one // module to another. -static void updateSymbolSet(std::set<Function::DebugLocation>& locations, - std::vector<Index>& symbolIndexMap) { - std::set<Function::DebugLocation> updatedLocations; - - for (auto iter : locations) { - if (iter.symbolNameIndex) { - iter.symbolNameIndex = symbolIndexMap[*iter.symbolNameIndex]; - } - updatedLocations.insert(iter); +static void updateSymbol(std::optional<Function::DebugLocation>& location, + std::vector<Index>& symbolIndexMap) { + if (location && location->symbolNameIndex) { + location->symbolNameIndex = symbolIndexMap[*location->symbolNameIndex]; } - locations.clear(); - std::swap(locations, updatedLocations); } // Copies a function into a module. 
If newName is provided it is used as the @@ -94,8 +82,8 @@ copyFunctionWithoutAdd(Function* func, iter.second->fileIndex = (*fileIndexMap)[iter.second->fileIndex]; } } - updateLocationSet(ret->prologLocation, *fileIndexMap); - updateLocationSet(ret->epilogLocation, *fileIndexMap); + updateLocation(ret->prologLocation, *fileIndexMap); + updateLocation(ret->epilogLocation, *fileIndexMap); } if (symbolNameIndexMap) { for (auto& iter : ret->debugLocations) { @@ -105,8 +93,8 @@ copyFunctionWithoutAdd(Function* func, (*symbolNameIndexMap)[*(iter.second->symbolNameIndex)]; } } - updateSymbolSet(ret->prologLocation, *symbolNameIndexMap); - updateSymbolSet(ret->epilogLocation, *symbolNameIndexMap); + updateSymbol(ret->prologLocation, *symbolNameIndexMap); + updateSymbol(ret->epilogLocation, *symbolNameIndexMap); } } ret->module = func->module; diff --git a/src/parsing.h b/src/parsing.h index d59b3bd7c..80d4db9fb 100644 --- a/src/parsing.h +++ b/src/parsing.h @@ -43,15 +43,6 @@ struct ParseException { void dump(std::ostream& o) const; }; -struct MapParseException { - std::string text; - - MapParseException() : text("unknown parse error") {} - MapParseException(std::string text) : text(text) {} - - void dump(std::ostream& o) const; -}; - // Helper for parsers that may not have unique label names. This transforms // the names into unique ones, as required by Binaryen IR. struct UniqueNameMapper { diff --git a/src/passes/DebugLocationPropagation.cpp b/src/passes/DebugLocationPropagation.cpp index e2d1ac50f..b2eb8fa83 100644 --- a/src/passes/DebugLocationPropagation.cpp +++ b/src/passes/DebugLocationPropagation.cpp @@ -64,10 +64,10 @@ struct DebugLocationPropagation if (auto it = locs.find(previous); it != locs.end()) { locs[curr] = it->second; } - } else if (self->getFunction()->prologLocation.size()) { + } else if (self->getFunction()->prologLocation) { // Instructions may inherit their locations from the function // prolog. 
- locs[curr] = *self->getFunction()->prologLocation.begin(); + locs[curr] = *self->getFunction()->prologLocation; } } expressionStack.push_back(curr); diff --git a/src/passes/Print.cpp b/src/passes/Print.cpp index 4ca40f35a..5f2d1cc3d 100644 --- a/src/passes/Print.cpp +++ b/src/passes/Print.cpp @@ -3060,8 +3060,8 @@ void PrintSExpression::visitDefinedFunction(Function* curr) { currFunction = curr; lastPrintedLocation = std::nullopt; lastPrintIndent = 0; - if (currFunction->prologLocation.size()) { - printDebugLocation(*currFunction->prologLocation.begin()); + if (currFunction->prologLocation) { + printDebugLocation(*currFunction->prologLocation); } handleSignature(curr, true); incIndent(); @@ -3095,14 +3095,14 @@ void PrintSExpression::visitDefinedFunction(Function* curr) { } assert(controlFlowDepth == 0); } - if (currFunction->epilogLocation.size()) { + if (currFunction->epilogLocation) { // Print last debug location: mix of decIndent and printDebugLocation // logic. doIndent(o, indent); if (!minify) { indent--; } - printDebugLocation(*currFunction->epilogLocation.begin()); + printDebugLocation(*currFunction->epilogLocation); o << ')'; } else { decIndent(); diff --git a/src/source-map.h b/src/source-map.h new file mode 100644 index 000000000..d8c50b5e1 --- /dev/null +++ b/src/source-map.h @@ -0,0 +1,101 @@ +/* + * Copyright 2024 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef wasm_source_map_h +#define wasm_source_map_h + +#include <optional> +#include <unordered_map> + +#include "wasm.h" + +namespace wasm { + +struct MapParseException { + std::string text; + + MapParseException(std::string text) : text(text) {} + void dump(std::ostream& o) const; +}; + +class SourceMapReader { + const std::vector<char>& buffer; + + // Current position in the source map buffer. + size_t pos = 0; + + // The location in the binary the next debug location will correspond to. 0 + // iff there are no more debug locations. + size_t location = 0; + + // The file index, line, column, and symbol index the next debug location will + // be offset from. + uint32_t file = 0; + uint32_t line = 1; + uint32_t col = 0; + uint32_t symbol = 0; + + // Whether the last read record had position and symbol information. + bool hasInfo = false; + bool hasSymbol = false; + +public: + SourceMapReader(const std::vector<char>& buffer) : buffer(buffer) {} + + void readHeader(Module& wasm); + + std::optional<Function::DebugLocation> + readDebugLocationAt(size_t currLocation); + + // Do not reuse debug info across function boundaries. 
+ void finishFunction() { hasInfo = false; } + +private: + char peek() { + if (pos >= buffer.size()) { + throw MapParseException("unexpected end of source map"); + } + return buffer[pos]; + } + + char get() { + char c = peek(); + ++pos; + return c; + } + + bool maybeGet(char c) { + if (pos < buffer.size() && peek() == c) { + ++pos; + return true; + } + return false; + } + + void expect(char c) { + using namespace std::string_literals; + char got = get(); + if (got != c) { + throw MapParseException("expected '"s + c + "', got '" + got + "'"); + } + } + + int32_t readBase64VLQ(); +}; + +} // namespace wasm + +#endif // wasm_source_map_h diff --git a/src/tools/wasm-dis.cpp b/src/tools/wasm-dis.cpp index 1603736ce..cc377e4e2 100644 --- a/src/tools/wasm-dis.cpp +++ b/src/tools/wasm-dis.cpp @@ -18,6 +18,7 @@ // wasm2asm console tool // +#include "source-map.h" #include "support/colors.h" #include "support/file.h" #include "wasm-io.h" diff --git a/src/wasm-binary.h b/src/wasm-binary.h index b9cbba136..7c32e2169 100644 --- a/src/wasm-binary.h +++ b/src/wasm-binary.h @@ -28,6 +28,7 @@ #include "ir/import-utils.h" #include "ir/module-utils.h" #include "parsing.h" +#include "source-map.h" #include "wasm-builder.h" #include "wasm-ir-builder.h" #include "wasm-traversal.h" @@ -1403,41 +1404,13 @@ private: void prepare(); }; +extern std::vector<char> defaultEmptySourceMap; + class WasmBinaryReader { Module& wasm; MixedArena& allocator; const std::vector<char>& input; - // Source map debugging support. - - std::istream* sourceMap; - - // The binary position that the next debug location refers to. That is, this - // is the first item in a source map entry that we have read (the "column", in - // source map terms, which for wasm means the offset in the binary). We have - // read this entry, but have not used it yet (we use it when we read the - // expression at this binary offset). 
- // - // This is set to 0 as an invalid value if we reach the end of the source map - // and there is nothing left to read. - size_t nextDebugPos; - - // The debug location (file:line:col) corresponding to |nextDebugPos|. That - // is, this is the next 3 fields in a source map entry that we have read, but - // not used yet. - // - // If that location has no debug info (it lacks those 3 fields), then this - // contains the info from the previous one, because in a source map, these - // fields are relative to their last appearance, so we cannot forget them (we - // can't just do something like std::optional<DebugLocation> or such); for - // example, if we have line number 100, then no debug info, and then line - // number 500, then when we get to 500 we will see "+400" which is relative to - // the last existing line number (we "skip" over a place without debug info). - Function::DebugLocation nextDebugLocation; - - // Whether debug info is present on |nextDebugPos| (see comment there). - bool nextDebugLocationHasDebugInfo; - // Settings. 
bool debugInfo = true; @@ -1448,17 +1421,20 @@ class WasmBinaryReader { size_t pos = 0; Index startIndex = -1; - std::set<Function::DebugLocation> debugLocation; size_t codeSectionLocation; std::unordered_set<uint8_t> seenSections; + IRBuilder builder; + SourceMapReader sourceMapReader; + // All types defined in the type section std::vector<HeapType> types; public: WasmBinaryReader(Module& wasm, FeatureSet features, - const std::vector<char>& input); + const std::vector<char>& input, + const std::vector<char>& sourceMap = defaultEmptySourceMap); void setDebugInfo(bool value) { debugInfo = value; } void setDWARF(bool value) { DWARF = value; } @@ -1584,8 +1560,6 @@ public: Expression* readExpression(); void readGlobals(); - IRBuilder builder; - // validations that cannot be performed on the Module void validateBinary(); @@ -1607,13 +1581,6 @@ public: void readDylink(size_t); void readDylink0(size_t); - // Debug information reading helpers - void setDebugLocations(std::istream* sourceMap_) { sourceMap = sourceMap_; } - std::unordered_map<std::string, Index> debugInfoFileIndices; - std::unordered_map<std::string, Index> debugInfoSymbolNameIndices; - void readNextDebugLocation(); - void readSourceMapHeader(); - Index readMemoryAccess(Address& alignment, Address& offset); std::tuple<Name, Address, Address> getMemarg(); diff --git a/src/wasm-stack.h b/src/wasm-stack.h index 85003f9ea..f48233333 100644 --- a/src/wasm-stack.h +++ b/src/wasm-stack.h @@ -456,8 +456,8 @@ public: void emit(Expression* curr) { writer.visit(curr); } void emitHeader() { - if (func->prologLocation.size()) { - parent.writeDebugLocation(*func->prologLocation.begin()); + if (func->prologLocation) { + parent.writeDebugLocation(*func->prologLocation); } writer.mapLocalsAndEmitHeader(); } @@ -469,8 +469,8 @@ public: void emitFunctionEnd() { // Indicate the debug location corresponding to the end opcode // that terminates the function code. 
- if (func->epilogLocation.size()) { - parent.writeDebugLocation(*func->epilogLocation.begin()); + if (func->epilogLocation) { + parent.writeDebugLocation(*func->epilogLocation); } else { // The end opcode has no debug location. parent.writeNoDebugLocation(); diff --git a/src/wasm.h b/src/wasm.h index a5fc070e6..b3ae82bcf 100644 --- a/src/wasm.h +++ b/src/wasm.h @@ -2095,8 +2095,8 @@ public: // One can explicitly set the debug location of an expression to // nullopt to stop the propagation of debug locations. std::unordered_map<Expression*, std::optional<DebugLocation>> debugLocations; - std::set<DebugLocation> prologLocation; - std::set<DebugLocation> epilogLocation; + std::optional<DebugLocation> prologLocation; + std::optional<DebugLocation> epilogLocation; // General debugging info support: track instructions and the function itself. std::unordered_map<Expression*, BinaryLocations::Span> expressionLocations; diff --git a/src/wasm/CMakeLists.txt b/src/wasm/CMakeLists.txt index 7a7b26ead..64c88c997 100644 --- a/src/wasm/CMakeLists.txt +++ b/src/wasm/CMakeLists.txt @@ -2,6 +2,7 @@ file(GLOB wasm_HEADERS ../*.h) set(wasm_SOURCES literal.cpp parsing.cpp + source-map.cpp wasm.cpp wasm-binary.cpp wasm-debug.cpp diff --git a/src/wasm/parsing.cpp b/src/wasm/parsing.cpp index 1606a2dd1..5d34da78e 100644 --- a/src/wasm/parsing.cpp +++ b/src/wasm/parsing.cpp @@ -36,18 +36,6 @@ void ParseException::dump(std::ostream& o) const { Colors::normal(o); } -void MapParseException::dump(std::ostream& o) const { - Colors::magenta(o); - o << "["; - Colors::red(o); - o << "map parse exception: "; - Colors::green(o); - o << text; - Colors::magenta(o); - o << "]"; - Colors::normal(o); -} - // UniqueNameMapper Name UniqueNameMapper::getPrefixedName(Name prefix) { diff --git a/src/wasm/source-map.cpp b/src/wasm/source-map.cpp new file mode 100644 index 000000000..7ad26e898 --- /dev/null +++ b/src/wasm/source-map.cpp @@ -0,0 +1,212 @@ +/* + * Copyright 2024 WebAssembly Community Group 
participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "source-map.h" +#include "support/colors.h" + +namespace wasm { + +std::vector<char> defaultEmptySourceMap; + +void MapParseException::dump(std::ostream& o) const { + Colors::magenta(o); + o << "["; + Colors::red(o); + o << "map parse exception: "; + Colors::green(o); + o << text; + Colors::magenta(o); + o << "]"; + Colors::normal(o); +} + +void SourceMapReader::readHeader(Module& wasm) { + assert(pos == 0); + if (buffer.empty()) { + return; + } + + auto skipWhitespace = [&]() { + while (pos < buffer.size() && (buffer[pos] == ' ' || buffer[pos] == '\n')) { + ++pos; + } + }; + + auto findField = [&](const char* name) { + bool matching = false; + size_t len = strlen(name); + size_t index = 0; + while (1) { + char ch = get(); + if (ch == '\"') { + if (matching) { + if (index == len) { + // We matched a terminating quote. + break; + } + matching = false; + } else { + // Beginning of a new potential match. 
+ matching = true; + index = 0; + } + } else if (matching && name[index] == ch) { + ++index; + } else if (matching) { + matching = false; + } + } + skipWhitespace(); + expect(':'); + skipWhitespace(); + return true; + }; + + auto readString = [&](std::string& str) { + std::vector<char> vec; + skipWhitespace(); + expect('\"'); + while (1) { + if (maybeGet('\"')) { + break; + } + vec.push_back(get()); + } + skipWhitespace(); + str = std::string(vec.begin(), vec.end()); + }; + + if (!findField("sources")) { + throw MapParseException("cannot find the 'sources' field in map"); + } + + skipWhitespace(); + expect('['); + if (!maybeGet(']')) { + do { + std::string file; + readString(file); + wasm.debugInfoFileNames.push_back(file); + } while (maybeGet(',')); + expect(']'); + } + + if (findField("names")) { + skipWhitespace(); + expect('['); + if (!maybeGet(']')) { + do { + std::string symbol; + readString(symbol); + wasm.debugInfoSymbolNames.push_back(symbol); + } while (maybeGet(',')); + expect(']'); + } + } + + if (!findField("mappings")) { + throw MapParseException("cannot find the 'mappings' field in map"); + } + + expect('\"'); + if (maybeGet('\"')) { + // There are no mappings. + location = 0; + return; + } + + // Read the location of the first debug location. + location = readBase64VLQ(); +} + +std::optional<Function::DebugLocation> +SourceMapReader::readDebugLocationAt(size_t currLocation) { + if (pos >= buffer.size()) { + return std::nullopt; + } + + while (location && location <= currLocation) { + do { + char next = peek(); + if (next == ',' || next == '\"') { + // This is a 1-length entry, so the next location has no debug info. 
+ hasInfo = false; + break; + } + + hasInfo = true; + file += readBase64VLQ(); + line += readBase64VLQ(); + col += readBase64VLQ(); + + next = peek(); + if (next == ',' || next == '\"') { + hasSymbol = false; + break; + } + + hasSymbol = true; + symbol += readBase64VLQ(); + + } while (false); + + // Check whether there is another record to read the position for. + char next = get(); + if (next == '\"') { + // End of records. + location = 0; + break; + } + if (next != ',') { + throw MapParseException("Expected delimiter"); + } + + // Set up for the next record. + location += readBase64VLQ(); + } + + if (!hasInfo) { + return std::nullopt; + } + + auto sym = hasSymbol ? symbol : std::optional<uint32_t>{}; + return Function::DebugLocation{file, line, col, sym}; +} + +int32_t SourceMapReader::readBase64VLQ() { + uint32_t value = 0; + uint32_t shift = 0; + while (1) { + auto ch = get(); + if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch < 'g')) { + // last number digit + uint32_t digit = ch < 'a' ? ch - 'A' : ch - 'a' + 26; + value |= digit << shift; + break; + } + if (!(ch >= 'g' && ch <= 'z') && !(ch >= '0' && ch <= '9') && ch != '+' && + ch != '/') { + throw MapParseException("invalid VLQ digit"); + } + uint32_t digit = + ch > '9' ? ch - 'g' : (ch >= '0' ? ch - '0' + 20 : (ch == '+' ? 30 : 31)); + value |= digit << shift; + shift += 5; + } + return value & 1 ? 
-int32_t(value >> 1) : int32_t(value >> 1); +} + +} // namespace wasm diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index 82ac422ea..86b3ea899 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -1737,10 +1737,10 @@ void WasmBinaryWriter::writeField(const Field& field) { WasmBinaryReader::WasmBinaryReader(Module& wasm, FeatureSet features, - const std::vector<char>& input) - : wasm(wasm), allocator(wasm.allocator), input(input), sourceMap(nullptr), - nextDebugPos(0), nextDebugLocation{0, 0, 0, std::nullopt}, - nextDebugLocationHasDebugInfo(false), debugLocation(), builder(wasm) { + const std::vector<char>& input, + const std::vector<char>& sourceMap) + : wasm(wasm), allocator(wasm.allocator), input(input), builder(wasm), + sourceMapReader(sourceMap) { wasm.features = features; } @@ -1788,7 +1788,7 @@ void WasmBinaryReader::read() { } readHeader(); - readSourceMapHeader(); + sourceMapReader.readHeader(wasm); // Read sections until the end while (more()) { @@ -2804,12 +2804,10 @@ void WasmBinaryReader::readFunctions() { BinaryLocation(pos - codeSectionLocation + size)}; } - readNextDebugLocation(); + func->prologLocation = sourceMapReader.readDebugLocationAt(pos); readVars(); setLocalNames(*func, numFuncImports + i); - - func->prologLocation = debugLocation; { // Process the function body. Even if we are skipping function bodies we // need to not skip the start function. 
That contains important code for @@ -2846,11 +2844,9 @@ void WasmBinaryReader::readFunctions() { } } + sourceMapReader.finishFunction(); TypeUpdating::handleNonDefaultableLocals(func.get(), wasm); - - std::swap(func->epilogLocation, debugLocation); currFunction = nullptr; - debugLocation.clear(); } } @@ -2879,9 +2875,8 @@ void WasmBinaryReader::readVars() { } Result<> WasmBinaryReader::readInst() { - readNextDebugLocation(); - if (debugLocation.size()) { - builder.setDebugLocation(*debugLocation.begin()); + if (auto loc = sourceMapReader.readDebugLocationAt(pos)) { + builder.setDebugLocation(loc); } uint8_t code = getInt8(); switch (code) { @@ -4273,242 +4268,6 @@ void WasmBinaryReader::readExports() { } } -static int32_t readBase64VLQ(std::istream& in) { - uint32_t value = 0; - uint32_t shift = 0; - while (1) { - auto ch = in.get(); - if (ch == EOF) { - throw MapParseException("unexpected EOF in the middle of VLQ"); - } - if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch < 'g')) { - // last number digit - uint32_t digit = ch < 'a' ? ch - 'A' : ch - 'a' + 26; - value |= digit << shift; - break; - } - if (!(ch >= 'g' && ch <= 'z') && !(ch >= '0' && ch <= '9') && ch != '+' && - ch != '/') { - throw MapParseException("invalid VLQ digit"); - } - uint32_t digit = - ch > '9' ? ch - 'g' : (ch >= '0' ? ch - '0' + 20 : (ch == '+' ? 30 : 31)); - value |= digit << shift; - shift += 5; - } - return value & 1 ? 
-int32_t(value >> 1) : int32_t(value >> 1); -} - -void WasmBinaryReader::readSourceMapHeader() { - if (!sourceMap) { - return; - } - - auto skipWhitespace = [&]() { - while (sourceMap->peek() == ' ' || sourceMap->peek() == '\n') { - sourceMap->get(); - } - }; - - auto maybeReadChar = [&](char expected) { - if (sourceMap->peek() != expected) { - return false; - } - sourceMap->get(); - return true; - }; - - auto mustReadChar = [&](char expected) { - char c = sourceMap->get(); - if (c != expected) { - throw MapParseException(std::string("Unexpected char: expected '") + - expected + "' got '" + c + "'"); - } - }; - - auto findField = [&](const char* name) { - bool matching = false; - size_t len = strlen(name); - size_t pos; - while (1) { - int ch = sourceMap->get(); - if (ch == EOF) { - return false; - } - if (ch == '\"') { - if (matching) { - // we matched a terminating quote. - if (pos == len) { - break; - } - matching = false; - } else { - matching = true; - pos = 0; - } - } else if (matching && name[pos] == ch) { - ++pos; - } else if (matching) { - matching = false; - } - } - skipWhitespace(); - mustReadChar(':'); - skipWhitespace(); - return true; - }; - - auto readString = [&](std::string& str) { - std::vector<char> vec; - skipWhitespace(); - mustReadChar('\"'); - if (!maybeReadChar('\"')) { - while (1) { - int ch = sourceMap->get(); - if (ch == EOF) { - throw MapParseException("unexpected EOF in the middle of string"); - } - if (ch == '\"') { - break; - } - vec.push_back(ch); - } - } - skipWhitespace(); - str = std::string(vec.begin(), vec.end()); - }; - - if (!findField("sources")) { - throw MapParseException("cannot find the 'sources' field in map"); - } - - skipWhitespace(); - mustReadChar('['); - if (!maybeReadChar(']')) { - do { - std::string file; - readString(file); - Index index = wasm.debugInfoFileNames.size(); - wasm.debugInfoFileNames.push_back(file); - debugInfoFileIndices[file] = index; - } while (maybeReadChar(',')); - mustReadChar(']'); - } - - if 
(findField("names")) { - skipWhitespace(); - mustReadChar('['); - if (!maybeReadChar(']')) { - do { - std::string symbol; - readString(symbol); - Index index = wasm.debugInfoSymbolNames.size(); - wasm.debugInfoSymbolNames.push_back(symbol); - debugInfoSymbolNameIndices[symbol] = index; - } while (maybeReadChar(',')); - mustReadChar(']'); - } - } - - if (!findField("mappings")) { - throw MapParseException("cannot find the 'mappings' field in map"); - } - - mustReadChar('\"'); - if (maybeReadChar('\"')) { // empty mappings - nextDebugPos = 0; - return; - } - // read first debug location - // TODO: Handle the case where the very first one has only a position but not - // debug info. In practice that does not happen, which needs - // investigation (if it does, it will assert in readBase64VLQ, so it - // would not be a silent error at least). - uint32_t position = readBase64VLQ(*sourceMap); - nextDebugPos = position; - - auto peek = sourceMap->peek(); - if (peek == ',' || peek == '\"') { - // This is a 1-length entry, so the next location has no debug info. - nextDebugLocationHasDebugInfo = false; - } else { - uint32_t fileIndex = readBase64VLQ(*sourceMap); - uint32_t lineNumber = - readBase64VLQ(*sourceMap) + 1; // adjust zero-based line number - uint32_t columnNumber = readBase64VLQ(*sourceMap); - std::optional<BinaryLocation> symbolNameIndex; - peek = sourceMap->peek(); - if (!(peek == ',' || peek == '\"')) { - symbolNameIndex = readBase64VLQ(*sourceMap); - } - nextDebugLocation = {fileIndex, lineNumber, columnNumber, symbolNameIndex}; - nextDebugLocationHasDebugInfo = true; - } -} - -void WasmBinaryReader::readNextDebugLocation() { - if (!sourceMap) { - return; - } - - if (nextDebugPos == 0) { - // We reached the end of the source map; nothing left to read. 
- return; - } - - while (nextDebugPos && nextDebugPos <= pos) { - debugLocation.clear(); - // use debugLocation only for function expressions - if (currFunction) { - if (nextDebugLocationHasDebugInfo) { - debugLocation.insert(nextDebugLocation); - } else { - debugLocation.clear(); - } - } - - char ch; - *sourceMap >> ch; - if (ch == '\"') { // end of records - nextDebugPos = 0; - break; - } - if (ch != ',') { - throw MapParseException("Unexpected delimiter"); - } - - int32_t positionDelta = readBase64VLQ(*sourceMap); - uint32_t position = nextDebugPos + positionDelta; - - nextDebugPos = position; - - auto peek = sourceMap->peek(); - if (peek == ',' || peek == '\"') { - // This is a 1-length entry, so the next location has no debug info. - nextDebugLocationHasDebugInfo = false; - break; - } - - int32_t fileIndexDelta = readBase64VLQ(*sourceMap); - uint32_t fileIndex = nextDebugLocation.fileIndex + fileIndexDelta; - int32_t lineNumberDelta = readBase64VLQ(*sourceMap); - uint32_t lineNumber = nextDebugLocation.lineNumber + lineNumberDelta; - int32_t columnNumberDelta = readBase64VLQ(*sourceMap); - uint32_t columnNumber = nextDebugLocation.columnNumber + columnNumberDelta; - - std::optional<BinaryLocation> symbolNameIndex; - peek = sourceMap->peek(); - if (!(peek == ',' || peek == '\"')) { - int32_t symbolNameIndexDelta = readBase64VLQ(*sourceMap); - symbolNameIndex = - nextDebugLocation.symbolNameIndex.value_or(0) + symbolNameIndexDelta; - } - - nextDebugLocation = {fileIndex, lineNumber, columnNumber, symbolNameIndex}; - nextDebugLocationHasDebugInfo = true; - } -} - Expression* WasmBinaryReader::readExpression() { assert(builder.empty()); while (input[pos] != BinaryConsts::End) { diff --git a/src/wasm/wasm-io.cpp b/src/wasm/wasm-io.cpp index 149216e1a..e1d036cec 100644 --- a/src/wasm/wasm-io.cpp +++ b/src/wasm/wasm-io.cpp @@ -50,25 +50,18 @@ void ModuleReader::readText(std::string filename, Module& wasm) { void ModuleReader::readBinaryData(std::vector<char>& input, 
Module& wasm, std::string sourceMapFilename) { - std::unique_ptr<std::ifstream> sourceMapStream; + std::vector<char> sourceMapBuffer; + if (sourceMapFilename.size()) { + sourceMapBuffer = + read_file<std::vector<char>>(sourceMapFilename, Flags::Text); + } // Assume that the wasm has had its initial features applied, and use those // while parsing. - WasmBinaryReader parser(wasm, wasm.features, input); + WasmBinaryReader parser(wasm, wasm.features, input, sourceMapBuffer); parser.setDebugInfo(debugInfo); parser.setDWARF(DWARF); parser.setSkipFunctionBodies(skipFunctionBodies); - if (sourceMapFilename.size()) { - sourceMapStream = std::make_unique<std::ifstream>(); - sourceMapStream->open(wasm::Path::to_path(sourceMapFilename)); - if (!sourceMapStream->is_open()) { - Fatal() << "Failed opening '" << sourceMapFilename << "'"; - } - parser.setDebugLocations(sourceMapStream.get()); - } parser.read(); - if (sourceMapStream) { - sourceMapStream->close(); - } } void ModuleReader::readBinary(std::string filename, diff --git a/src/wasm/wasm-ir-builder.cpp b/src/wasm/wasm-ir-builder.cpp index a51337cda..96212ccd7 100644 --- a/src/wasm/wasm-ir-builder.cpp +++ b/src/wasm/wasm-ir-builder.cpp @@ -709,7 +709,7 @@ Result<> IRBuilder::visitFunctionStart(Function* func) { return Err{"unexpected start of function"}; } if (auto* loc = std::get_if<Function::DebugLocation>(&debugLoc)) { - func->prologLocation.insert(*loc); + func->prologLocation = *loc; } debugLoc = CanReceiveDebug(); scopeStack.push_back(ScopeCtx::makeFunc(func)); @@ -975,7 +975,7 @@ Result<> IRBuilder::visitEnd() { } if (auto* func = scope.getFunction()) { if (auto* loc = std::get_if<Function::DebugLocation>(&debugLoc)) { - func->epilogLocation.insert(*loc); + func->epilogLocation = *loc; } } debugLoc = CanReceiveDebug(); diff --git a/src/wasm/wasm-stack.cpp b/src/wasm/wasm-stack.cpp index f86fb58b9..61f59c76a 100644 --- a/src/wasm/wasm-stack.cpp +++ b/src/wasm/wasm-stack.cpp @@ -3096,8 +3096,8 @@ 
ModuleStackIR::ModuleStackIR(Module& wasm, const PassOptions& options) }) {} void StackIRToBinaryWriter::write() { - if (func->prologLocation.size()) { - parent.writeDebugLocation(*func->prologLocation.begin()); + if (func->prologLocation) { + parent.writeDebugLocation(*func->prologLocation); } writer.mapLocalsAndEmitHeader(); // Stack to track indices of catches within a try @@ -3158,8 +3158,8 @@ void StackIRToBinaryWriter::write() { } // Indicate the debug location corresponding to the end opcode that // terminates the function code. - if (func->epilogLocation.size()) { - parent.writeDebugLocation(*func->epilogLocation.begin()); + if (func->epilogLocation) { + parent.writeDebugLocation(*func->epilogLocation); } else { // The end opcode has no debug location. parent.writeNoDebugLocation(); diff --git a/src/wasm/wasm.cpp b/src/wasm/wasm.cpp index 38f35411f..f5806b184 100644 --- a/src/wasm/wasm.cpp +++ b/src/wasm/wasm.cpp @@ -1503,8 +1503,8 @@ void Function::clearNames() { localNames.clear(); } void Function::clearDebugInfo() { localIndices.clear(); debugLocations.clear(); - prologLocation.clear(); - epilogLocation.clear(); + prologLocation.reset(); + epilogLocation.reset(); } template<typename Map> diff --git a/test/gtest/CMakeLists.txt b/test/gtest/CMakeLists.txt index c3d281f1c..102d3ca2a 100644 --- a/test/gtest/CMakeLists.txt +++ b/test/gtest/CMakeLists.txt @@ -3,7 +3,7 @@ include_directories(../../src/wasm) set(unittest_SOURCES arena.cpp - binary-reader.cpp + source-map.cpp cfg.cpp dfa_minimization.cpp disjoint_sets.cpp diff --git a/test/gtest/binary-reader.cpp b/test/gtest/source-map.cpp index b73fe55bd..c943be172 100644 --- a/test/gtest/binary-reader.cpp +++ b/test/gtest/source-map.cpp @@ -14,24 +14,19 @@ * limitations under the License. 
*/ -#include "parser/wat-parser.h" +#include "source-map.h" #include "print-test.h" -#include "wasm-binary.h" #include "gtest/gtest.h" using namespace wasm; -using BinaryReaderTest = PrintTest; +using SourceMapTest = PrintTest; // Check that debug location parsers can handle single-segment mappings. -TEST_F(BinaryReaderTest, SourceMappingSingleSegment) { - auto moduleText = "(module)"; - Module module; - parseWast(module, moduleText); - - BufferWithRandomAccess buffer; - WasmBinaryWriter(&module, buffer, PassOptions()); - auto moduleBytes = buffer.getAsChars(); +TEST_F(SourceMapTest, SourceMappingSingleSegment) { + auto text = "(module)"; + Module wasm; + parseWast(wasm, text); // A single-segment mapping starting at offset 0. std::string sourceMap = R"( @@ -42,22 +37,15 @@ TEST_F(BinaryReaderTest, SourceMappingSingleSegment) { "mappings": "A" } )"; - std::stringstream sourceMapStream(sourceMap); + std::vector<char> buffer(sourceMap.begin(), sourceMap.end()); + + SourceMapReader reader(buffer); // Test `readSourceMapHeader` (only check for errors, as there is no mapping // to print). - { - Module module; - WasmBinaryReader binaryReader(module, FeatureSet::All, moduleBytes); - binaryReader.setDebugLocations(&sourceMapStream); - binaryReader.readSourceMapHeader(); - } + reader.readHeader(wasm); // Test `readNextDebugLocation`. - { - Module module; - WasmBinaryReader binaryReader(module, FeatureSet::All, moduleBytes); - binaryReader.setDebugLocations(&sourceMapStream); - binaryReader.readNextDebugLocation(); - } + // TODO: Actually check the result. + reader.readDebugLocationAt(1); } |