summaryrefslogtreecommitdiff
path: root/src/wasm/wasm-binary.cpp
diff options
context:
space:
mode:
authorThomas Lively <tlively@google.com>2024-12-03 11:20:36 -0800
committerGitHub <noreply@github.com>2024-12-03 11:20:36 -0800
commit87f9dac127b387715d8d96ac7ec8fd469d8c2dab (patch)
treeaa1aec906bf6adc3ea3d93c56616d393850e2249 /src/wasm/wasm-binary.cpp
parentf331120e4b942a795d4a6b6d0d5a3d781c1e6a4c (diff)
downloadbinaryen-87f9dac127b387715d8d96ac7ec8fd469d8c2dab.tar.gz
binaryen-87f9dac127b387715d8d96ac7ec8fd469d8c2dab.tar.bz2
binaryen-87f9dac127b387715d8d96ac7ec8fd469d8c2dab.zip
[NFC] Encapsulate source map reader state (#7132)
Move all state relevant to reading source maps out of WasmBinaryReader and into a new utility, SourceMapReader. This is a prerequisite for parallelizing the parsing of function bodies, since the source map reader state is different at the beginning of each function. Also take the opportunity to simplify the way we read source maps, for example by deferring the reading of anything but the position of a debug location until it will be used and by using `std::optional` instead of singleton `std::set`s to store function prologue and epilogue debug locations.
Diffstat (limited to 'src/wasm/wasm-binary.cpp')
-rw-r--r--src/wasm/wasm-binary.cpp259
1 files changed, 9 insertions, 250 deletions
diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp
index 82ac422ea..86b3ea899 100644
--- a/src/wasm/wasm-binary.cpp
+++ b/src/wasm/wasm-binary.cpp
@@ -1737,10 +1737,10 @@ void WasmBinaryWriter::writeField(const Field& field) {
WasmBinaryReader::WasmBinaryReader(Module& wasm,
FeatureSet features,
- const std::vector<char>& input)
- : wasm(wasm), allocator(wasm.allocator), input(input), sourceMap(nullptr),
- nextDebugPos(0), nextDebugLocation{0, 0, 0, std::nullopt},
- nextDebugLocationHasDebugInfo(false), debugLocation(), builder(wasm) {
+ const std::vector<char>& input,
+ const std::vector<char>& sourceMap)
+ : wasm(wasm), allocator(wasm.allocator), input(input), builder(wasm),
+ sourceMapReader(sourceMap) {
wasm.features = features;
}
@@ -1788,7 +1788,7 @@ void WasmBinaryReader::read() {
}
readHeader();
- readSourceMapHeader();
+ sourceMapReader.readHeader(wasm);
// Read sections until the end
while (more()) {
@@ -2804,12 +2804,10 @@ void WasmBinaryReader::readFunctions() {
BinaryLocation(pos - codeSectionLocation + size)};
}
- readNextDebugLocation();
+ func->prologLocation = sourceMapReader.readDebugLocationAt(pos);
readVars();
setLocalNames(*func, numFuncImports + i);
-
- func->prologLocation = debugLocation;
{
// Process the function body. Even if we are skipping function bodies we
// need to not skip the start function. That contains important code for
@@ -2846,11 +2844,9 @@ void WasmBinaryReader::readFunctions() {
}
}
+ sourceMapReader.finishFunction();
TypeUpdating::handleNonDefaultableLocals(func.get(), wasm);
-
- std::swap(func->epilogLocation, debugLocation);
currFunction = nullptr;
- debugLocation.clear();
}
}
@@ -2879,9 +2875,8 @@ void WasmBinaryReader::readVars() {
}
Result<> WasmBinaryReader::readInst() {
- readNextDebugLocation();
- if (debugLocation.size()) {
- builder.setDebugLocation(*debugLocation.begin());
+ if (auto loc = sourceMapReader.readDebugLocationAt(pos)) {
+ builder.setDebugLocation(loc);
}
uint8_t code = getInt8();
switch (code) {
@@ -4273,242 +4268,6 @@ void WasmBinaryReader::readExports() {
}
}
-static int32_t readBase64VLQ(std::istream& in) {
- uint32_t value = 0;
- uint32_t shift = 0;
- while (1) {
- auto ch = in.get();
- if (ch == EOF) {
- throw MapParseException("unexpected EOF in the middle of VLQ");
- }
- if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch < 'g')) {
- // last number digit
- uint32_t digit = ch < 'a' ? ch - 'A' : ch - 'a' + 26;
- value |= digit << shift;
- break;
- }
- if (!(ch >= 'g' && ch <= 'z') && !(ch >= '0' && ch <= '9') && ch != '+' &&
- ch != '/') {
- throw MapParseException("invalid VLQ digit");
- }
- uint32_t digit =
- ch > '9' ? ch - 'g' : (ch >= '0' ? ch - '0' + 20 : (ch == '+' ? 30 : 31));
- value |= digit << shift;
- shift += 5;
- }
- return value & 1 ? -int32_t(value >> 1) : int32_t(value >> 1);
-}
-
-void WasmBinaryReader::readSourceMapHeader() {
- if (!sourceMap) {
- return;
- }
-
- auto skipWhitespace = [&]() {
- while (sourceMap->peek() == ' ' || sourceMap->peek() == '\n') {
- sourceMap->get();
- }
- };
-
- auto maybeReadChar = [&](char expected) {
- if (sourceMap->peek() != expected) {
- return false;
- }
- sourceMap->get();
- return true;
- };
-
- auto mustReadChar = [&](char expected) {
- char c = sourceMap->get();
- if (c != expected) {
- throw MapParseException(std::string("Unexpected char: expected '") +
- expected + "' got '" + c + "'");
- }
- };
-
- auto findField = [&](const char* name) {
- bool matching = false;
- size_t len = strlen(name);
- size_t pos;
- while (1) {
- int ch = sourceMap->get();
- if (ch == EOF) {
- return false;
- }
- if (ch == '\"') {
- if (matching) {
- // we matched a terminating quote.
- if (pos == len) {
- break;
- }
- matching = false;
- } else {
- matching = true;
- pos = 0;
- }
- } else if (matching && name[pos] == ch) {
- ++pos;
- } else if (matching) {
- matching = false;
- }
- }
- skipWhitespace();
- mustReadChar(':');
- skipWhitespace();
- return true;
- };
-
- auto readString = [&](std::string& str) {
- std::vector<char> vec;
- skipWhitespace();
- mustReadChar('\"');
- if (!maybeReadChar('\"')) {
- while (1) {
- int ch = sourceMap->get();
- if (ch == EOF) {
- throw MapParseException("unexpected EOF in the middle of string");
- }
- if (ch == '\"') {
- break;
- }
- vec.push_back(ch);
- }
- }
- skipWhitespace();
- str = std::string(vec.begin(), vec.end());
- };
-
- if (!findField("sources")) {
- throw MapParseException("cannot find the 'sources' field in map");
- }
-
- skipWhitespace();
- mustReadChar('[');
- if (!maybeReadChar(']')) {
- do {
- std::string file;
- readString(file);
- Index index = wasm.debugInfoFileNames.size();
- wasm.debugInfoFileNames.push_back(file);
- debugInfoFileIndices[file] = index;
- } while (maybeReadChar(','));
- mustReadChar(']');
- }
-
- if (findField("names")) {
- skipWhitespace();
- mustReadChar('[');
- if (!maybeReadChar(']')) {
- do {
- std::string symbol;
- readString(symbol);
- Index index = wasm.debugInfoSymbolNames.size();
- wasm.debugInfoSymbolNames.push_back(symbol);
- debugInfoSymbolNameIndices[symbol] = index;
- } while (maybeReadChar(','));
- mustReadChar(']');
- }
- }
-
- if (!findField("mappings")) {
- throw MapParseException("cannot find the 'mappings' field in map");
- }
-
- mustReadChar('\"');
- if (maybeReadChar('\"')) { // empty mappings
- nextDebugPos = 0;
- return;
- }
- // read first debug location
- // TODO: Handle the case where the very first one has only a position but not
- // debug info. In practice that does not happen, which needs
- // investigation (if it does, it will assert in readBase64VLQ, so it
- // would not be a silent error at least).
- uint32_t position = readBase64VLQ(*sourceMap);
- nextDebugPos = position;
-
- auto peek = sourceMap->peek();
- if (peek == ',' || peek == '\"') {
- // This is a 1-length entry, so the next location has no debug info.
- nextDebugLocationHasDebugInfo = false;
- } else {
- uint32_t fileIndex = readBase64VLQ(*sourceMap);
- uint32_t lineNumber =
- readBase64VLQ(*sourceMap) + 1; // adjust zero-based line number
- uint32_t columnNumber = readBase64VLQ(*sourceMap);
- std::optional<BinaryLocation> symbolNameIndex;
- peek = sourceMap->peek();
- if (!(peek == ',' || peek == '\"')) {
- symbolNameIndex = readBase64VLQ(*sourceMap);
- }
- nextDebugLocation = {fileIndex, lineNumber, columnNumber, symbolNameIndex};
- nextDebugLocationHasDebugInfo = true;
- }
-}
-
-void WasmBinaryReader::readNextDebugLocation() {
- if (!sourceMap) {
- return;
- }
-
- if (nextDebugPos == 0) {
- // We reached the end of the source map; nothing left to read.
- return;
- }
-
- while (nextDebugPos && nextDebugPos <= pos) {
- debugLocation.clear();
- // use debugLocation only for function expressions
- if (currFunction) {
- if (nextDebugLocationHasDebugInfo) {
- debugLocation.insert(nextDebugLocation);
- } else {
- debugLocation.clear();
- }
- }
-
- char ch;
- *sourceMap >> ch;
- if (ch == '\"') { // end of records
- nextDebugPos = 0;
- break;
- }
- if (ch != ',') {
- throw MapParseException("Unexpected delimiter");
- }
-
- int32_t positionDelta = readBase64VLQ(*sourceMap);
- uint32_t position = nextDebugPos + positionDelta;
-
- nextDebugPos = position;
-
- auto peek = sourceMap->peek();
- if (peek == ',' || peek == '\"') {
- // This is a 1-length entry, so the next location has no debug info.
- nextDebugLocationHasDebugInfo = false;
- break;
- }
-
- int32_t fileIndexDelta = readBase64VLQ(*sourceMap);
- uint32_t fileIndex = nextDebugLocation.fileIndex + fileIndexDelta;
- int32_t lineNumberDelta = readBase64VLQ(*sourceMap);
- uint32_t lineNumber = nextDebugLocation.lineNumber + lineNumberDelta;
- int32_t columnNumberDelta = readBase64VLQ(*sourceMap);
- uint32_t columnNumber = nextDebugLocation.columnNumber + columnNumberDelta;
-
- std::optional<BinaryLocation> symbolNameIndex;
- peek = sourceMap->peek();
- if (!(peek == ',' || peek == '\"')) {
- int32_t symbolNameIndexDelta = readBase64VLQ(*sourceMap);
- symbolNameIndex =
- nextDebugLocation.symbolNameIndex.value_or(0) + symbolNameIndexDelta;
- }
-
- nextDebugLocation = {fileIndex, lineNumber, columnNumber, symbolNameIndex};
- nextDebugLocationHasDebugInfo = true;
- }
-}
-
Expression* WasmBinaryReader::readExpression() {
assert(builder.empty());
while (input[pos] != BinaryConsts::End) {