diff options
author | Alon Zakai <alonzakai@gmail.com> | 2017-02-07 11:24:57 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-02-07 11:24:57 -0800 |
commit | 76c8f82d5cf98171ff46ed4bf36791d3b891357f (patch) | |
tree | ab251df55b1274b053896b10b14faf2ac8e526cd /src | |
parent | 985bcba6239792ebcb3808f1066ca6ea20ac5688 (diff) | |
download | binaryen-76c8f82d5cf98171ff46ed4bf36791d3b891357f.tar.gz binaryen-76c8f82d5cf98171ff46ed4bf36791d3b891357f.tar.bz2 binaryen-76c8f82d5cf98171ff46ed4bf36791d3b891357f.zip |
asm2wasm debuginfo (#895)
* parse file/line comments in asm.js into debug intrinsics
* convert debug intrinsics into annotations, and print them
* ignore --debuginfo if not emitting text, as wasm binaries don't support that yet
* emit full debug info when -g and emitting text; when -g and emitting binary, all we can do is the Names section
* update wasm.js
Diffstat (limited to 'src')
-rw-r--r-- | src/asm2wasm.h | 180 | ||||
-rw-r--r-- | src/mixed_arena.h | 1 | ||||
-rw-r--r-- | src/passes/Print.cpp | 13 | ||||
-rw-r--r-- | src/tools/asm2wasm.cpp | 12 | ||||
-rw-r--r-- | src/wasm-js.cpp | 3 | ||||
-rw-r--r-- | src/wasm.h | 3 |
6 files changed, 204 insertions, 8 deletions
diff --git a/src/asm2wasm.h b/src/asm2wasm.h index eaf4f8c18..32b6af5b0 100644 --- a/src/asm2wasm.h +++ b/src/asm2wasm.h @@ -34,6 +34,7 @@ #include "ast_utils.h" #include "wasm-builder.h" #include "wasm-emscripten.h" +#include "wasm-printing.h" #include "wasm-validator.h" #include "wasm-module-building.h" @@ -109,7 +110,8 @@ Name I32_CTTZ("i32_cttz"), FTCALL("ftCall_"), MFTCALL("mftCall_"), MAX_("max"), - MIN_("min"); + MIN_("min"), + EMSCRIPTEN_DEBUGINFO("emscripten_debuginfo"); // Utilities @@ -148,6 +150,14 @@ struct AstStackHelper { std::vector<Ref> AstStackHelper::astStack; +static bool startsWith(const char* string, const char *prefix) { + while (1) { + if (*prefix == 0) return true; + if (*string == 0) return false; + if (*string++ != *prefix++) return false; + } +}; + // // Asm2WasmPreProcessor - does some initial parsing/processing // of asm.js code. @@ -155,6 +165,16 @@ std::vector<Ref> AstStackHelper::astStack; struct Asm2WasmPreProcessor { bool memoryGrowth = false; + bool debugInfo = false; + + std::vector<std::string> debugInfoFileNames; + std::unordered_map<std::string, Index> debugInfoFileIndices; + + char* allocatedCopy = nullptr; + + ~Asm2WasmPreProcessor() { + if (allocatedCopy) free(allocatedCopy); + } char* process(char* input) { // emcc --separate-asm modules can look like @@ -206,6 +226,79 @@ struct Asm2WasmPreProcessor { *marker = START_FUNCS[0]; } + // handle debug info, if this build wants that. + if (debugInfo) { + // asm.js debug info comments look like + // ..command..; //@line 4 "tests/hello_world.c" + // we convert those into emscripten_debuginfo(file, line) + // calls, where the params are indices into a mapping. then + // the compiler and optimizer can operate on them. after + // that, we can apply the debug info to the wasm node right + // before it - this is guaranteed to be correct without opts, + // and is usually decently accurate with them. + const auto SCALE_FACTOR = 1.25; // an upper bound on how much more space we need as a multiple of the original + const auto ADD_FACTOR = 100; // an upper bound on how much we write for each debug info element itself + auto size = strlen(input); + auto upperBound = Index(size * SCALE_FACTOR) + ADD_FACTOR; + char* copy = allocatedCopy = (char*)malloc(upperBound); + char* end = copy + upperBound; + char* out = copy; + std::string DEBUGINFO_INTRINSIC = EMSCRIPTEN_DEBUGINFO.str; + auto DEBUGINFO_INTRINSIC_SIZE = DEBUGINFO_INTRINSIC.size(); + bool seenUseAsm = false; + while (input[0]) { + if (out + ADD_FACTOR >= end) { + Fatal() << "error in handling debug info"; + } + if (startsWith(input, "//@line")) { + char* linePos = input + 8; + char* lineEnd = strchr(input + 8, ' '); + char* filePos = strchr(lineEnd, '"') + 1; + char* fileEnd = strchr(filePos, '"'); + input = fileEnd + 1; + *lineEnd = 0; + *fileEnd = 0; + std::string line = linePos, file = filePos; + auto iter = debugInfoFileIndices.find(file); + if (iter == debugInfoFileIndices.end()) { + Index index = debugInfoFileNames.size(); + debugInfoFileNames.push_back(file); + debugInfoFileIndices[file] = index; + } + std::string fileIndex = std::to_string(debugInfoFileIndices[file]); + // write out the intrinsic + strcpy(out, DEBUGINFO_INTRINSIC.c_str()); + out += DEBUGINFO_INTRINSIC_SIZE; + *out++ = '('; + strcpy(out, fileIndex.c_str()); + out += fileIndex.size(); + *out++ = ','; + strcpy(out, line.c_str()); + out += line.size(); + *out++ = ')'; + *out++ = ';'; + } else if (!seenUseAsm && (startsWith(input, "asm'") || startsWith(input, "asm\""))) { + // end of "use asm" or "almost asm" + const auto SKIP = 5; // skip the end of "use asm"; (5 chars, a,s,m," or ',;) + seenUseAsm = true; + memcpy(out, input, SKIP); + out += SKIP; + input += SKIP; + // add a fake import for the intrinsic, so the module validates + std::string import = "\n var emscripten_debuginfo = env.emscripten_debuginfo;"; + strcpy(out, import.c_str()); + out += import.size(); + } else { + *out++ = *input++; + } + } + if (out >= end) { + Fatal() << "error in handling debug info"; + } + *out = 0; + input = copy; + } + return input; } }; @@ -237,7 +330,7 @@ class Asm2WasmBuilder { // function table std::map<IString, int> functionTableStarts; // each asm function table gets a range in the one wasm table, starting at a location - bool memoryGrowth; + Asm2WasmPreProcessor& preprocessor; bool debug; bool imprecise; PassOptions passOptions; @@ -343,11 +436,11 @@ private: } public: - Asm2WasmBuilder(Module& wasm, bool memoryGrowth, bool debug, bool imprecise, PassOptions passOptions, bool runOptimizationPasses, bool wasmOnly) + Asm2WasmBuilder(Module& wasm, Asm2WasmPreProcessor& preprocessor, bool debug, bool imprecise, PassOptions passOptions, bool runOptimizationPasses, bool wasmOnly) : wasm(wasm), allocator(wasm.allocator), builder(wasm), - memoryGrowth(memoryGrowth), + preprocessor(preprocessor), debug(debug), imprecise(imprecise), passOptions(passOptions), @@ -565,6 +658,16 @@ private: } Function* processFunction(Ref ast); + +public: + CallImport* checkDebugInfo(Expression* curr) { + if (auto* call = curr->dynCast<CallImport>()) { + if (call->target == EMSCRIPTEN_DEBUGINFO) { + return call; + } + } + return nullptr; + } }; void Asm2WasmBuilder::processAsm(Ref ast) { @@ -1014,6 +1117,43 @@ void Asm2WasmBuilder::processAsm(Ref ast) { } }; + // apply debug info, reducing intrinsic calls into annotations on the ast nodes + struct ApplyDebugInfo : public WalkerPass<PostWalker<ApplyDebugInfo, UnifiedExpressionVisitor<ApplyDebugInfo>>> { + bool isFunctionParallel() override { return true; } + + Pass* create() override { return new ApplyDebugInfo(parent); } + + Asm2WasmBuilder* parent; + + ApplyDebugInfo(Asm2WasmBuilder* parent) : parent(parent) { + name = "apply-debug-info"; + } + + Expression* lastExpression = nullptr; + + void visitExpression(Expression* curr) { + if (auto* call = parent->checkDebugInfo(curr)) { + // this is a debuginfo node. turn it into an annotation on the last stack + auto* last = lastExpression; + lastExpression = nullptr; + auto& annotations = getFunction()->annotations; + if (last) { + auto fileIndex = call->operands[0]->cast<Const>()->value.geti32(); + auto lineNumber = call->operands[1]->cast<Const>()->value.geti32(); + annotations[last] = parent->preprocessor.debugInfoFileNames[fileIndex] + ":" + std::to_string(lineNumber); + } + // eliminate the debug info call + ExpressionManipulator::nop(curr); + return; + } + // ignore const nodes, as they may be the children of the debug info calls, and they + // don't really need debug info anyhow + if (!curr->is<Const>()) { + lastExpression = curr; + } + } + }; + PassRunner passRunner(&wasm); if (debug) { passRunner.setDebug(true); @@ -1030,13 +1170,22 @@ void Asm2WasmBuilder::processAsm(Ref ast) { passRunner.add("optimize-instructions"); passRunner.add("post-emscripten"); } + if (preprocessor.debugInfo) { + passRunner.add<ApplyDebugInfo>(this); + passRunner.add("vacuum"); // FIXME maybe just remove the nops that were debuginfo nodes, if not optimizing? + } // make sure to not emit unreachable code at all, even in -O0, as wasm rules for it are complex // and changing. passRunner.add("dce"); passRunner.run(); + // remove the debug info intrinsic + if (preprocessor.debugInfo) { + wasm.removeImport(EMSCRIPTEN_DEBUGINFO); + } + // apply memory growth, if relevant - if (memoryGrowth) { + if (preprocessor.memoryGrowth) { emscripten::generateMemoryGrowthFunction(wasm); wasm.memory.max = Memory::kMaxSize; } @@ -2273,6 +2422,27 @@ Function* Asm2WasmBuilder::processFunction(Ref ast) { }; // body function->body = processStatements(body, start); + // debug info cleanup: we add debug info calls after each instruction; as + // a result, + // return 0; //@line file.cpp + // will have code after the return. if the function body is a block, + // it will be forced to the return type of the function, and then + // the unreachable type of the return makes things work, which we break + // if we add a none debug intrinsic call afterwards. so we need to fix + // that up. + if (preprocessor.debugInfo) { + if (function->result != none) { + if (auto* block = function->body->dynCast<Block>()) { + if (block->list.size() > 0) { + if (checkDebugInfo(block->list.back())) { + // add an unreachable. both the debug info and it could be dce'd, + // but it makes us validate properly. + block->list.push_back(builder.makeUnreachable()); + } + } + } + } + } // cleanups/checks assert(breakStack.size() == 0 && continueStack.size() == 0); assert(parentLabel.isNull()); diff --git a/src/mixed_arena.h b/src/mixed_arena.h index 52e47fbde..af585092e 100644 --- a/src/mixed_arena.h +++ b/src/mixed_arena.h @@ -19,6 +19,7 @@ #include <atomic> #include <cassert> +#include <cstdlib> #include <memory> #include <mutex> #include <thread> diff --git a/src/passes/Print.cpp b/src/passes/Print.cpp index 3c847809f..e5ff03fae 100644 --- a/src/passes/Print.cpp +++ b/src/passes/Print.cpp @@ -45,6 +45,19 @@ struct PrintSExpression : public Visitor<PrintSExpression> { } } + void visit(Expression* curr) { + if (currFunction) { + // show an annotation, if there is one + auto& annotations = currFunction->annotations; + auto iter = annotations.find(curr); + if (iter != annotations.end()) { + o << ";; " << iter->second << '\n'; + doIndent(o, indent); + } + } + Visitor<PrintSExpression>::visit(curr); + } + void setMinify(bool minify_) { minify = minify_; maybeSpace = minify ? "" : " "; diff --git a/src/tools/asm2wasm.cpp b/src/tools/asm2wasm.cpp index c880e7459..01602c8b7 100644 --- a/src/tools/asm2wasm.cpp +++ b/src/tools/asm2wasm.cpp @@ -84,7 +84,7 @@ int main(int argc, const char *argv[]) { [&wasmOnly](Options *o, const std::string &) { wasmOnly = true; }) - .add("--debuginfo", "-g", "Emit names section and debug info", + .add("--debuginfo", "-g", "Emit names section and debug info (for debug info you must emit text, -S, for this to work)", Options::Arguments::Zero, [&](Options *o, const std::string &arguments) { debugInfo = true; }) .add("--symbolmap", "-s", "Emit a symbol map (indexes => names)", @@ -99,6 +99,12 @@ int main(int argc, const char *argv[]) { }); options.parse(argc, argv); + // finalize arguments + if (options.extra["output"].size() == 0) { + // when no output file is specified, we emit text to stdout + emitBinary = false; + } + const auto &tm_it = options.extra.find("total memory"); size_t totalMemory = tm_it == options.extra.end() ? 16 * 1024 * 1024 : atoi(tm_it->second.c_str()); @@ -109,6 +115,8 @@ int main(int argc, const char *argv[]) { } Asm2WasmPreProcessor pre; + // wasm binaries can contain a names section, but not full debug info + pre.debugInfo = debugInfo && !emitBinary; auto input( read_file<std::vector<char>>(options.extra["infile"], Flags::Text, options.debug ? Flags::Debug : Flags::Release)); char *start = pre.process(input.data()); @@ -120,7 +128,7 @@ int main(int argc, const char *argv[]) { if (options.debug) std::cerr << "wasming..." << std::endl; Module wasm; wasm.memory.initial = wasm.memory.max = totalMemory / Memory::kPageSize; - Asm2WasmBuilder asm2wasm(wasm, pre.memoryGrowth, options.debug, imprecise, passOptions, runOptimizationPasses, wasmOnly); + Asm2WasmBuilder asm2wasm(wasm, pre, options.debug, imprecise, passOptions, runOptimizationPasses, wasmOnly); asm2wasm.processAsm(asmjs); // import mem init file, if provided diff --git a/src/wasm-js.cpp b/src/wasm-js.cpp index 2735f84f8..970214833 100644 --- a/src/wasm-js.cpp +++ b/src/wasm-js.cpp @@ -59,6 +59,7 @@ extern "C" void EMSCRIPTEN_KEEPALIVE load_asm2wasm(char *input) { prepare2wasm(); Asm2WasmPreProcessor pre; + pre.debugInfo = true; // FIXME: we must do this, as the input asm.js might have debug info input = pre.process(input); // proceed to parse and wasmify @@ -79,7 +80,7 @@ extern "C" void EMSCRIPTEN_KEEPALIVE load_asm2wasm(char *input) { module->memory.max = pre.memoryGrowth ? Address(Memory::kMaxSize) : module->memory.initial; if (wasmJSDebug) std::cerr << "wasming...\n"; - asm2wasm = new Asm2WasmBuilder(*module, pre.memoryGrowth, debug, false /* TODO: support imprecise? */, PassOptions(), false /* TODO: support optimizing? */, false /* TODO: support asm2wasm-i64? */); + asm2wasm = new Asm2WasmBuilder(*module, pre, debug, false /* TODO: support imprecise? */, PassOptions(), false /* TODO: support optimizing? */, false /* TODO: support asm2wasm-i64? */); asm2wasm->processAsm(asmjs); } diff --git a/src/wasm.h b/src/wasm.h index 75d6a174c..31dafb84b 100644 --- a/src/wasm.h +++ b/src/wasm.h @@ -1385,6 +1385,9 @@ public: std::vector<Name> localNames; std::map<Name, Index> localIndices; + // node annotations, printed alongside the node in the text format + std::unordered_map<Expression*, std::string> annotations; + Function() : result(none) {} size_t getNumParams() { |