diff options
author | Derek Schuff <dschuff@chromium.org> | 2016-05-05 08:01:57 -0700 |
---|---|---|
committer | Derek Schuff <dschuff@chromium.org> | 2016-05-05 08:01:57 -0700 |
commit | 2811727a118e92e6b7ef293458f9bab1cf5dacdc (patch) | |
tree | ad18967b67f3596b28f3013ad6f05cc269d126c6 /src | |
parent | 1397997bc91663f19c387c69c7e47930efe57539 (diff) | |
download | binaryen-2811727a118e92e6b7ef293458f9bab1cf5dacdc.tar.gz binaryen-2811727a118e92e6b7ef293458f9bab1cf5dacdc.tar.bz2 binaryen-2811727a118e92e6b7ef293458f9bab1cf5dacdc.zip |
[Linker] Handle archive files
Add a class to parse archive files.
Support linking archive files, with archive semantics (i.e. an archive
member is linked in if it satisfies an undefined reference).
Archive files must be GNU-format archives containing .s files.
Add tests for linking semantics.
Diffstat (limited to 'src')
-rw-r--r-- | src/s2wasm-main.cpp | 20 | ||||
-rw-r--r-- | src/s2wasm.h | 18 | ||||
-rw-r--r-- | src/support/archive.cpp | 239 | ||||
-rw-r--r-- | src/support/archive.h | 105 | ||||
-rw-r--r-- | src/wasm-linker.cpp | 26 | ||||
-rw-r--r-- | src/wasm-linker.h | 9 |
6 files changed, 402 insertions, 15 deletions
diff --git a/src/s2wasm-main.cpp b/src/s2wasm-main.cpp index 7b0e4c179..9688f6696 100644 --- a/src/s2wasm-main.cpp +++ b/src/s2wasm-main.cpp @@ -32,6 +32,7 @@ int main(int argc, const char *argv[]) { bool ignoreUnknownSymbols = false; bool generateEmscriptenGlue = false; std::string startFunction; + std::vector<std::string> archiveLibraries; Options options("s2wasm", "Link .s file into .wast"); options .add("--output", "-o", "Output file (stdout if not specified)", @@ -75,13 +76,19 @@ int main(int argc, const char *argv[]) { [&generateEmscriptenGlue](Options *, const std::string &) { generateEmscriptenGlue = true; }) + .add("--library", "-l", "Add archive library", + Options::Arguments::N, + [&archiveLibraries](Options *o, const std::string &argument) { + archiveLibraries.push_back(argument); + }) .add_positional("INFILE", Options::Arguments::One, [](Options *o, const std::string &argument) { o->extra["infile"] = argument; }); options.parse(argc, argv); - auto input(read_file<std::string>(options.extra["infile"], Flags::Text, options.debug ? Flags::Debug : Flags::Release)); + auto debugFlag = options.debug ? Flags::Debug : Flags::Release; + auto input(read_file<std::string>(options.extra["infile"], Flags::Text, debugFlag)); if (options.debug) std::cerr << "Parsing and wasming..." 
<< std::endl; uint64_t globalBase = options.extra.find("global-base") != options.extra.end() @@ -107,10 +114,13 @@ int main(int argc, const char *argv[]) { S2WasmBuilder mainbuilder(input.c_str(), options.debug); linker.linkObject(mainbuilder); - // In the future, there will be code to open additional files/buffers and - // link additional objects, as well as archive members (which only get linked if needed), e.g.: - // S2WasmBuilder lazyObject(some_other_buffer, options.debug) - // linker.linkLazyObject(lazyObject); // calls builder.scan to get symbol info, then build + for (const auto& m : archiveLibraries) { + auto archiveFile(read_file<std::vector<char>>(m, Flags::Binary, debugFlag)); + bool error; + Archive lib(archiveFile, error); + if (error) Fatal() << "Error opening archive " << m << "\n"; + linker.linkArchive(lib); + } linker.layout(); diff --git a/src/s2wasm.h b/src/s2wasm.h index 941779ae5..9c0ac81e2 100644 --- a/src/s2wasm.h +++ b/src/s2wasm.h @@ -43,6 +43,7 @@ class S2WasmBuilder { Module* wasm; MixedArena* allocator; LinkerObject* linkerObj; + std::unique_ptr<LinkerObject::SymbolInfo> symbolInfo; public: S2WasmBuilder(const char* input, bool debug) @@ -54,9 +55,10 @@ class S2WasmBuilder { linkerObj(nullptr) {} - void build(LinkerObject *obj, LinkerObject::SymbolInfo* info) { - if (!obj->isEmpty()) Fatal() << "Cannot construct an S2WasmBuilder in an non-empty LinkerObject"; - if (!info) info = getSymbolInfo(); + void build(LinkerObject *obj) { + // If getSymbolInfo has not already been called, populate the symbol + // info now. + if (!symbolInfo) symbolInfo.reset(getSymbolInfo()); linkerObj = obj; wasm = &obj->wasm; allocator = &wasm->allocator; @@ -65,10 +67,14 @@ class S2WasmBuilder { process(); } + // getSymbolInfo scans the .s file to determine what symbols it defines + // and references. 
LinkerObject::SymbolInfo* getSymbolInfo() { - auto* info = new LinkerObject::SymbolInfo(); - scan(info); - return info; + if (!symbolInfo) { + symbolInfo = make_unique<LinkerObject::SymbolInfo>(); + scan(symbolInfo.get()); + } + return symbolInfo.get(); } private: diff --git a/src/support/archive.cpp b/src/support/archive.cpp new file mode 100644 index 000000000..3879f72a7 --- /dev/null +++ b/src/support/archive.cpp @@ -0,0 +1,239 @@ +/* + * Copyright 2016 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "support/archive.h" + +#include <cstring> +#include "support/utilities.h" + +static const char* const magic = "!<arch>\n"; + +class ArchiveMemberHeader { + public: + uint8_t fileName[16]; + uint8_t timestamp[12]; + uint8_t UID[6]; + uint8_t GID[6]; + uint8_t accessMode[8]; + uint8_t size[10]; // Size of data only, not including padding or header + uint8_t magic[2]; + + std::string getName() const; + // Members are not larger than 4GB + uint32_t getSize() const; +}; + +std::string ArchiveMemberHeader::getName() const { + char endChar; + if (fileName[0] == '/') { + // Special name (string table or reference, or symbol table) + endChar = ' '; + } else { + endChar = '/'; // regular name + } + auto* end = + static_cast<const uint8_t*>(memchr(fileName, endChar, sizeof(fileName))); + if (!end) end = fileName + sizeof(fileName); + return std::string((char*)(fileName), end - fileName); +} + +uint32_t ArchiveMemberHeader::getSize() const { + auto* end = static_cast<const char*>(memchr(size, ' ', sizeof(size))); + std::string sizeString((const char*)size, end); + auto sizeInt = std::stoll(sizeString, nullptr, 10); + if (sizeInt < 0 || sizeInt >= std::numeric_limits<uint32_t>::max()) { + wasm::Fatal() << "Malformed archive: size parsing failed\n"; + } + return static_cast<uint32_t>(sizeInt); +} + +Archive::Archive(Buffer& b, bool& error) : data(b) { + error = false; + if (data.size() < strlen(magic) || + memcmp(data.data(), magic, strlen(magic))) { + error = true; + return; + } + + // We require GNU format archives. So the first member may be named "/" and it + // points to the symbol table. The next member may optionally be "//" and + // point to a string table if a filename is too large to fit in the 16-char + // name field of the header. + child_iterator it = child_begin(false); + if (it.hasError()) { + error = true; + return; + } + child_iterator end = child_end(); + if (it == end) return; // Empty archive. 
+ + const Child* c = &*it; + + auto increment = [&]() { + ++it; + error = it.hasError(); + if (error) return true; + c = &*it; + return false; + }; + + std::string name = c->getRawName(); + if (name == "/") { + symbolTable = c->getBuffer(); + if (increment() || it == end) return; + name = c->getRawName(); + } + + if (name == "//") { + stringTable = c->getBuffer(); + if (increment() || it == end) return; + setFirstRegular(*c); + return; + } + if (name[0] != '/') { + setFirstRegular(*c); + return; + } + // Not a GNU archive. + error = true; +} + +Archive::Child::Child(const Archive* parent, const uint8_t* data, bool* error) + : parent(parent), data(data) { + if (!data) return; + len = sizeof(ArchiveMemberHeader) + getHeader()->getSize(); + startOfFile = sizeof(ArchiveMemberHeader); +} + +uint32_t Archive::Child::getSize() const { return len - startOfFile; } + +Archive::SubBuffer Archive::Child::getBuffer() const { + return {data + startOfFile, getSize()}; +} + +std::string Archive::Child::getRawName() const { + return getHeader()->getName(); +} + +Archive::Child Archive::Child::getNext(bool& error) const { + size_t toSkip = len; + // Members are aligned to even byte boundaries. + if (toSkip & 1) ++toSkip; + const uint8_t* nextLoc = data + toSkip; + if (nextLoc >= (uint8_t*)&*parent->data.end()) { // End of the archive. + return Child(); + } + + return Child(parent, nextLoc, &error); +} + +std::string Archive::Child::getName() const { + std::string name = getRawName(); + // Check if it's a special name. + if (name[0] == '/') { + if (name.size() == 1) { // Linker member. + return name; + } + if (name.size() == 2 && name[1] == '/') { // String table. + return name; + } + // It's a long name. + // Get the offset. + int offset = std::stoi(name.substr(1), nullptr, 10); + + // Verify it. 
+ if (offset < 0 || (unsigned)offset >= parent->stringTable.len) { + wasm::Fatal() << "Malformed archive: name parsing failed\n"; + } + + std::string addr(parent->stringTable.data + offset, + parent->stringTable.data + parent->stringTable.len); + + // GNU long file names end with a "/\n". + size_t end = addr.find('\n'); + return addr.substr(0, end - 1); + } + // It's a simple name. + if (name[name.size() - 1] == '/') { + return name.substr(0, name.size() - 1); + } + return name; +} + +Archive::child_iterator Archive::child_begin(bool SkipInternal) const { + if (data.size() == 0) return child_end(); + + if (SkipInternal) { + child_iterator it; + it.child = Child(this, firstRegularData, &it.error); + return it; + } + + auto* loc = (const uint8_t*)data.data() + strlen(magic); + child_iterator it; + it.child = Child(this, loc, &it.error); + return it; +} + +Archive::child_iterator Archive::child_end() const { return Child(); } + +namespace { +struct Symbol { + uint32_t symbolIndex; + uint32_t stringIndex; + void next(Archive::SubBuffer& symbolTable) { + // Symbol table entries are NUL-terminated. Skip past the next NUL. 
+ stringIndex = strchr((char*)symbolTable.data + stringIndex, '\0') - + (char*)symbolTable.data + 1; + ++symbolIndex; + } +}; +} + +static uint32_t read32be(const uint8_t* buf) { + return static_cast<uint32_t>(buf[0]) << 24 | + static_cast<uint32_t>(buf[1]) << 16 | + static_cast<uint32_t>(buf[2]) << 8 | static_cast<uint32_t>(buf[3]); +} + +void Archive::dump() const { + printf("Archive data %p len %lu, firstRegularData %p\n", data.data(), + data.size(), firstRegularData); + printf("Symbol table %p, len %u\n", symbolTable.data, symbolTable.len); + printf("string table %p, len %u\n", stringTable.data, stringTable.len); + const uint8_t* buf = symbolTable.data; + if (!buf) { + for (auto c = child_begin(), e = child_end(); c != e; ++c) { + printf("Child %p, len %u, name %s, size %u\n", c->data, c->len, + c->getName().c_str(), c->getSize()); + } + return; + } + uint32_t symbolCount = read32be(buf); + printf("Symbol count %u\n", symbolCount); + buf += sizeof(uint32_t) + (symbolCount * sizeof(uint32_t)); + uint32_t string_start_offset = buf - symbolTable.data; + Symbol sym = {0, string_start_offset}; + while (sym.symbolIndex != symbolCount) { + printf("Symbol %u, offset %u\n", sym.symbolIndex, sym.stringIndex); + // get the member + uint32_t offset = read32be(symbolTable.data + sym.symbolIndex * 4); + auto* loc = (const uint8_t*)&data[offset]; + child_iterator it; + it.child = Child(this, loc, &it.error); + printf("Child %p, len %u\n", it.child.data, it.child.len); + } +} diff --git a/src/support/archive.h b/src/support/archive.h new file mode 100644 index 000000000..40fea2529 --- /dev/null +++ b/src/support/archive.h @@ -0,0 +1,105 @@ +/* + * Copyright 2016 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Minimal class for interacting with archives. The API is inspired by LLVM's + * Archive class, (in case we want to switch to using that at some point); + * however we are missing useful error-handling capabilities and other utilities + * that LLVM has (e.g. ErrorOr, MemoryBuffer, StringRef). + * We only support the GNU format (not the BSD or COFF variants) + */ + +#ifndef wasm_support_archive_h +#define wasm_support_archive_h + +#include <cstdint> +#include <vector> + +#include "wasm.h" + +class ArchiveMemberHeader; + +class Archive { + // Vector is char instead of uint8_t because read_file only works with char. + // Everything else is uint8_t to help distinguish between uses as + // uninterpreted bytes (most uses) and C strings (a few uses e.g. strchr) + // because most things in these buffers are not nul-terminated + using Buffer = std::vector<char>; + + public: + struct SubBuffer { + const uint8_t* data; + uint32_t len; + }; + class Child { + friend class Archive; + const Archive* parent = nullptr; + // Includes header but not padding byte. 
+ const uint8_t* data = nullptr; + uint32_t len = 0; + // Offset from data to the start of the file + uint16_t startOfFile = 0; + const ArchiveMemberHeader* getHeader() const { + return reinterpret_cast<const ArchiveMemberHeader*>(data); + } + Child getNext(bool& error) const; + + public: + Child(){}; + Child(const Archive* parent, const uint8_t* data, bool* error); + // Size of actual member data (no header/padding) + uint32_t getSize() const; + SubBuffer getBuffer() const; + std::string getRawName() const; + std::string getName() const; + bool operator==(const Child& other) const { return data == other.data; } + }; + class child_iterator { + friend class Archive; + Child child; + bool error = false; // TODO: use std::error_code instead? + public: + child_iterator() {} + explicit child_iterator(bool error) : error(error) {} + child_iterator(const Child& c) : child(c) {} + const Child* operator->() const { return &child; } + const Child& operator*() const { return child; } + bool operator==(const child_iterator& other) const { + return child == other.child; + } + bool operator!=(const child_iterator& other) const { + return !(*this == other); + } + child_iterator& operator++() { + assert(!error); + child = child.getNext(error); + return *this; + } + bool hasError() const { return error; } + }; + Archive(Buffer& buffer, bool& error); + child_iterator child_begin(bool SkipInternal = true) const; + child_iterator child_end() const; + void dump() const; + + private: + void setFirstRegular(const Child& c) { firstRegularData = c.data; } + Buffer& data; + SubBuffer symbolTable = {nullptr, 0}; + SubBuffer stringTable = {nullptr, 0}; + const uint8_t* firstRegularData; +}; + +#endif // wasm_support_archive_h diff --git a/src/wasm-linker.cpp b/src/wasm-linker.cpp index a854f0cd0..f4f516073 100644 --- a/src/wasm-linker.cpp +++ b/src/wasm-linker.cpp @@ -55,6 +55,7 @@ void Linker::layout() { // Convert calls to undefined functions to call_imports for (const auto& f : 
out.undefinedFunctionCalls) { Name target = f.first; + if (!out.symbolInfo.undefinedFunctions.count(target)) continue; // Create an import for the target if necessary. if (!out.wasm.checkImport(target)) { auto import = new Import; @@ -210,12 +211,31 @@ bool Linker::linkObject(S2WasmBuilder& builder) { return false; } } - out.symbolInfo.merge(std::move(*newSymbols)); - builder.build(&out, &out.symbolInfo); - delete newSymbols; + out.symbolInfo.merge(*newSymbols); + builder.build(&out); return true; } +bool Linker::linkArchive(Archive& archive) { + for (auto child = archive.child_begin(), end = archive.child_end(); + child != end; ++child) { + Archive::SubBuffer memberBuf = child->getBuffer(); + // S2WasmBuilder expects its input to be NUL-terminated. Archive members are + // not NUL-terminated. So we have to copy the contents out before parsing. + std::vector<char> memberString(memberBuf.len + 1); + memcpy(memberString.data(), memberBuf.data, memberBuf.len); + memberString[memberBuf.len] = '\0'; + S2WasmBuilder memberBuilder(memberString.data(), false); + auto* memberSymbols = memberBuilder.getSymbolInfo(); + for (const Name& symbol : memberSymbols->implementedFunctions) { + if (out.symbolInfo.undefinedFunctions.count(symbol)) { + if (!linkObject(memberBuilder)) return false; + break; + } + } + } + return true; +} void Linker::emscriptenGlue(std::ostream& o) { if (debug) { diff --git a/src/wasm-linker.h b/src/wasm-linker.h index f6cf832c7..08d6e5f1e 100644 --- a/src/wasm-linker.h +++ b/src/wasm-linker.h @@ -24,6 +24,7 @@ #ifndef WASM_WASM_LINK_H #define WASM_WASM_LINK_H +#include "support/archive.h" #include "support/utilities.h" #include "wasm.h" @@ -56,7 +57,7 @@ class LinkerObject { // For now, do not support weak symbols or anything special. 
Just directly // merge the functions together, and remove any newly-defined functions // from undefinedFunction - void merge(SymbolInfo&& other) { + void merge(SymbolInfo& other) { for (const auto& func : other.implementedFunctions) { undefinedFunctions.erase(func); } @@ -210,8 +211,14 @@ class Linker { void emscriptenGlue(std::ostream& o); // Add an object to the link by constructing it in-place with a builder. + // Returns false if an error occurred. bool linkObject(S2WasmBuilder& builder); + // Add an archive to the link. Any objects in the archive that satisfy a + // currently-undefined reference will be added to the link. + // Returns false if an error occurred. + bool linkArchive(Archive& archive); + private: // Allocate a static variable and return its address in linear memory size_t allocateStatic(size_t allocSize, size_t alignment, Name name) { |