summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorDerek Schuff <dschuff@chromium.org>2016-05-05 08:01:57 -0700
committerDerek Schuff <dschuff@chromium.org>2016-05-05 08:01:57 -0700
commit2811727a118e92e6b7ef293458f9bab1cf5dacdc (patch)
treead18967b67f3596b28f3013ad6f05cc269d126c6 /src
parent1397997bc91663f19c387c69c7e47930efe57539 (diff)
downloadbinaryen-2811727a118e92e6b7ef293458f9bab1cf5dacdc.tar.gz
binaryen-2811727a118e92e6b7ef293458f9bab1cf5dacdc.tar.bz2
binaryen-2811727a118e92e6b7ef293458f9bab1cf5dacdc.zip
[Linker] Handle archive files
Add a class to parse archive files. Support linking archive files, with archive semantics (i.e. an archive member is linked in if it satisfies an undefined reference). Archive files must be gnu-format archives containing .s files. Add tests for linking semantics.
Diffstat (limited to 'src')
-rw-r--r--src/s2wasm-main.cpp20
-rw-r--r--src/s2wasm.h18
-rw-r--r--src/support/archive.cpp239
-rw-r--r--src/support/archive.h105
-rw-r--r--src/wasm-linker.cpp26
-rw-r--r--src/wasm-linker.h9
6 files changed, 402 insertions, 15 deletions
diff --git a/src/s2wasm-main.cpp b/src/s2wasm-main.cpp
index 7b0e4c179..9688f6696 100644
--- a/src/s2wasm-main.cpp
+++ b/src/s2wasm-main.cpp
@@ -32,6 +32,7 @@ int main(int argc, const char *argv[]) {
bool ignoreUnknownSymbols = false;
bool generateEmscriptenGlue = false;
std::string startFunction;
+ std::vector<std::string> archiveLibraries;
Options options("s2wasm", "Link .s file into .wast");
options
.add("--output", "-o", "Output file (stdout if not specified)",
@@ -75,13 +76,19 @@ int main(int argc, const char *argv[]) {
[&generateEmscriptenGlue](Options *, const std::string &) {
generateEmscriptenGlue = true;
})
+ .add("--library", "-l", "Add archive library",
+ Options::Arguments::N,
+ [&archiveLibraries](Options *o, const std::string &argument) {
+ archiveLibraries.push_back(argument);
+ })
.add_positional("INFILE", Options::Arguments::One,
[](Options *o, const std::string &argument) {
o->extra["infile"] = argument;
});
options.parse(argc, argv);
- auto input(read_file<std::string>(options.extra["infile"], Flags::Text, options.debug ? Flags::Debug : Flags::Release));
+ auto debugFlag = options.debug ? Flags::Debug : Flags::Release;
+ auto input(read_file<std::string>(options.extra["infile"], Flags::Text, debugFlag));
if (options.debug) std::cerr << "Parsing and wasming..." << std::endl;
uint64_t globalBase = options.extra.find("global-base") != options.extra.end()
@@ -107,10 +114,13 @@ int main(int argc, const char *argv[]) {
S2WasmBuilder mainbuilder(input.c_str(), options.debug);
linker.linkObject(mainbuilder);
- // In the future, there will be code to open additional files/buffers and
- // link additional objects, as well as archive members (which only get linked if needed), e.g.:
- // S2WasmBuilder lazyObject(some_other_buffer, options.debug)
- // linker.linkLazyObject(lazyObject); // calls builder.scan to get symbol info, then build
+ for (const auto& m : archiveLibraries) {
+ auto archiveFile(read_file<std::vector<char>>(m, Flags::Binary, debugFlag));
+ bool error;
+ Archive lib(archiveFile, error);
+ if (error) Fatal() << "Error opening archive " << m << "\n";
+ linker.linkArchive(lib);
+ }
linker.layout();
diff --git a/src/s2wasm.h b/src/s2wasm.h
index 941779ae5..9c0ac81e2 100644
--- a/src/s2wasm.h
+++ b/src/s2wasm.h
@@ -43,6 +43,7 @@ class S2WasmBuilder {
Module* wasm;
MixedArena* allocator;
LinkerObject* linkerObj;
+ std::unique_ptr<LinkerObject::SymbolInfo> symbolInfo;
public:
S2WasmBuilder(const char* input, bool debug)
@@ -54,9 +55,10 @@ class S2WasmBuilder {
linkerObj(nullptr)
{}
- void build(LinkerObject *obj, LinkerObject::SymbolInfo* info) {
- if (!obj->isEmpty()) Fatal() << "Cannot construct an S2WasmBuilder in an non-empty LinkerObject";
- if (!info) info = getSymbolInfo();
+  void build(LinkerObject *obj) {
+    // If getSymbolInfo has not already been called, populate the symbol
+    // info now. getSymbolInfo() stores the scan result in symbolInfo itself
+    // and returns a non-owning pointer to it, so its result must not be fed
+    // back into symbolInfo.reset(): resetting a unique_ptr to the pointer it
+    // already owns deletes the object and leaves the member dangling.
+    if (!symbolInfo) getSymbolInfo();
linkerObj = obj;
wasm = &obj->wasm;
allocator = &wasm->allocator;
@@ -65,10 +67,14 @@ class S2WasmBuilder {
process();
}
+ // getSymbolInfo scans the .s file to determine what symbols it defines
+ // and references.
LinkerObject::SymbolInfo* getSymbolInfo() {
- auto* info = new LinkerObject::SymbolInfo();
- scan(info);
- return info;
+ if (!symbolInfo) {
+ symbolInfo = make_unique<LinkerObject::SymbolInfo>();
+ scan(symbolInfo.get());
+ }
+ return symbolInfo.get();
}
private:
diff --git a/src/support/archive.cpp b/src/support/archive.cpp
new file mode 100644
index 000000000..3879f72a7
--- /dev/null
+++ b/src/support/archive.cpp
@@ -0,0 +1,239 @@
+/*
+ * Copyright 2016 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "support/archive.h"
+
+#include <cstring>
+#include "support/utilities.h"
+
+static const char* const magic = "!<arch>\n";
+
+// Byte-for-byte layout of one archive member header. Archive::Child::getHeader
+// reinterprets raw archive bytes as this class, so the order and widths of the
+// fields below must not change.
+class ArchiveMemberHeader {
+ public:
+ // Member name; terminated by '/' for regular names, or space-padded when the
+ // name itself starts with '/' (see getName).
+ uint8_t fileName[16];
+ // The next four fields are not read by any code in this file.
+ uint8_t timestamp[12];
+ uint8_t UID[6];
+ uint8_t GID[6];
+ uint8_t accessMode[8];
+ uint8_t size[10]; // Size of data only, not including padding or header
+ uint8_t magic[2];
+
+ // Returns the name field with its terminator/padding stripped.
+ std::string getName() const;
+ // Members are not larger than 4GB
+ // Returns the size field parsed as a decimal number.
+ uint32_t getSize() const;
+};
+
+// Returns the raw member name stored in the header, without the terminating
+// character. The terminator itself is never part of the returned string.
+std::string ArchiveMemberHeader::getName() const {
+  // Names starting with '/' are special (symbol table, string table, or a
+  // reference into the string table) and run up to the first space; regular
+  // names run up to the first '/'.
+  const char terminator = (fileName[0] == '/') ? ' ' : '/';
+  const void* hit = memchr(fileName, terminator, sizeof(fileName));
+  // With no terminator present the name occupies the whole field.
+  const size_t length =
+      hit ? static_cast<size_t>(static_cast<const uint8_t*>(hit) - fileName)
+          : sizeof(fileName);
+  return std::string((const char*)fileName, length);
+}
+
+// Parses the decimal `size` field of the header and returns it.
+//
+// The field is read up to the first space, or in full when it contains no
+// space (a ten-digit size leaves no room for padding). An empty field, any
+// non-digit character, or a value that is not below UINT32_MAX is reported
+// through wasm::Fatal with the same message the range check always used.
+// NOTE(review): assumes wasm::Fatal does not return control here - confirm.
+uint32_t ArchiveMemberHeader::getSize() const {
+  const char* begin = reinterpret_cast<const char*>(size);
+  const char* end = static_cast<const char*>(memchr(size, ' ', sizeof(size)));
+  if (!end) end = begin + sizeof(size);
+
+  // At most ten digits are accumulated, so a 64-bit value cannot overflow.
+  uint64_t value = 0;
+  bool valid = begin != end;
+  for (const char* p = begin; valid && p != end; ++p) {
+    if (*p < '0' || *p > '9') {
+      valid = false;
+    } else {
+      value = value * 10 + static_cast<uint64_t>(*p - '0');
+    }
+  }
+  if (!valid || value >= UINT32_MAX) {
+    wasm::Fatal() << "Malformed archive: size parsing failed\n";
+  }
+  return static_cast<uint32_t>(value);
+}
+
+// Wraps the archive bytes in `b` (kept by reference, not copied) and locates
+// the optional symbol table, the optional string table and the first regular
+// member. `error` is set to false on entry and to true when the magic string
+// is missing, when iteration reports an error, or when the leading members do
+// not follow the GNU layout.
+Archive::Archive(Buffer& b, bool& error) : data(b) {
+ error = false;
+ // The buffer must start with the archive magic string.
+ if (data.size() < strlen(magic) ||
+ memcmp(data.data(), magic, strlen(magic))) {
+ error = true;
+ return;
+ }
+
+ // We require GNU format archives. So the first member may be named "/" and it
+ // points to the symbol table. The next member may optionally be "//" and
+ // point to a string table if a filename is too large to fit in the 16-char
+ // name field of the header.
+ // Start at the very first member, special members included.
+ child_iterator it = child_begin(false);
+ if (it.hasError()) {
+ error = true;
+ return;
+ }
+ child_iterator end = child_end();
+ if (it == end) return; // Empty archive.
+
+ // `c` always points at the Child stored inside `it`.
+ const Child* c = &*it;
+
+ // Advances `it`; returns true (and sets `error`) if the step failed.
+ auto increment = [&]() {
+ ++it;
+ error = it.hasError();
+ if (error) return true;
+ c = &*it;
+ return false;
+ };
+
+ std::string name = c->getRawName();
+ if (name == "/") {
+ symbolTable = c->getBuffer();
+ // Returning here leaves firstRegularData unassigned: it is only written on
+ // the paths below that call setFirstRegular.
+ if (increment() || it == end) return;
+ name = c->getRawName();
+ }
+
+ if (name == "//") {
+ stringTable = c->getBuffer();
+ if (increment() || it == end) return;
+ // Whatever follows the string table is taken as the first regular member.
+ setFirstRegular(*c);
+ return;
+ }
+ if (name[0] != '/') {
+ setFirstRegular(*c);
+ return;
+ }
+ // Not a GNU archive.
+ error = true;
+}
+
+// Builds a view of the archive member whose header starts at `data`.
+//
+// A null `data` produces an empty child (the end-of-archive marker). When a
+// parent archive is given, the header and the payload it announces must lie
+// entirely inside the parent's buffer; otherwise the child is left empty and
+// `*error` (if `error` is non-null) is set to true, so that truncated or
+// corrupt archives are reported instead of being read out of bounds.
+Archive::Child::Child(const Archive* parent, const uint8_t* data, bool* error)
+    : parent(parent), data(data) {
+  if (!data) return;
+  const size_t headerSize = sizeof(ArchiveMemberHeader);
+  if (parent) {
+    const uint8_t* bufBegin =
+        reinterpret_cast<const uint8_t*>(parent->data.data());
+    const uint8_t* bufEnd = bufBegin + parent->data.size();
+    // Check the header first: getSize() reads from it.
+    const bool headerFits = data >= bufBegin && data <= bufEnd &&
+                            static_cast<size_t>(bufEnd - data) >= headerSize;
+    const bool payloadFits =
+        headerFits &&
+        getHeader()->getSize() <=
+            static_cast<size_t>(bufEnd - data) - headerSize;
+    if (!payloadFits) {
+      this->data = nullptr;  // Compare equal to the end-of-archive child.
+      if (error) *error = true;
+      return;
+    }
+  }
+  len = headerSize + getHeader()->getSize();
+  startOfFile = headerSize;
+}
+
+// Size of the member payload alone: `len` spans header plus payload and
+// `startOfFile` is the offset at which the payload begins.
+uint32_t Archive::Child::getSize() const {
+  const uint32_t payloadSize = len - startOfFile;
+  return payloadSize;
+}
+
+// Returns a non-owning view of the member payload (the bytes that follow the
+// header).
+Archive::SubBuffer Archive::Child::getBuffer() const {
+  SubBuffer payload;
+  payload.data = data + startOfFile;
+  payload.len = getSize();
+  return payload;
+}
+
+// Returns the name exactly as the member header reports it; long-name
+// references are not resolved here (getName does that).
+std::string Archive::Child::getRawName() const {
+  const ArchiveMemberHeader* header = getHeader();
+  return header->getName();
+}
+
+// Returns the member that follows this one, or an empty child when this is
+// the last member. `error` is handed to the new child's constructor.
+//
+// The end of the archive is computed as data() + size() rather than by
+// dereferencing the end iterator, and the distance to it is compared before
+// the next pointer is formed, so no pointer past the buffer is created.
+Archive::Child Archive::Child::getNext(bool& error) const {
+  if (!data) return Child();
+  const uint8_t* bufEnd =
+      reinterpret_cast<const uint8_t*>(parent->data.data()) +
+      parent->data.size();
+  size_t toSkip = len;
+  // Members are aligned to even byte boundaries.
+  if (toSkip & 1) ++toSkip;
+  if (data >= bufEnd || toSkip >= static_cast<size_t>(bufEnd - data)) {
+    return Child();  // End of the archive.
+  }
+  return Child(parent, data + toSkip, &error);
+}
+
+// Returns the member's name with archive decoration removed.
+//
+//  - "/" (linker member) and "//" (string table) are returned unchanged.
+//  - "/<decimal offset>" is a long name: the text is read from the parent's
+//    string table starting at that offset and ending before the "/\n" that
+//    terminates GNU long names.
+//  - Any other name is returned without a trailing '/'.
+//
+// A reference that is not purely decimal, or whose offset lies outside the
+// string table, is reported through wasm::Fatal (previously a non-numeric
+// reference escaped as an uncaught std::stoi exception).
+// NOTE(review): assumes wasm::Fatal does not return control here - confirm.
+std::string Archive::Child::getName() const {
+  std::string name = getRawName();
+  if (name.empty() || name[0] != '/') {
+    // It's a simple name.
+    if (!name.empty() && name.back() == '/') name.pop_back();
+    return name;
+  }
+  if (name == "/" || name == "//") {
+    return name;  // Linker member or string table.
+  }
+
+  // It's a long name. The raw name is at most 16 characters long, so the
+  // accumulated offset cannot overflow 64 bits.
+  uint64_t offset = 0;
+  bool valid = true;
+  for (size_t i = 1; valid && i < name.size(); ++i) {
+    const char c = name[i];
+    if (c < '0' || c > '9') {
+      valid = false;
+    } else {
+      offset = offset * 10 + static_cast<uint64_t>(c - '0');
+    }
+  }
+  if (!valid || offset >= parent->stringTable.len) {
+    wasm::Fatal() << "Malformed archive: name parsing failed\n";
+  }
+
+  const char* start =
+      reinterpret_cast<const char*>(parent->stringTable.data) + offset;
+  const size_t available = parent->stringTable.len - offset;
+  // Search in place instead of copying the rest of the string table.
+  const void* newline = memchr(start, '\n', available);
+  size_t length = available;
+  if (newline) {
+    length = static_cast<size_t>(static_cast<const char*>(newline) - start);
+    // GNU long file names end with a "/\n"; drop the character before the
+    // newline. A newline at position 0 yields an empty name.
+    if (length > 0) --length;
+  }
+  return std::string(start, length);
+}
+
+// Returns an iterator to the first member. With SkipInternal set, iteration
+// starts at the recorded first regular member; otherwise it starts right
+// after the archive magic, so special members are visited too. An empty
+// buffer yields the end iterator.
+Archive::child_iterator Archive::child_begin(bool SkipInternal) const {
+  if (data.size() == 0) return child_end();
+
+  const uint8_t* start = SkipInternal
+                             ? firstRegularData
+                             : (const uint8_t*)data.data() + strlen(magic);
+  child_iterator first;
+  first.child = Child(this, start, &first.error);
+  return first;
+}
+
+// The end iterator wraps an empty (default-constructed) child.
+Archive::child_iterator Archive::child_end() const {
+  return child_iterator(Child());
+}
+
+namespace {
+// Cursor over the symbol table: `symbolIndex` counts entries and
+// `stringIndex` is the byte offset of the current symbol name inside the
+// table.
+struct Symbol {
+  uint32_t symbolIndex;
+  uint32_t stringIndex;
+  // Moves to the following entry.
+  void next(Archive::SubBuffer& symbolTable) {
+    const char* tableStart = (const char*)symbolTable.data;
+    // Symbol table entries are NUL-terminated: strlen stops at the NUL and
+    // the extra 1 steps past it.
+    stringIndex += strlen(tableStart + stringIndex) + 1;
+    ++symbolIndex;
+  }
+};
+}
+
+// Decodes the four bytes at `buf` as a big-endian unsigned 32-bit integer.
+static uint32_t read32be(const uint8_t* buf) {
+  uint32_t value = 0;
+  for (int i = 0; i < 4; ++i) {
+    // Shift the bytes gathered so far up and append the next one.
+    value = (value << 8) | static_cast<uint32_t>(buf[i]);
+  }
+  return value;
+}
+
+// Prints a description of the archive to stdout for debugging.
+//
+// Without a symbol table, every regular member is listed. With one, each
+// symbol entry is listed together with the member its offset points at. The
+// table is read as a big-endian count, then `count` big-endian member
+// offsets, then the NUL-terminated symbol names.
+void Archive::dump() const {
+  // %p expects void pointers and size_t needs %zu.
+  printf("Archive data %p len %zu, firstRegularData %p\n",
+         static_cast<const void*>(data.data()), data.size(),
+         static_cast<const void*>(firstRegularData));
+  printf("Symbol table %p, len %u\n",
+         static_cast<const void*>(symbolTable.data), symbolTable.len);
+  printf("string table %p, len %u\n",
+         static_cast<const void*>(stringTable.data), stringTable.len);
+  const uint8_t* buf = symbolTable.data;
+  if (!buf) {
+    for (auto c = child_begin(), e = child_end(); c != e; ++c) {
+      printf("Child %p, len %u, name %s, size %u\n",
+             static_cast<const void*>(c->data), c->len, c->getName().c_str(),
+             c->getSize());
+    }
+    return;
+  }
+  if (symbolTable.len < sizeof(uint32_t)) {
+    printf("Malformed symbol table\n");
+    return;
+  }
+  uint32_t symbolCount = read32be(buf);
+  printf("Symbol count %u\n", symbolCount);
+  // The member offsets follow the count word; the names follow the offsets.
+  const uint8_t* offsets = buf + sizeof(uint32_t);
+  const uint64_t stringStart =
+      sizeof(uint32_t) + static_cast<uint64_t>(symbolCount) * sizeof(uint32_t);
+  if (stringStart > symbolTable.len) {
+    printf("Malformed symbol table\n");
+    return;
+  }
+  // Symbol::next takes a non-const reference, so walk a local copy of the view.
+  SubBuffer table = symbolTable;
+  Symbol sym = {0, static_cast<uint32_t>(stringStart)};
+  while (sym.symbolIndex != symbolCount && sym.stringIndex < table.len) {
+    printf("Symbol %u, offset %u\n", sym.symbolIndex, sym.stringIndex);
+    // get the member
+    uint32_t offset = read32be(offsets + sym.symbolIndex * sizeof(uint32_t));
+    if (offset < data.size()) {
+      auto* loc = (const uint8_t*)data.data() + offset;
+      child_iterator it;
+      it.child = Child(this, loc, &it.error);
+      printf("Child %p, len %u\n", static_cast<const void*>(it.child.data),
+             it.child.len);
+    }
+    // Advance to the next entry; without this the loop never terminates.
+    sym.next(table);
+  }
+}
diff --git a/src/support/archive.h b/src/support/archive.h
new file mode 100644
index 000000000..40fea2529
--- /dev/null
+++ b/src/support/archive.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright 2016 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Minimal class for interacting with archives. The API is inspired by LLVM's
+ * Archive class, (in case we want to switch to using that at some point);
+ * however we are missing useful error-handling capabilities and other utilities
+ * that LLVM has (e.g. ErrorOr, MemoryBuffer, StringRef).
+ * We only support the GNU format (not the BSD or COFF variants)
+ */
+
+#ifndef wasm_support_archive_h
+#define wasm_support_archive_h
+
+#include <cstdint>
+#include <vector>
+
+#include "wasm.h"
+
+class ArchiveMemberHeader;
+
+// Read-only view over the bytes of an archive file. The archive does not own
+// the bytes: it keeps a reference to the Buffer passed to its constructor.
+class Archive {
+  // Vector is char instead of uint8_t because read_file only works with char.
+  // Everything else is uint8_t to help distinguish between uses as
+  // uninterpreted bytes (most uses) and C strings (a few uses e.g. strchr)
+  // because most things in these buffers are not nul-terminated
+  using Buffer = std::vector<char>;
+
+ public:
+  // Non-owning (pointer, length) view of bytes inside the archive buffer.
+  struct SubBuffer {
+    const uint8_t* data;
+    uint32_t len;
+  };
+  // One archive member. A default-constructed Child has null data and serves
+  // as the end-of-archive marker.
+  class Child {
+    friend class Archive;
+    const Archive* parent = nullptr;
+    // Includes header but not padding byte.
+    const uint8_t* data = nullptr;
+    uint32_t len = 0;
+    // Offset from data to the start of the file
+    uint16_t startOfFile = 0;
+    // Views the bytes at `data` as a member header.
+    const ArchiveMemberHeader* getHeader() const {
+      return reinterpret_cast<const ArchiveMemberHeader*>(data);
+    }
+    // Returns the following member, or an empty Child at the end.
+    Child getNext(bool& error) const;
+
+   public:
+    Child() {}
+    Child(const Archive* parent, const uint8_t* data, bool* error);
+    // Size of actual member data (no header/padding)
+    uint32_t getSize() const;
+    // View of the member data (no header/padding).
+    SubBuffer getBuffer() const;
+    // Name as stored in the header, without its terminator.
+    std::string getRawName() const;
+    // Name with long-name references resolved through the string table.
+    std::string getName() const;
+    // Two children are equal when they start at the same address.
+    bool operator==(const Child& other) const { return data == other.data; }
+  };
+  // Forward iterator over the members of an archive.
+  class child_iterator {
+    friend class Archive;
+    Child child;
+    bool error = false; // TODO: use std::error_code instead?
+   public:
+    child_iterator() {}
+    explicit child_iterator(bool error) : error(error) {}
+    child_iterator(const Child& c) : child(c) {}
+    const Child* operator->() const { return &child; }
+    const Child& operator*() const { return child; }
+    bool operator==(const child_iterator& other) const {
+      return child == other.child;
+    }
+    bool operator!=(const child_iterator& other) const {
+      return !(*this == other);
+    }
+    // Must not be called once an error has been recorded.
+    child_iterator& operator++() {
+      assert(!error);
+      child = child.getNext(error);
+      return *this;
+    }
+    bool hasError() const { return error; }
+  };
+  // `error` is set to true if `buffer` is not a usable GNU-format archive.
+  Archive(Buffer& buffer, bool& error);
+  // With SkipInternal set, iteration starts at the first regular member.
+  child_iterator child_begin(bool SkipInternal = true) const;
+  child_iterator child_end() const;
+  // Prints the archive structure to stdout.
+  void dump() const;
+
+ private:
+  void setFirstRegular(const Child& c) { firstRegularData = c.data; }
+  Buffer& data;
+  SubBuffer symbolTable = {nullptr, 0};
+  SubBuffer stringTable = {nullptr, 0};
+  // Stays null when the archive has no regular member, so that
+  // child_begin(true) yields the end iterator instead of reading an
+  // indeterminate pointer.
+  const uint8_t* firstRegularData = nullptr;
+};
+
+#endif // wasm_support_archive_h
diff --git a/src/wasm-linker.cpp b/src/wasm-linker.cpp
index a854f0cd0..f4f516073 100644
--- a/src/wasm-linker.cpp
+++ b/src/wasm-linker.cpp
@@ -55,6 +55,7 @@ void Linker::layout() {
// Convert calls to undefined functions to call_imports
for (const auto& f : out.undefinedFunctionCalls) {
Name target = f.first;
+ if (!out.symbolInfo.undefinedFunctions.count(target)) continue;
// Create an import for the target if necessary.
if (!out.wasm.checkImport(target)) {
auto import = new Import;
@@ -210,12 +211,31 @@ bool Linker::linkObject(S2WasmBuilder& builder) {
return false;
}
}
- out.symbolInfo.merge(std::move(*newSymbols));
- builder.build(&out, &out.symbolInfo);
- delete newSymbols;
+ out.symbolInfo.merge(*newSymbols);
+ builder.build(&out);
return true;
}
+// Links every archive member that implements a function which is currently
+// undefined in the output. Members are visited once, in archive order.
+// Returns false if linking a member fails or if walking the archive reported
+// an error (previously an iteration error was silently dropped).
+// NOTE(review): because this is a single pass, a member that is only needed
+// by a later member of the same archive is not pulled in - confirm whether a
+// rescan is wanted.
+bool Linker::linkArchive(Archive& archive) {
+  auto child = archive.child_begin();
+  const auto end = archive.child_end();
+  for (; child != end; ++child) {
+    Archive::SubBuffer memberBuf = child->getBuffer();
+    // S2WasmBuilder expects its input to be NUL-terminated. Archive members are
+    // not NUL-terminated. So we have to copy the contents out before parsing.
+    std::vector<char> memberString(memberBuf.data,
+                                   memberBuf.data + memberBuf.len);
+    memberString.push_back('\0');
+    S2WasmBuilder memberBuilder(memberString.data(), false);
+    auto* memberSymbols = memberBuilder.getSymbolInfo();
+    for (const Name& symbol : memberSymbols->implementedFunctions) {
+      if (out.symbolInfo.undefinedFunctions.count(symbol)) {
+        if (!linkObject(memberBuilder)) return false;
+        // One satisfied reference is enough to link the member exactly once.
+        break;
+      }
+    }
+  }
+  return !child.hasError();
+}
void Linker::emscriptenGlue(std::ostream& o) {
if (debug) {
diff --git a/src/wasm-linker.h b/src/wasm-linker.h
index f6cf832c7..08d6e5f1e 100644
--- a/src/wasm-linker.h
+++ b/src/wasm-linker.h
@@ -24,6 +24,7 @@
#ifndef WASM_WASM_LINK_H
#define WASM_WASM_LINK_H
+#include "support/archive.h"
#include "support/utilities.h"
#include "wasm.h"
@@ -56,7 +57,7 @@ class LinkerObject {
// For now, do not support weak symbols or anything special. Just directly
// merge the functions together, and remove any newly-defined functions
// from undefinedFunction
- void merge(SymbolInfo&& other) {
+ void merge(SymbolInfo& other) {
for (const auto& func : other.implementedFunctions) {
undefinedFunctions.erase(func);
}
@@ -210,8 +211,14 @@ class Linker {
void emscriptenGlue(std::ostream& o);
// Add an object to the link by constructing it in-place with a builder.
+ // Returns false if an error occurred.
bool linkObject(S2WasmBuilder& builder);
+ // Add an archive to the link. Any objects in the archive that satisfy a
+ // currently-undefined reference will be added to the link.
+ // Returns false if an error occurred.
+ bool linkArchive(Archive& archive);
+
private:
// Allocate a static variable and return its address in linear memory
size_t allocateStatic(size_t allocSize, size_t alignment, Name name) {