From 4d28d3f32e7f213e300b24bc61c3f0ac9d6e1ab6 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 19 Dec 2019 09:04:08 -0800 Subject: DWARF parsing and writing support using LLVM (#2520) This imports LLVM code for DWARF handling. That code has the Apache 2 license like us. It's also the same code used to emit DWARF in the common toolchain, so it seems like a safe choice. This adds two passes: --dwarfdump which runs the same code LLVM runs for llvm-dwarfdump. This shows we can parse it ok, and will be useful for debugging. And --dwarfupdate writes out the DWARF sections (unchanged from what we read, so it just roundtrips - for updating we need #2515). This puts LLVM in thirdparty which is added here. All the LLVM code is behind USE_LLVM_DWARF, which is on by default, but off in JS for now, as it increases code size by 20%. This current approach imports the LLVM files directly. This is not how they are intended to be used, so it required a bunch of local changes - more than I expected actually, for the platform-specific stuff. For now this seems to work, so it may be good enough, but in the long term we may want to switch to linking against libllvm. A downside to doing that is that binaryen users would need to have an LLVM build, and even in the waterfall builds we'd have a problem - while we ship LLVM there anyhow, we constantly update it, which means that binaryen would need to be on latest llvm all the time too (which otherwise, given DWARF is quite stable, we might not need to constantly update). An even larger issue is that as I did this work I learned about how DWARF works in LLVM, and while the reading code is easy to reuse, the writing code is trickier. The main code path is heavily integrated with the MC layer, which we don't have - we might want to create a "fake MC layer" for that, but it sounds hard. Instead, there is the YAML path which is used mostly for testing, and which can convert DWARF to and from YAML and from binary. Using the non-YAML parts there, we can convert binary DWARF to the YAML layer's nice Info data, then convert that to binary. This works, however, this is not the path LLVM uses normally, and it supports only some basic DWARF sections - I had to add ranges support, in fact. So if we need more complex things, we may end up needing to use the MC layer approach, or consider some other DWARF library. However, hopefully that should not affect the core binaryen code which just calls a library for DWARF stuff. Helps #2400 --- .../llvm-project/include/llvm/Support/Endian.h | 429 +++++++++++++++++++++ 1 file changed, 429 insertions(+) create mode 100644 third_party/llvm-project/include/llvm/Support/Endian.h (limited to 'third_party/llvm-project/include/llvm/Support/Endian.h') diff --git a/third_party/llvm-project/include/llvm/Support/Endian.h b/third_party/llvm-project/include/llvm/Support/Endian.h new file mode 100644 index 000000000..87aecedd3 --- /dev/null +++ b/third_party/llvm-project/include/llvm/Support/Endian.h @@ -0,0 +1,429 @@ +//===- Endian.h - Utilities for IO with endian specific data ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares generic functions to read and write endian specific data. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_ENDIAN_H +#define LLVM_SUPPORT_ENDIAN_H + +#include "llvm/Support/AlignOf.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/SwapByteOrder.h" +#include +#include +#include +#include +#include + +namespace llvm { +namespace support { + +enum endianness {big, little, native}; + +// These are named values for common alignments. +enum {aligned = 0, unaligned = 1}; + +namespace detail { + +/// ::value is either alignment, or alignof(T) if alignment is 0. +template +struct PickAlignment { + enum { value = alignment == 0 ? alignof(T) : alignment }; +}; + +} // end namespace detail + +namespace endian { + +constexpr endianness system_endianness() { + return sys::IsBigEndianHost ? big : little; +} + +template +inline value_type byte_swap(value_type value, endianness endian) { + if ((endian != native) && (endian != system_endianness())) + sys::swapByteOrder(value); + return value; +} + +/// Swap the bytes of value to match the given endianness. +template +inline value_type byte_swap(value_type value) { + return byte_swap(value, endian); +} + +/// Read a value of a particular endianness from memory. +template +inline value_type read(const void *memory, endianness endian) { + value_type ret; + + memcpy(&ret, + LLVM_ASSUME_ALIGNED( + memory, (detail::PickAlignment::value)), + sizeof(value_type)); + return byte_swap(ret, endian); +} + +template +inline value_type read(const void *memory) { + return read(memory, endian); +} + +/// Read a value of a particular endianness from a buffer, and increment the +/// buffer past that value. +template +inline value_type readNext(const CharT *&memory, endianness endian) { + value_type ret = read(memory, endian); + memory += sizeof(value_type); + return ret; +} + +template +inline value_type readNext(const CharT *&memory) { + return readNext(memory, endian); +} + +/// Write a value to memory with a particular endianness. +template +inline void write(void *memory, value_type value, endianness endian) { + value = byte_swap(value, endian); + memcpy(LLVM_ASSUME_ALIGNED( + memory, (detail::PickAlignment::value)), + &value, sizeof(value_type)); +} + +template +inline void write(void *memory, value_type value) { + write(memory, value, endian); +} + +template +using make_unsigned_t = typename std::make_unsigned::type; + +/// Read a value of a particular endianness from memory, for a location +/// that starts at the given bit offset within the first byte. +template +inline value_type readAtBitAlignment(const void *memory, uint64_t startBit) { + assert(startBit < 8); + if (startBit == 0) + return read(memory); + else { + // Read two values and compose the result from them. + value_type val[2]; + memcpy(&val[0], + LLVM_ASSUME_ALIGNED( + memory, (detail::PickAlignment::value)), + sizeof(value_type) * 2); + val[0] = byte_swap(val[0]); + val[1] = byte_swap(val[1]); + + // Shift bits from the lower value into place. + make_unsigned_t lowerVal = val[0] >> startBit; + // Mask off upper bits after right shift in case of signed type. + make_unsigned_t numBitsFirstVal = + (sizeof(value_type) * 8) - startBit; + lowerVal &= ((make_unsigned_t)1 << numBitsFirstVal) - 1; + + // Get the bits from the upper value. + make_unsigned_t upperVal = + val[1] & (((make_unsigned_t)1 << startBit) - 1); + // Shift them in to place. + upperVal <<= numBitsFirstVal; + + return lowerVal | upperVal; + } +} + +/// Write a value to memory with a particular endianness, for a location +/// that starts at the given bit offset within the first byte. +template +inline void writeAtBitAlignment(void *memory, value_type value, + uint64_t startBit) { + assert(startBit < 8); + if (startBit == 0) + write(memory, value); + else { + // Read two values and shift the result into them. + value_type val[2]; + memcpy(&val[0], + LLVM_ASSUME_ALIGNED( + memory, (detail::PickAlignment::value)), + sizeof(value_type) * 2); + val[0] = byte_swap(val[0]); + val[1] = byte_swap(val[1]); + + // Mask off any existing bits in the upper part of the lower value that + // we want to replace. + val[0] &= ((make_unsigned_t)1 << startBit) - 1; + make_unsigned_t numBitsFirstVal = + (sizeof(value_type) * 8) - startBit; + make_unsigned_t lowerVal = value; + if (startBit > 0) { + // Mask off the upper bits in the new value that are not going to go into + // the lower value. This avoids a left shift of a negative value, which + // is undefined behavior. + lowerVal &= (((make_unsigned_t)1 << numBitsFirstVal) - 1); + // Now shift the new bits into place + lowerVal <<= startBit; + } + val[0] |= lowerVal; + + // Mask off any existing bits in the lower part of the upper value that + // we want to replace. + val[1] &= ~(((make_unsigned_t)1 << startBit) - 1); + // Next shift the bits that go into the upper value into position. + make_unsigned_t upperVal = value >> numBitsFirstVal; + // Mask off upper bits after right shift in case of signed type. + upperVal &= ((make_unsigned_t)1 << startBit) - 1; + val[1] |= upperVal; + + // Finally, rewrite values. + val[0] = byte_swap(val[0]); + val[1] = byte_swap(val[1]); + memcpy(LLVM_ASSUME_ALIGNED( + memory, (detail::PickAlignment::value)), + &val[0], sizeof(value_type) * 2); + } +} + +} // end namespace endian + +namespace detail { + +template ::value> +struct packed_endian_specific_integral { + using value_type = ValueType; + static constexpr endianness endian = Endian; + static constexpr std::size_t alignment = Alignment; + + packed_endian_specific_integral() = default; + + explicit packed_endian_specific_integral(value_type val) { *this = val; } + + operator value_type() const { + return endian::read( + (const void*)Value.buffer); + } + + void operator=(value_type newValue) { + endian::write( + (void*)Value.buffer, newValue); + } + + packed_endian_specific_integral &operator+=(value_type newValue) { + *this = *this + newValue; + return *this; + } + + packed_endian_specific_integral &operator-=(value_type newValue) { + *this = *this - newValue; + return *this; + } + + packed_endian_specific_integral &operator|=(value_type newValue) { + *this = *this | newValue; + return *this; + } + + packed_endian_specific_integral &operator&=(value_type newValue) { + *this = *this & newValue; + return *this; + } + +private: + struct { + alignas(ALIGN) char buffer[sizeof(value_type)]; + } Value; + +public: + struct ref { + explicit ref(void *Ptr) : Ptr(Ptr) {} + + operator value_type() const { + return endian::read(Ptr); + } + + void operator=(value_type NewValue) { + endian::write(Ptr, NewValue); + } + + private: + void *Ptr; + }; +}; + +} // end namespace detail + +using ulittle16_t = + detail::packed_endian_specific_integral; +using ulittle32_t = + detail::packed_endian_specific_integral; +using ulittle64_t = + detail::packed_endian_specific_integral; + +using little16_t = + detail::packed_endian_specific_integral; +using little32_t = + detail::packed_endian_specific_integral; +using little64_t = + detail::packed_endian_specific_integral; + +using aligned_ulittle16_t = + detail::packed_endian_specific_integral; +using aligned_ulittle32_t = + detail::packed_endian_specific_integral; +using aligned_ulittle64_t = + detail::packed_endian_specific_integral; + +using aligned_little16_t = + detail::packed_endian_specific_integral; +using aligned_little32_t = + detail::packed_endian_specific_integral; +using aligned_little64_t = + detail::packed_endian_specific_integral; + +using ubig16_t = + detail::packed_endian_specific_integral; +using ubig32_t = + detail::packed_endian_specific_integral; +using ubig64_t = + detail::packed_endian_specific_integral; + +using big16_t = + detail::packed_endian_specific_integral; +using big32_t = + detail::packed_endian_specific_integral; +using big64_t = + detail::packed_endian_specific_integral; + +using aligned_ubig16_t = + detail::packed_endian_specific_integral; +using aligned_ubig32_t = + detail::packed_endian_specific_integral; +using aligned_ubig64_t = + detail::packed_endian_specific_integral; + +using aligned_big16_t = + detail::packed_endian_specific_integral; +using aligned_big32_t = + detail::packed_endian_specific_integral; +using aligned_big64_t = + detail::packed_endian_specific_integral; + +using unaligned_uint16_t = + detail::packed_endian_specific_integral; +using unaligned_uint32_t = + detail::packed_endian_specific_integral; +using unaligned_uint64_t = + detail::packed_endian_specific_integral; + +using unaligned_int16_t = + detail::packed_endian_specific_integral; +using unaligned_int32_t = + detail::packed_endian_specific_integral; +using unaligned_int64_t = + detail::packed_endian_specific_integral; + +template +using little_t = detail::packed_endian_specific_integral; +template +using big_t = detail::packed_endian_specific_integral; + +template +using aligned_little_t = + detail::packed_endian_specific_integral; +template +using aligned_big_t = detail::packed_endian_specific_integral; + +namespace endian { + +template inline T read(const void *P, endianness E) { + return read(P, E); +} + +template inline T read(const void *P) { + return *(const detail::packed_endian_specific_integral *)P; +} + +inline uint16_t read16(const void *P, endianness E) { + return read(P, E); +} +inline uint32_t read32(const void *P, endianness E) { + return read(P, E); +} +inline uint64_t read64(const void *P, endianness E) { + return read(P, E); +} + +template inline uint16_t read16(const void *P) { + return read(P); +} +template inline uint32_t read32(const void *P) { + return read(P); +} +template inline uint64_t read64(const void *P) { + return read(P); +} + +inline uint16_t read16le(const void *P) { return read16(P); } +inline uint32_t read32le(const void *P) { return read32(P); } +inline uint64_t read64le(const void *P) { return read64(P); } +inline uint16_t read16be(const void *P) { return read16(P); } +inline uint32_t read32be(const void *P) { return read32(P); } +inline uint64_t read64be(const void *P) { return read64(P); } + +template inline void write(void *P, T V, endianness E) { + write(P, V, E); +} + +template inline void write(void *P, T V) { + *(detail::packed_endian_specific_integral *)P = V; +} + +inline void write16(void *P, uint16_t V, endianness E) { + write(P, V, E); +} +inline void write32(void *P, uint32_t V, endianness E) { + write(P, V, E); +} +inline void write64(void *P, uint64_t V, endianness E) { + write(P, V, E); +} + +template inline void write16(void *P, uint16_t V) { + write(P, V); +} +template inline void write32(void *P, uint32_t V) { + write(P, V); +} +template inline void write64(void *P, uint64_t V) { + write(P, V); +} + +inline void write16le(void *P, uint16_t V) { write16(P, V); } +inline void write32le(void *P, uint32_t V) { write32(P, V); } +inline void write64le(void *P, uint64_t V) { write64(P, V); } +inline void write16be(void *P, uint16_t V) { write16(P, V); } +inline void write32be(void *P, uint32_t V) { write32(P, V); } +inline void write64be(void *P, uint64_t V) { write64(P, V); } + +} // end namespace endian + +} // end namespace support +} // end namespace llvm + +#endif // LLVM_SUPPORT_ENDIAN_H -- cgit v1.2.3