diff options
author | Derek Schuff <dschuff@chromium.org> | 2023-09-14 14:08:40 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-09-14 14:08:40 -0700 |
commit | f774effa54c6a40448487033a28a47caa3394f61 (patch) | |
tree | ee9764205ec4b6b916cc7e6e1ad94b59eb5f853d | |
parent | 11dba9b1c2ad988500b329727f39f4d8786918c5 (diff) | |
download | binaryen-f774effa54c6a40448487033a28a47caa3394f61.tar.gz binaryen-f774effa54c6a40448487033a28a47caa3394f61.tar.bz2 binaryen-f774effa54c6a40448487033a28a47caa3394f61.zip |
Encode command line to UTF8 on Windows (#5671)
This PR changes how file paths and the command line are handled. On startup on Windows,
we process the wstring version of the command line (including the file paths) and re-encode
it to UTF8 before handing it off to the rest of the command line handling logic. This means
that all paths are stored in UTF8-encoded std::strings as they go through the program, right
up until they are used to open files. At that point, they are converted to the appropriate native
format with the new to_path function before being passed to the stdlib open functions.
This has the advantage that all of the non-file-opening code can use a single type to hold paths
(which is good since std::filesystem::path has proved problematic in some cases), but has the
disadvantage that someone could add new code that forgets to call to_path on a path before
opening the file. That's somewhat mitigated by the fact that most of the code uses the ModuleIOBase
classes for opening files.
Fixes #4995
-rw-r--r-- | src/support/command-line.cpp | 30 | ||||
-rw-r--r-- | src/support/file.cpp | 18 | ||||
-rw-r--r-- | src/support/path.cpp | 30 | ||||
-rw-r--r-- | src/support/path.h | 14 | ||||
-rw-r--r-- | src/tools/wasm-opt.cpp | 7 | ||||
-rw-r--r-- | src/wasm/wasm-io.cpp | 14 | ||||
-rw-r--r-- | test/lit/unicode-filenames.wast | 21 |
7 files changed, 119 insertions, 15 deletions
diff --git a/src/support/command-line.cpp b/src/support/command-line.cpp index 23ded0346..2b1b0b10b 100644 --- a/src/support/command-line.cpp +++ b/src/support/command-line.cpp @@ -17,6 +17,15 @@ #include "support/command-line.h" #include "config.h" #include "support/debug.h" +#include "support/path.h" + +#ifdef USE_WSTRING_PATHS +#ifndef NOMINMAX +#define NOMINMAX +#endif +#include "windows.h" +#include "shellapi.h" +#endif using namespace wasm; @@ -163,6 +172,23 @@ Options& Options::add_positional(const std::string& name, } void Options::parse(int argc, const char* argv[]) { + +// On Windows, get the wide char version of the command line flags, and convert +// each one to std::string with UTF-8 manually. This means that all paths in +// Binaryen are stored this way on all platforms right up until a library call +// is made to open a file (at which point we use Path::to_path to convert back) +// so that it works with the underlying Win32 APIs. +// Only argList (and not argv) should be used below. 
+#ifdef USE_WSTRING_PATHS + LPWSTR* argListW = CommandLineToArgvW(GetCommandLineW(), &argc); + std::vector<std::string> argList; + for (size_t i = 0, e = argc; i < e; ++i) { + argList.push_back(wasm::Path::wstring_to_string(argListW[i])); + } +#else + const char** argList = argv; +#endif + assert(argc > 0 && "expect at least program name as an argument"); size_t positionalsSeen = 0; auto dashes = [](const std::string& s) { @@ -174,7 +200,7 @@ void Options::parse(int argc, const char* argv[]) { return s.size(); }; for (size_t i = 1, e = argc; i != e; ++i) { - std::string currentOption = argv[i]; + std::string currentOption = argList[i]; // "-" alone is a positional option if (dashes(currentOption) == 0 || currentOption == "-") { @@ -241,7 +267,7 @@ void Options::parse(int argc, const char* argv[]) { << currentOption << "'\n"; exit(EXIT_FAILURE); } - argument = argv[++i]; + argument = argList[++i]; } break; case Arguments::Optional: diff --git a/src/support/file.cpp b/src/support/file.cpp index cfd656391..feb05e136 100644 --- a/src/support/file.cpp +++ b/src/support/file.cpp @@ -16,6 +16,7 @@ #include "support/file.h" #include "support/debug.h" +#include "support/path.h" #include "support/utilities.h" #include <cstdint> @@ -57,7 +58,7 @@ T wasm::read_file(const std::string& filename, Flags::BinaryOption binary) { if (binary == Flags::Binary) { flags |= std::ifstream::binary; } - infile.open(filename, flags); + infile.open(wasm::Path::to_path(filename), flags); if (!infile.is_open()) { Fatal() << "Failed opening '" << filename << "'"; } @@ -108,13 +109,15 @@ wasm::Output::Output(const std::string& filename, Flags::BinaryOption binary) buffer = std::cout.rdbuf(); } else { BYN_TRACE("Opening '" << filename << "'\n"); - auto flags = std::ofstream::out | std::ofstream::trunc; + std::ios_base::openmode flags = + std::ofstream::out | std::ofstream::trunc; if (binary == Flags::Binary) { flags |= std::ofstream::binary; } - outfile.open(filename, flags); + 
outfile.open(wasm::Path::to_path(filename), flags); if (!outfile.is_open()) { - Fatal() << "Failed opening '" << filename << "'"; + Fatal() << "Failed opening output file '" << filename + << "': " << strerror(errno); } buffer = outfile.rdbuf(); } @@ -122,12 +125,13 @@ wasm::Output::Output(const std::string& filename, Flags::BinaryOption binary) }()) {} void wasm::copy_file(std::string input, std::string output) { - std::ifstream src(input, std::ios::binary); - std::ofstream dst(output, std::ios::binary); + std::ifstream src(wasm::Path::to_path(input), std::ios::binary); + std::ofstream dst(wasm::Path::to_path(output), std::ios::binary); dst << src.rdbuf(); } size_t wasm::file_size(std::string filename) { - std::ifstream infile(filename, std::ifstream::ate | std::ifstream::binary); + std::ifstream infile(wasm::Path::to_path(filename), + std::ifstream::ate | std::ifstream::binary); return infile.tellg(); } diff --git a/src/support/path.cpp b/src/support/path.cpp index 1858fe1e9..284618f09 100644 --- a/src/support/path.cpp +++ b/src/support/path.cpp @@ -19,9 +19,39 @@ // #include "support/path.h" +#ifdef USE_WSTRING_PATHS +#include "windows.h" +#endif namespace wasm::Path { +#ifdef USE_WSTRING_PATHS +PathString to_path(const std::string& s) { return string_to_wstring(s); } + +std::wstring string_to_wstring(const std::string& s) { + const char* inptr = s.data(); + size_t inlen = s.size(); + size_t outlen = MultiByteToWideChar(CP_UTF8, 0, inptr, inlen, NULL, 0); + std::wstring outstr(outlen, 0); + const LPWSTR outptr = outstr.data(); + MultiByteToWideChar(CP_UTF8, 0, inptr, inlen, outptr, outlen); + return outstr; +} + +std::string wstring_to_string(const std::wstring& s) { + const wchar_t* inptr = s.data(); + size_t inlen = s.size(); + size_t outlen = + WideCharToMultiByte(CP_UTF8, 0, inptr, inlen, NULL, 0, NULL, NULL); + std::string outstr(outlen, 0); + const LPSTR outptr = outstr.data(); + WideCharToMultiByte(CP_UTF8, 0, inptr, inlen, outptr, outlen, NULL, NULL); + 
return outstr; +} +#else +PathString to_path(const std::string& s) { return s; } +#endif + char getPathSeparator() { // TODO: use c++17's path separator // http://en.cppreference.com/w/cpp/experimental/fs/path diff --git a/src/support/path.h b/src/support/path.h index 78e85ca5c..8383bc53e 100644 --- a/src/support/path.h +++ b/src/support/path.h @@ -24,8 +24,22 @@ #include <cstdlib> #include <string> +#if defined(_WIN32) && !defined(__MINGW32__) +#define USE_WSTRING_PATHS 1 +#endif + namespace wasm::Path { +#ifdef USE_WSTRING_PATHS +using PathString = std::wstring; +std::wstring string_to_wstring(const std::string& s); +std::string wstring_to_string(const std::wstring& s); +#else +using PathString = std::string; +#endif + +PathString to_path(const std::string& s); + char getPathSeparator(); std::string getDirName(const std::string& path); std::string getBaseName(const std::string& path); diff --git a/src/tools/wasm-opt.cpp b/src/tools/wasm-opt.cpp index 0d279dc08..5bbca1ea6 100644 --- a/src/tools/wasm-opt.cpp +++ b/src/tools/wasm-opt.cpp @@ -31,6 +31,7 @@ #include "support/command-line.h" #include "support/debug.h" #include "support/file.h" +#include "support/path.h" #include "wasm-binary.h" #include "wasm-interpreter.h" #include "wasm-io.h" @@ -330,19 +331,19 @@ int main(int argc, const char* argv[]) { if (emitJSWrapper.size() > 0) { std::ofstream outfile; - outfile.open(emitJSWrapper, std::ofstream::out); + outfile.open(wasm::Path::to_path(emitJSWrapper), std::ofstream::out); outfile << generateJSWrapper(wasm); outfile.close(); } if (emitSpecWrapper.size() > 0) { std::ofstream outfile; - outfile.open(emitSpecWrapper, std::ofstream::out); + outfile.open(wasm::Path::to_path(emitSpecWrapper), std::ofstream::out); outfile << generateSpecWrapper(wasm); outfile.close(); } if (emitWasm2CWrapper.size() > 0) { std::ofstream outfile; - outfile.open(emitWasm2CWrapper, std::ofstream::out); + outfile.open(wasm::Path::to_path(emitWasm2CWrapper), std::ofstream::out); outfile << 
generateWasm2CWrapper(wasm); outfile.close(); } diff --git a/src/wasm/wasm-io.cpp b/src/wasm/wasm-io.cpp index 65e6a982a..a340b4c23 100644 --- a/src/wasm/wasm-io.cpp +++ b/src/wasm/wasm-io.cpp @@ -26,6 +26,7 @@ #include "wasm-io.h" #include "support/debug.h" +#include "support/path.h" #include "wasm-binary.h" #include "wasm-s-parser.h" #include "wat-parser.h" @@ -69,7 +70,10 @@ void ModuleReader::readBinaryData(std::vector<char>& input, parser.setSkipFunctionBodies(skipFunctionBodies); if (sourceMapFilename.size()) { sourceMapStream = std::make_unique<std::ifstream>(); - sourceMapStream->open(sourceMapFilename); + sourceMapStream->open(wasm::Path::to_path(sourceMapFilename)); + if (!sourceMapStream->is_open()) { + Fatal() << "Failed opening '" << sourceMapFilename << "'"; + } parser.setDebugLocations(sourceMapStream.get()); } parser.read(); @@ -89,7 +93,7 @@ void ModuleReader::readBinary(std::string filename, bool ModuleReader::isBinaryFile(std::string filename) { std::ifstream infile; std::ios_base::openmode flags = std::ifstream::in | std::ifstream::binary; - infile.open(filename, flags); + infile.open(wasm::Path::to_path(filename), flags); char buffer[4] = {1, 2, 3, 4}; infile.read(buffer, 4); infile.close(); @@ -157,7 +161,11 @@ void ModuleWriter::writeBinary(Module& wasm, Output& output) { std::unique_ptr<std::ofstream> sourceMapStream; if (sourceMapFilename.size()) { sourceMapStream = std::make_unique<std::ofstream>(); - sourceMapStream->open(sourceMapFilename); + sourceMapStream->open(wasm::Path::to_path(sourceMapFilename)); + if (!sourceMapStream->is_open()) { + Fatal() << "Failed opening sourcemap output file '" << sourceMapFilename + << "'"; + } writer.setSourceMap(sourceMapStream.get(), sourceMapUrl); } if (symbolMap.size() > 0) { diff --git a/test/lit/unicode-filenames.wast b/test/lit/unicode-filenames.wast new file mode 100644 index 000000000..f30ad9c09 --- /dev/null +++ b/test/lit/unicode-filenames.wast @@ -0,0 +1,21 @@ +;; RUN: wasm-as %s -o 
%t-❤.wasm --source-map %t-🗺️.map +;; RUN: cat %t-🗺️.map | filecheck %s --check-prefix SOURCEMAP +;; RUN: wasm-opt %t-❤.wasm -o %t-🤬.wasm --emit-js-wrapper %t-❤.js --input-source-map %t-🗺️.map --output-source-map %t-🗺️.out.map +;; RUN: cat %t-🗺️.out.map | filecheck %s --check-prefix SOURCEMAP +;; RUN: wasm-dis %t-🤬.wasm | filecheck %s --check-prefix MODULE + +;; MODULE: i32.add +;; SOURCEMAP: src.cpp + +(module + (type $i32_i32_=>_i32 (func (param i32 i32) (result i32))) + (memory $0 256 256) + (export "add" (func $add)) + (func $add (; 0 ;) (param $0 i32) (param $1 i32) (result i32) + (i32.add + ;;@ src.cpp:10:1 + (local.get $0) + (local.get $1) + ) + ) +) |