diff options
author | Derek Schuff <dschuff@chromium.org> | 2023-09-14 14:08:40 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-09-14 14:08:40 -0700 |
commit | f774effa54c6a40448487033a28a47caa3394f61 (patch) | |
tree | ee9764205ec4b6b916cc7e6e1ad94b59eb5f853d /src/support | |
parent | 11dba9b1c2ad988500b329727f39f4d8786918c5 (diff) | |
download | binaryen-f774effa54c6a40448487033a28a47caa3394f61.tar.gz binaryen-f774effa54c6a40448487033a28a47caa3394f61.tar.bz2 binaryen-f774effa54c6a40448487033a28a47caa3394f61.zip |
Encode command line to UTF8 on Windows (#5671)
This PR changes how file paths and the command line are handled. On startup on Windows,
we process the wstring version of the command line (including the file paths) and re-encode
it to UTF-8 before handing it off to the rest of the command line handling logic. This means
that all paths are stored in UTF-8-encoded std::strings as they go through the program, right
up until they are used to open files. At that time, they are converted to the appropriate native
format with the new to_path function before being passed to the stdlib open functions.
This has the advantage that all of the non-file-opening code can use a single type to hold paths
(which is good since std::filesystem::path has proved problematic in some cases), but has the
disadvantage that someone could add new code that forgets to call to_path on a path before
opening it. That's somewhat mitigated by the fact that most of the code uses the ModuleIOBase
classes for opening files.
Fixes #4995
Diffstat (limited to 'src/support')
-rw-r--r-- | src/support/command-line.cpp | 30 | ||||
-rw-r--r-- | src/support/file.cpp | 18 | ||||
-rw-r--r-- | src/support/path.cpp | 30 | ||||
-rw-r--r-- | src/support/path.h | 14 |
4 files changed, 83 insertions, 9 deletions
diff --git a/src/support/command-line.cpp b/src/support/command-line.cpp index 23ded0346..2b1b0b10b 100644 --- a/src/support/command-line.cpp +++ b/src/support/command-line.cpp @@ -17,6 +17,15 @@ #include "support/command-line.h" #include "config.h" #include "support/debug.h" +#include "support/path.h" + +#ifdef USE_WSTRING_PATHS +#ifndef NOMINMAX +#define NOMINMAX +#endif +#include "windows.h" +#include "shellapi.h" +#endif using namespace wasm; @@ -163,6 +172,23 @@ Options& Options::add_positional(const std::string& name, } void Options::parse(int argc, const char* argv[]) { + +// On Windows, get the wide char version of the command line flags, and convert +// each one to std::string with UTF-8 manually. This means that all paths in +// Binaryen are stored this way on all platforms right up until a library call +// is made to open a file (at which point we use Path::to_path to convert back) +// so that it works with the underlying Win32 APIs. +// Only argList (and not argv) should be used below. 
+#ifdef USE_WSTRING_PATHS + LPWSTR* argListW = CommandLineToArgvW(GetCommandLineW(), &argc); + std::vector<std::string> argList; + for (size_t i = 0, e = argc; i < e; ++i) { + argList.push_back(wasm::Path::wstring_to_string(argListW[i])); + } +#else + const char** argList = argv; +#endif + assert(argc > 0 && "expect at least program name as an argument"); size_t positionalsSeen = 0; auto dashes = [](const std::string& s) { @@ -174,7 +200,7 @@ void Options::parse(int argc, const char* argv[]) { return s.size(); }; for (size_t i = 1, e = argc; i != e; ++i) { - std::string currentOption = argv[i]; + std::string currentOption = argList[i]; // "-" alone is a positional option if (dashes(currentOption) == 0 || currentOption == "-") { @@ -241,7 +267,7 @@ void Options::parse(int argc, const char* argv[]) { << currentOption << "'\n"; exit(EXIT_FAILURE); } - argument = argv[++i]; + argument = argList[++i]; } break; case Arguments::Optional: diff --git a/src/support/file.cpp b/src/support/file.cpp index cfd656391..feb05e136 100644 --- a/src/support/file.cpp +++ b/src/support/file.cpp @@ -16,6 +16,7 @@ #include "support/file.h" #include "support/debug.h" +#include "support/path.h" #include "support/utilities.h" #include <cstdint> @@ -57,7 +58,7 @@ T wasm::read_file(const std::string& filename, Flags::BinaryOption binary) { if (binary == Flags::Binary) { flags |= std::ifstream::binary; } - infile.open(filename, flags); + infile.open(wasm::Path::to_path(filename), flags); if (!infile.is_open()) { Fatal() << "Failed opening '" << filename << "'"; } @@ -108,13 +109,15 @@ wasm::Output::Output(const std::string& filename, Flags::BinaryOption binary) buffer = std::cout.rdbuf(); } else { BYN_TRACE("Opening '" << filename << "'\n"); - auto flags = std::ofstream::out | std::ofstream::trunc; + std::ios_base::openmode flags = + std::ofstream::out | std::ofstream::trunc; if (binary == Flags::Binary) { flags |= std::ofstream::binary; } - outfile.open(filename, flags); + 
outfile.open(wasm::Path::to_path(filename), flags); if (!outfile.is_open()) { - Fatal() << "Failed opening '" << filename << "'"; + Fatal() << "Failed opening output file '" << filename + << "': " << strerror(errno); } buffer = outfile.rdbuf(); } @@ -122,12 +125,13 @@ wasm::Output::Output(const std::string& filename, Flags::BinaryOption binary) }()) {} void wasm::copy_file(std::string input, std::string output) { - std::ifstream src(input, std::ios::binary); - std::ofstream dst(output, std::ios::binary); + std::ifstream src(wasm::Path::to_path(input), std::ios::binary); + std::ofstream dst(wasm::Path::to_path(output), std::ios::binary); dst << src.rdbuf(); } size_t wasm::file_size(std::string filename) { - std::ifstream infile(filename, std::ifstream::ate | std::ifstream::binary); + std::ifstream infile(wasm::Path::to_path(filename), + std::ifstream::ate | std::ifstream::binary); return infile.tellg(); } diff --git a/src/support/path.cpp b/src/support/path.cpp index 1858fe1e9..284618f09 100644 --- a/src/support/path.cpp +++ b/src/support/path.cpp @@ -19,9 +19,39 @@ // #include "support/path.h" +#ifdef USE_WSTRING_PATHS +#include "windows.h" +#endif namespace wasm::Path { +#ifdef USE_WSTRING_PATHS +PathString to_path(const std::string& s) { return string_to_wstring(s); } + +std::wstring string_to_wstring(const std::string& s) { + const char* inptr = s.data(); + size_t inlen = s.size(); + size_t outlen = MultiByteToWideChar(CP_UTF8, 0, inptr, inlen, NULL, 0); + std::wstring outstr(outlen, 0); + const LPWSTR outptr = outstr.data(); + MultiByteToWideChar(CP_UTF8, 0, inptr, inlen, outptr, outlen); + return outstr; +} + +std::string wstring_to_string(const std::wstring& s) { + const wchar_t* inptr = s.data(); + size_t inlen = s.size(); + size_t outlen = + WideCharToMultiByte(CP_UTF8, 0, inptr, inlen, NULL, 0, NULL, NULL); + std::string outstr(outlen, 0); + const LPSTR outptr = outstr.data(); + WideCharToMultiByte(CP_UTF8, 0, inptr, inlen, outptr, outlen, NULL, NULL); + 
return outstr; +} +#else +PathString to_path(const std::string& s) { return s; } +#endif + char getPathSeparator() { // TODO: use c++17's path separator // http://en.cppreference.com/w/cpp/experimental/fs/path diff --git a/src/support/path.h b/src/support/path.h index 78e85ca5c..8383bc53e 100644 --- a/src/support/path.h +++ b/src/support/path.h @@ -24,8 +24,22 @@ #include <cstdlib> #include <string> +#if defined(_WIN32) && !defined(__MINGW32__) +#define USE_WSTRING_PATHS 1 +#endif + namespace wasm::Path { +#ifdef USE_WSTRING_PATHS +using PathString = std::wstring; +std::wstring string_to_wstring(const std::string& s); +std::string wstring_to_string(const std::wstring& s); +#else +using PathString = std::string; +#endif + +PathString to_path(const std::string& s); + char getPathSeparator(); std::string getDirName(const std::string& path); std::string getBaseName(const std::string& path); |