summary | refs | log | tree | commit | diff
path: root/src/support
diff options
context:
space:
mode:
author: Derek Schuff <dschuff@chromium.org>, 2023-09-14 14:08:40 -0700
committer: GitHub <noreply@github.com>, 2023-09-14 14:08:40 -0700
commit: f774effa54c6a40448487033a28a47caa3394f61 (patch)
tree: ee9764205ec4b6b916cc7e6e1ad94b59eb5f853d /src/support
parent: 11dba9b1c2ad988500b329727f39f4d8786918c5 (diff)
download: binaryen-f774effa54c6a40448487033a28a47caa3394f61.tar.gz
binaryen-f774effa54c6a40448487033a28a47caa3394f61.tar.bz2
binaryen-f774effa54c6a40448487033a28a47caa3394f61.zip
Encode command line to UTF8 on Windows (#5671)
This PR changes how file paths and the command line are handled. On startup on Windows, we process the wstring version of the command line (including the file paths) and re-encode it to UTF-8 before handing it off to the rest of the command-line handling logic. This means that all paths are stored in UTF-8-encoded std::strings as they go through the program, right up until they are used to open files. At that point, they are converted to the appropriate native format with the new to_path function before being passed to the stdlib open functions. This has the advantage that all of the non-file-opening code can use a single type to hold paths (which is good, since std::filesystem::path has proved problematic in some cases), but has the disadvantage that someone could add new code that forgets to call to_path before opening a file. That is somewhat mitigated by the fact that most of the code uses the ModuleIOBase classes for opening files. Fixes #4995.
Diffstat (limited to 'src/support')
-rw-r--r-- src/support/command-line.cpp 30
-rw-r--r-- src/support/file.cpp 18
-rw-r--r-- src/support/path.cpp 30
-rw-r--r-- src/support/path.h 14
4 files changed, 83 insertions, 9 deletions
diff --git a/src/support/command-line.cpp b/src/support/command-line.cpp
index 23ded0346..2b1b0b10b 100644
--- a/src/support/command-line.cpp
+++ b/src/support/command-line.cpp
@@ -17,6 +17,15 @@
#include "support/command-line.h"
#include "config.h"
#include "support/debug.h"
+#include "support/path.h"
+
+#ifdef USE_WSTRING_PATHS
+#ifndef NOMINMAX
+#define NOMINMAX
+#endif
+#include "windows.h"
+#include "shellapi.h"
+#endif
using namespace wasm;
@@ -163,6 +172,23 @@ Options& Options::add_positional(const std::string& name,
}
void Options::parse(int argc, const char* argv[]) {
+
+// On Windows, get the wide char version of the command line flags, and convert
+// each one to std::string with UTF-8 manually. This means that all paths in
+// Binaryen are stored this way on all platforms right up until a library call
+// is made to open a file (at which point we use Path::to_path to convert back)
+// so that it works with the underlying Win32 APIs.
+// Only argList (and not argv) should be used below.
+#ifdef USE_WSTRING_PATHS
+ LPWSTR* argListW = CommandLineToArgvW(GetCommandLineW(), &argc);
+ std::vector<std::string> argList;
+ for (size_t i = 0, e = argc; i < e; ++i) {
+ argList.push_back(wasm::Path::wstring_to_string(argListW[i]));
+ }
+#else
+ const char** argList = argv;
+#endif
+
assert(argc > 0 && "expect at least program name as an argument");
size_t positionalsSeen = 0;
auto dashes = [](const std::string& s) {
@@ -174,7 +200,7 @@ void Options::parse(int argc, const char* argv[]) {
return s.size();
};
for (size_t i = 1, e = argc; i != e; ++i) {
- std::string currentOption = argv[i];
+ std::string currentOption = argList[i];
// "-" alone is a positional option
if (dashes(currentOption) == 0 || currentOption == "-") {
@@ -241,7 +267,7 @@ void Options::parse(int argc, const char* argv[]) {
<< currentOption << "'\n";
exit(EXIT_FAILURE);
}
- argument = argv[++i];
+ argument = argList[++i];
}
break;
case Arguments::Optional:
diff --git a/src/support/file.cpp b/src/support/file.cpp
index cfd656391..feb05e136 100644
--- a/src/support/file.cpp
+++ b/src/support/file.cpp
@@ -16,6 +16,7 @@
#include "support/file.h"
#include "support/debug.h"
+#include "support/path.h"
#include "support/utilities.h"
#include <cstdint>
@@ -57,7 +58,7 @@ T wasm::read_file(const std::string& filename, Flags::BinaryOption binary) {
if (binary == Flags::Binary) {
flags |= std::ifstream::binary;
}
- infile.open(filename, flags);
+ infile.open(wasm::Path::to_path(filename), flags);
if (!infile.is_open()) {
Fatal() << "Failed opening '" << filename << "'";
}
@@ -108,13 +109,15 @@ wasm::Output::Output(const std::string& filename, Flags::BinaryOption binary)
buffer = std::cout.rdbuf();
} else {
BYN_TRACE("Opening '" << filename << "'\n");
- auto flags = std::ofstream::out | std::ofstream::trunc;
+ std::ios_base::openmode flags =
+ std::ofstream::out | std::ofstream::trunc;
if (binary == Flags::Binary) {
flags |= std::ofstream::binary;
}
- outfile.open(filename, flags);
+ outfile.open(wasm::Path::to_path(filename), flags);
if (!outfile.is_open()) {
- Fatal() << "Failed opening '" << filename << "'";
+ Fatal() << "Failed opening output file '" << filename
+ << "': " << strerror(errno);
}
buffer = outfile.rdbuf();
}
@@ -122,12 +125,13 @@ wasm::Output::Output(const std::string& filename, Flags::BinaryOption binary)
}()) {}
void wasm::copy_file(std::string input, std::string output) {
- std::ifstream src(input, std::ios::binary);
- std::ofstream dst(output, std::ios::binary);
+ std::ifstream src(wasm::Path::to_path(input), std::ios::binary);
+ std::ofstream dst(wasm::Path::to_path(output), std::ios::binary);
dst << src.rdbuf();
}
size_t wasm::file_size(std::string filename) {
- std::ifstream infile(filename, std::ifstream::ate | std::ifstream::binary);
+ std::ifstream infile(wasm::Path::to_path(filename),
+ std::ifstream::ate | std::ifstream::binary);
return infile.tellg();
}
diff --git a/src/support/path.cpp b/src/support/path.cpp
index 1858fe1e9..284618f09 100644
--- a/src/support/path.cpp
+++ b/src/support/path.cpp
@@ -19,9 +19,39 @@
//
#include "support/path.h"
+#ifdef USE_WSTRING_PATHS
+#include "windows.h"
+#endif
namespace wasm::Path {
+#ifdef USE_WSTRING_PATHS
+PathString to_path(const std::string& s) { return string_to_wstring(s); }
+
+std::wstring string_to_wstring(const std::string& s) {
+ const char* inptr = s.data();
+ size_t inlen = s.size();
+ size_t outlen = MultiByteToWideChar(CP_UTF8, 0, inptr, inlen, NULL, 0);
+ std::wstring outstr(outlen, 0);
+ const LPWSTR outptr = outstr.data();
+ MultiByteToWideChar(CP_UTF8, 0, inptr, inlen, outptr, outlen);
+ return outstr;
+}
+
+std::string wstring_to_string(const std::wstring& s) {
+ const wchar_t* inptr = s.data();
+ size_t inlen = s.size();
+ size_t outlen =
+ WideCharToMultiByte(CP_UTF8, 0, inptr, inlen, NULL, 0, NULL, NULL);
+ std::string outstr(outlen, 0);
+ const LPSTR outptr = outstr.data();
+ WideCharToMultiByte(CP_UTF8, 0, inptr, inlen, outptr, outlen, NULL, NULL);
+ return outstr;
+}
+#else
+PathString to_path(const std::string& s) { return s; }
+#endif
+
char getPathSeparator() {
// TODO: use c++17's path separator
// http://en.cppreference.com/w/cpp/experimental/fs/path
diff --git a/src/support/path.h b/src/support/path.h
index 78e85ca5c..8383bc53e 100644
--- a/src/support/path.h
+++ b/src/support/path.h
@@ -24,8 +24,22 @@
#include <cstdlib>
#include <string>
+#if defined(_WIN32) && !defined(__MINGW32__)
+#define USE_WSTRING_PATHS 1
+#endif
+
namespace wasm::Path {
+#ifdef USE_WSTRING_PATHS
+using PathString = std::wstring;
+std::wstring string_to_wstring(const std::string& s);
+std::string wstring_to_string(const std::wstring& s);
+#else
+using PathString = std::string;
+#endif
+
+PathString to_path(const std::string& s);
+
char getPathSeparator();
std::string getDirName(const std::string& path);
std::string getBaseName(const std::string& path);