summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Derek Schuff <dschuff@chromium.org> 2023-09-14 14:08:40 -0700
committer GitHub <noreply@github.com> 2023-09-14 14:08:40 -0700
commit f774effa54c6a40448487033a28a47caa3394f61 (patch)
tree ee9764205ec4b6b916cc7e6e1ad94b59eb5f853d
parent 11dba9b1c2ad988500b329727f39f4d8786918c5 (diff)
download binaryen-f774effa54c6a40448487033a28a47caa3394f61.tar.gz
binaryen-f774effa54c6a40448487033a28a47caa3394f61.tar.bz2
binaryen-f774effa54c6a40448487033a28a47caa3394f61.zip
Encode command line to UTF8 on Windows (#5671)
This PR changes how file paths and the command line are handled. On startup on Windows, we process the wstring version of the command line (including the file paths) and re-encode it to UTF-8 before handing it off to the rest of the command line handling logic. This means that all paths are stored in UTF-8-encoded std::strings as they go through the program, right up until they are used to open files. At that point, they are converted to the appropriate native format with the new to_path function before being passed to the stdlib open functions. This has the advantage that all of the non-file-opening code can use a single type to hold paths (which is good, since std::filesystem::path has proved problematic in some cases), but has the disadvantage that someone could add new code that forgets to call to_path before opening a file. That is somewhat mitigated by the fact that most of the code uses the ModuleIOBase classes for opening files. Fixes #4995.
-rw-r--r-- src/support/command-line.cpp 30
-rw-r--r-- src/support/file.cpp 18
-rw-r--r-- src/support/path.cpp 30
-rw-r--r-- src/support/path.h 14
-rw-r--r-- src/tools/wasm-opt.cpp 7
-rw-r--r-- src/wasm/wasm-io.cpp 14
-rw-r--r-- test/lit/unicode-filenames.wast 21
7 files changed, 119 insertions, 15 deletions
diff --git a/src/support/command-line.cpp b/src/support/command-line.cpp
index 23ded0346..2b1b0b10b 100644
--- a/src/support/command-line.cpp
+++ b/src/support/command-line.cpp
@@ -17,6 +17,15 @@
#include "support/command-line.h"
#include "config.h"
#include "support/debug.h"
+#include "support/path.h"
+
+#ifdef USE_WSTRING_PATHS
+#ifndef NOMINMAX
+#define NOMINMAX
+#endif
+#include "windows.h"
+#include "shellapi.h"
+#endif
using namespace wasm;
@@ -163,6 +172,23 @@ Options& Options::add_positional(const std::string& name,
}
void Options::parse(int argc, const char* argv[]) {
+
+// On Windows, get the wide char version of the command line flags, and convert
+// each one to std::string with UTF-8 manually. This means that all paths in
+// Binaryen are stored this way on all platforms right up until a library call
+// is made to open a file (at which point we use Path::to_path to convert back)
+// so that it works with the underlying Win32 APIs.
+// Only argList (and not argv) should be used below.
+#ifdef USE_WSTRING_PATHS
+ LPWSTR* argListW = CommandLineToArgvW(GetCommandLineW(), &argc);
+ std::vector<std::string> argList;
+ for (size_t i = 0, e = argc; i < e; ++i) {
+ argList.push_back(wasm::Path::wstring_to_string(argListW[i]));
+ }
+#else
+ const char** argList = argv;
+#endif
+
assert(argc > 0 && "expect at least program name as an argument");
size_t positionalsSeen = 0;
auto dashes = [](const std::string& s) {
@@ -174,7 +200,7 @@ void Options::parse(int argc, const char* argv[]) {
return s.size();
};
for (size_t i = 1, e = argc; i != e; ++i) {
- std::string currentOption = argv[i];
+ std::string currentOption = argList[i];
// "-" alone is a positional option
if (dashes(currentOption) == 0 || currentOption == "-") {
@@ -241,7 +267,7 @@ void Options::parse(int argc, const char* argv[]) {
<< currentOption << "'\n";
exit(EXIT_FAILURE);
}
- argument = argv[++i];
+ argument = argList[++i];
}
break;
case Arguments::Optional:
diff --git a/src/support/file.cpp b/src/support/file.cpp
index cfd656391..feb05e136 100644
--- a/src/support/file.cpp
+++ b/src/support/file.cpp
@@ -16,6 +16,7 @@
#include "support/file.h"
#include "support/debug.h"
+#include "support/path.h"
#include "support/utilities.h"
#include <cstdint>
@@ -57,7 +58,7 @@ T wasm::read_file(const std::string& filename, Flags::BinaryOption binary) {
if (binary == Flags::Binary) {
flags |= std::ifstream::binary;
}
- infile.open(filename, flags);
+ infile.open(wasm::Path::to_path(filename), flags);
if (!infile.is_open()) {
Fatal() << "Failed opening '" << filename << "'";
}
@@ -108,13 +109,15 @@ wasm::Output::Output(const std::string& filename, Flags::BinaryOption binary)
buffer = std::cout.rdbuf();
} else {
BYN_TRACE("Opening '" << filename << "'\n");
- auto flags = std::ofstream::out | std::ofstream::trunc;
+ std::ios_base::openmode flags =
+ std::ofstream::out | std::ofstream::trunc;
if (binary == Flags::Binary) {
flags |= std::ofstream::binary;
}
- outfile.open(filename, flags);
+ outfile.open(wasm::Path::to_path(filename), flags);
if (!outfile.is_open()) {
- Fatal() << "Failed opening '" << filename << "'";
+ Fatal() << "Failed opening output file '" << filename
+ << "': " << strerror(errno);
}
buffer = outfile.rdbuf();
}
@@ -122,12 +125,13 @@ wasm::Output::Output(const std::string& filename, Flags::BinaryOption binary)
}()) {}
void wasm::copy_file(std::string input, std::string output) {
- std::ifstream src(input, std::ios::binary);
- std::ofstream dst(output, std::ios::binary);
+ std::ifstream src(wasm::Path::to_path(input), std::ios::binary);
+ std::ofstream dst(wasm::Path::to_path(output), std::ios::binary);
dst << src.rdbuf();
}
size_t wasm::file_size(std::string filename) {
- std::ifstream infile(filename, std::ifstream::ate | std::ifstream::binary);
+ std::ifstream infile(wasm::Path::to_path(filename),
+ std::ifstream::ate | std::ifstream::binary);
return infile.tellg();
}
diff --git a/src/support/path.cpp b/src/support/path.cpp
index 1858fe1e9..284618f09 100644
--- a/src/support/path.cpp
+++ b/src/support/path.cpp
@@ -19,9 +19,39 @@
//
#include "support/path.h"
+#ifdef USE_WSTRING_PATHS
+#include "windows.h"
+#endif
namespace wasm::Path {
+#ifdef USE_WSTRING_PATHS
+PathString to_path(const std::string& s) { return string_to_wstring(s); }
+
+std::wstring string_to_wstring(const std::string& s) {
+ const char* inptr = s.data();
+ size_t inlen = s.size();
+ size_t outlen = MultiByteToWideChar(CP_UTF8, 0, inptr, inlen, NULL, 0);
+ std::wstring outstr(outlen, 0);
+ const LPWSTR outptr = outstr.data();
+ MultiByteToWideChar(CP_UTF8, 0, inptr, inlen, outptr, outlen);
+ return outstr;
+}
+
+std::string wstring_to_string(const std::wstring& s) {
+ const wchar_t* inptr = s.data();
+ size_t inlen = s.size();
+ size_t outlen =
+ WideCharToMultiByte(CP_UTF8, 0, inptr, inlen, NULL, 0, NULL, NULL);
+ std::string outstr(outlen, 0);
+ const LPSTR outptr = outstr.data();
+ WideCharToMultiByte(CP_UTF8, 0, inptr, inlen, outptr, outlen, NULL, NULL);
+ return outstr;
+}
+#else
+PathString to_path(const std::string& s) { return s; }
+#endif
+
char getPathSeparator() {
// TODO: use c++17's path separator
// http://en.cppreference.com/w/cpp/experimental/fs/path
diff --git a/src/support/path.h b/src/support/path.h
index 78e85ca5c..8383bc53e 100644
--- a/src/support/path.h
+++ b/src/support/path.h
@@ -24,8 +24,22 @@
#include <cstdlib>
#include <string>
+#if defined(_WIN32) && !defined(__MINGW32__)
+#define USE_WSTRING_PATHS 1
+#endif
+
namespace wasm::Path {
+#ifdef USE_WSTRING_PATHS
+using PathString = std::wstring;
+std::wstring string_to_wstring(const std::string& s);
+std::string wstring_to_string(const std::wstring& s);
+#else
+using PathString = std::string;
+#endif
+
+PathString to_path(const std::string& s);
+
char getPathSeparator();
std::string getDirName(const std::string& path);
std::string getBaseName(const std::string& path);
diff --git a/src/tools/wasm-opt.cpp b/src/tools/wasm-opt.cpp
index 0d279dc08..5bbca1ea6 100644
--- a/src/tools/wasm-opt.cpp
+++ b/src/tools/wasm-opt.cpp
@@ -31,6 +31,7 @@
#include "support/command-line.h"
#include "support/debug.h"
#include "support/file.h"
+#include "support/path.h"
#include "wasm-binary.h"
#include "wasm-interpreter.h"
#include "wasm-io.h"
@@ -330,19 +331,19 @@ int main(int argc, const char* argv[]) {
if (emitJSWrapper.size() > 0) {
std::ofstream outfile;
- outfile.open(emitJSWrapper, std::ofstream::out);
+ outfile.open(wasm::Path::to_path(emitJSWrapper), std::ofstream::out);
outfile << generateJSWrapper(wasm);
outfile.close();
}
if (emitSpecWrapper.size() > 0) {
std::ofstream outfile;
- outfile.open(emitSpecWrapper, std::ofstream::out);
+ outfile.open(wasm::Path::to_path(emitSpecWrapper), std::ofstream::out);
outfile << generateSpecWrapper(wasm);
outfile.close();
}
if (emitWasm2CWrapper.size() > 0) {
std::ofstream outfile;
- outfile.open(emitWasm2CWrapper, std::ofstream::out);
+ outfile.open(wasm::Path::to_path(emitWasm2CWrapper), std::ofstream::out);
outfile << generateWasm2CWrapper(wasm);
outfile.close();
}
diff --git a/src/wasm/wasm-io.cpp b/src/wasm/wasm-io.cpp
index 65e6a982a..a340b4c23 100644
--- a/src/wasm/wasm-io.cpp
+++ b/src/wasm/wasm-io.cpp
@@ -26,6 +26,7 @@
#include "wasm-io.h"
#include "support/debug.h"
+#include "support/path.h"
#include "wasm-binary.h"
#include "wasm-s-parser.h"
#include "wat-parser.h"
@@ -69,7 +70,10 @@ void ModuleReader::readBinaryData(std::vector<char>& input,
parser.setSkipFunctionBodies(skipFunctionBodies);
if (sourceMapFilename.size()) {
sourceMapStream = std::make_unique<std::ifstream>();
- sourceMapStream->open(sourceMapFilename);
+ sourceMapStream->open(wasm::Path::to_path(sourceMapFilename));
+ if (!sourceMapStream->is_open()) {
+ Fatal() << "Failed opening '" << sourceMapFilename << "'";
+ }
parser.setDebugLocations(sourceMapStream.get());
}
parser.read();
@@ -89,7 +93,7 @@ void ModuleReader::readBinary(std::string filename,
bool ModuleReader::isBinaryFile(std::string filename) {
std::ifstream infile;
std::ios_base::openmode flags = std::ifstream::in | std::ifstream::binary;
- infile.open(filename, flags);
+ infile.open(wasm::Path::to_path(filename), flags);
char buffer[4] = {1, 2, 3, 4};
infile.read(buffer, 4);
infile.close();
@@ -157,7 +161,11 @@ void ModuleWriter::writeBinary(Module& wasm, Output& output) {
std::unique_ptr<std::ofstream> sourceMapStream;
if (sourceMapFilename.size()) {
sourceMapStream = std::make_unique<std::ofstream>();
- sourceMapStream->open(sourceMapFilename);
+ sourceMapStream->open(wasm::Path::to_path(sourceMapFilename));
+ if (!sourceMapStream->is_open()) {
+ Fatal() << "Failed opening sourcemap output file '" << sourceMapFilename
+ << "'";
+ }
writer.setSourceMap(sourceMapStream.get(), sourceMapUrl);
}
if (symbolMap.size() > 0) {
diff --git a/test/lit/unicode-filenames.wast b/test/lit/unicode-filenames.wast
new file mode 100644
index 000000000..f30ad9c09
--- /dev/null
+++ b/test/lit/unicode-filenames.wast
@@ -0,0 +1,21 @@
+;; RUN: wasm-as %s -o %t-❤.wasm --source-map %t-🗺️.map
+;; RUN: cat %t-🗺️.map | filecheck %s --check-prefix SOURCEMAP
+;; RUN: wasm-opt %t-❤.wasm -o %t-🤬.wasm --emit-js-wrapper %t-❤.js --input-source-map %t-🗺️.map --output-source-map %t-🗺️.out.map
+;; RUN: cat %t-🗺️.out.map | filecheck %s --check-prefix SOURCEMAP
+;; RUN: wasm-dis %t-🤬.wasm | filecheck %s --check-prefix MODULE
+
+;; MODULE: i32.add
+;; SOURCEMAP: src.cpp
+
+(module
+ (type $i32_i32_=>_i32 (func (param i32 i32) (result i32)))
+ (memory $0 256 256)
+ (export "add" (func $add))
+ (func $add (; 0 ;) (param $0 i32) (param $1 i32) (result i32)
+ (i32.add
+ ;;@ src.cpp:10:1
+ (local.get $0)
+ (local.get $1)
+ )
+ )
+)