summaryrefslogtreecommitdiff
path: root/lib/utfcpp/v3/samples/docsample.cpp
diff options
context:
space:
mode:
author Alexis Hildebrandt <afh@surryhill.net> 2023-02-01 15:29:14 +0800
committer Martin Michlmayr <tbm@cyrius.com> 2023-02-01 15:29:14 +0800
commit dd2fde2aa87372189e2548ef556b50a0b8ee45f2 (patch)
tree 8d8c4461b6b4ec0b7f617d1b2dc3f48766730242 /lib/utfcpp/v3/samples/docsample.cpp
parent 782fdb71b4c6131927317c0de72f3b72a2d37258 (diff)
download fork-ledger-dd2fde2aa87372189e2548ef556b50a0b8ee45f2.tar.gz
fork-ledger-dd2fde2aa87372189e2548ef556b50a0b8ee45f2.tar.bz2
fork-ledger-dd2fde2aa87372189e2548ef556b50a0b8ee45f2.zip
Updated lib/utfcpp
* Squashed 'lib/utfcpp/' content from commit 79835a5fa
  git-subtree-dir: lib/utfcpp
  git-subtree-split: 79835a5fa57271f07a90ed36123e30ae9741178e
* Change include path to utfcpp
Diffstat (limited to 'lib/utfcpp/v3/samples/docsample.cpp')
-rw-r--r-- lib/utfcpp/v3/samples/docsample.cpp 64
1 files changed, 64 insertions, 0 deletions
diff --git a/lib/utfcpp/v3/samples/docsample.cpp b/lib/utfcpp/v3/samples/docsample.cpp
new file mode 100644
index 00000000..65338872
--- /dev/null
+++ b/lib/utfcpp/v3/samples/docsample.cpp
@@ -0,0 +1,64 @@
+#include "../source/utf8.h"
+#include <iostream>
+#include <fstream>
+#include <string>
+#include <vector>
+
+
+using namespace std;
+
+// Sample driver for the utf8 library: for every line of the file named on the
+// command line it checks the UTF-8 encoding, prints the length (in code
+// points) of the valid part, and round-trips that part UTF-8 -> UTF-16 -> UTF-8.
+//
+// argc/argv: exactly one argument is expected, the path of the file to read.
+// Returns 0 in every case, including the usage and "could not open" paths.
+int main(int argc, char** argv)
+{
+    if (argc != 2) {
+        cout << "\nUsage: docsample filename\n";
+        return 0;
+    }
+    const char* test_file_path = argv[1];
+    // Open the test file (must be UTF-8 encoded)
+    ifstream fs8(test_file_path);
+    if (!fs8.is_open()) {
+        cout << "Could not open " << test_file_path << endl;
+        return 0;
+    }
+
+    unsigned line_count = 1;
+    string line;
+    // Play with all the lines in the file
+    while (getline(fs8, line)) {
+        // check for invalid utf-8 (for a simple yes/no check, there is also utf8::is_valid function)
+#if __cplusplus >= 201103L // C++ 11 or later
+        auto end_it = utf8::find_invalid(line.begin(), line.end());
+#else
+        string::iterator end_it = utf8::find_invalid(line.begin(), line.end());
+#endif // C++ 11
+        // Everything before end_it is valid UTF-8; only this prefix is
+        // reported, measured and converted below.
+        const string valid_part(line.begin(), end_it);
+        if (end_it != line.end()) {
+            cout << "Invalid UTF-8 encoding detected at line " << line_count << "\n";
+            cout << "This part is fine: " << valid_part << "\n";
+        }
+        // Get the line length (at least for the valid part)
+        ptrdiff_t length = utf8::distance(line.begin(), end_it);
+        cout << "Length of line " << line_count << " is " << length << "\n";
+
+        // Convert it to utf-16.
+        // Only the valid prefix is converted: the checked utf8to16 throws
+        // utf8::invalid_utf8 on malformed input and nothing here catches it,
+        // so passing the whole line would abort on the first bad line.
+#if __cplusplus >= 201103L // C++ 11 or later
+        u16string utf16line = utf8::utf8to16(valid_part);
+#else
+        vector<unsigned short> utf16line;
+        utf8::utf8to16(line.begin(), end_it, back_inserter(utf16line));
+#endif // C++ 11
+        // And back to utf-8;
+#if __cplusplus >= 201103L // C++ 11 or later
+        string utf8line = utf8::utf16to8(utf16line);
+#else
+        string utf8line;
+        utf8::utf16to8(utf16line.begin(), utf16line.end(), back_inserter(utf8line));
+#endif // C++ 11
+        // Confirm that the conversion went OK:
+        if (utf8line != valid_part)
+            cout << "Error in UTF-16 conversion at line: " << line_count << "\n";
+
+        ++line_count;
+    }
+
+    return 0;
+}