1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
|
#include "../extern/ftest/ftest.h"
#include "utf8.h"
#include <string>
using namespace utf8;
using namespace std;
#if __cplusplus >= 201103L // C++ 11 or later
// Checks the bytes that append() writes into a std::string for code points
// whose expected encodings are two, three and four bytes long.
TEST(CPP11APITests, test_append)
{
    string encoded;

    // 0x0448: expected bytes d1 88
    append(0x0448, encoded);
    EXPECT_EQ(encoded[0], static_cast<char>(0xd1));
    EXPECT_EQ(encoded[1], static_cast<char>(0x88));
    EXPECT_EQ(encoded.size(), 2);

    // 0x65e5: expected bytes e6 97 a5
    encoded.clear();
    append(0x65e5, encoded);
    EXPECT_EQ(encoded[0], static_cast<char>(0xe6));
    EXPECT_EQ(encoded[1], static_cast<char>(0x97));
    EXPECT_EQ(encoded[2], static_cast<char>(0xa5));
    EXPECT_EQ(encoded.size(), 3);

    // 0x3044: expected bytes e3 81 84
    encoded.clear();
    append(0x3044, encoded);
    EXPECT_EQ(encoded[0], static_cast<char>(0xe3));
    EXPECT_EQ(encoded[1], static_cast<char>(0x81));
    EXPECT_EQ(encoded[2], static_cast<char>(0x84));
    EXPECT_EQ(encoded.size(), 3);

    // 0x10346: expected bytes f0 90 8d 86
    encoded.clear();
    append(0x10346, encoded);
    EXPECT_EQ(encoded[0], static_cast<char>(0xf0));
    EXPECT_EQ(encoded[1], static_cast<char>(0x90));
    EXPECT_EQ(encoded[2], static_cast<char>(0x8d));
    EXPECT_EQ(encoded[3], static_cast<char>(0x86));
    EXPECT_EQ(encoded.size(), 4);
}
// Checks that append16() stores 0x0448 in a u16string as exactly one
// code unit with the same value.
TEST(CPP11APITests, test_append16)
{
    u16string encoded;
    append16(0x0448, encoded);

    EXPECT_EQ(encoded[0], static_cast<char16_t>(0x0448));
    EXPECT_EQ(encoded.size(), 1);
}
// Exercises utf16to8() on a whole u16string, then the iterator-based
// utf8::unchecked::utf16to8() on a short ASCII-only string.
TEST(CPP11APITests, test_utf16to8)
{
    // Five UTF-16 code units; the converted string is expected to be 10 bytes.
    u16string source16 = {0x41, 0x0448, 0x65e5, 0xd834, 0xdd1e};
    string converted = utf16to8(source16);
    EXPECT_EQ(converted.size(), 10);

    // Iterator overload writing through a back_inserter.
    u16string ascii16 = u"h!";
    string ascii8;
    utf8::unchecked::utf16to8(ascii16.begin(), ascii16.end(),
                              std::back_inserter(ascii8));
    EXPECT_EQ(ascii8, "h!");
}
// Converts a nine-byte UTF-8 string with utf8to16() and checks the number
// of resulting code units and the values of the last two.
TEST(CPP11APITests, test_utf8to16)
{
    string source8 = "\xe6\x97\xa5\xd1\x88\xf0\x9d\x84\x9e";
    u16string converted = utf8to16(source8);

    EXPECT_EQ(converted.size(), 4);
    // The final four input bytes are expected to become the pair d834 dd1e.
    EXPECT_EQ(converted[2], 0xd834);
    EXPECT_EQ(converted[3], 0xdd1e);

    // Compile-only checks: the call must accept string literals directly.
    utf8to16(u8"simple");
    utf8to16("simple");
}
// Converts three code points with utf32to8() and checks that the result
// is nine bytes long.
TEST(CPP11APITests, test_utf32to8)
{
    u32string source32 = {0x448, 0x65E5, 0x10346};
    string converted = utf32to8(source32);

    EXPECT_EQ(converted.size(), 9);
}
// Passes a C string of five UTF-8 bytes to utf8to32() and checks that two
// code points come back.
TEST(CPP11APITests, test_utf8to32)
{
    const char* five_bytes = "\xe6\x97\xa5\xd1\x88";
    u32string converted = utf8to32(five_bytes);

    EXPECT_EQ(converted.size(), 2);
}
// Checks that find_invalid() reports position 5 for a six-byte string whose
// last byte is 0xfa.
TEST(CPP11APITests, test_find_invalid)
{
    string broken = "\xe6\x97\xa5\xd1\x88\xfa";
    auto first_bad = find_invalid(broken);

    EXPECT_EQ(first_bad, 5);
}
// Checks is_valid() on one string expected to be rejected and one expected
// to be accepted.
TEST(CPP11APITests, test_is_valid)
{
    // Ends in a stray 0xfa byte: expected to be reported as invalid.
    string broken = "\xe6\x97\xa5\xd1\x88\xfa";
    bool broken_ok = is_valid(broken);
    EXPECT_FALSE(broken_ok);

    // Same leading bytes followed by a four-byte sequence: expected valid.
    string well_formed = "\xe6\x97\xa5\xd1\x88\xf0\x9d\x84\x9e";
    bool well_formed_ok = is_valid(well_formed);
    EXPECT_TRUE(well_formed_ok);
}
// Runs replace_invalid() with '?' as the replacement and checks both that
// the output passes is_valid() and that it equals "a????z".
TEST(CPP11APITests, test_replace_invalid)
{
    string damaged = "a\x80\xe0\xa0\xc0\xaf\xed\xa0\x80z";
    string repaired = replace_invalid(damaged, '?');

    bool repaired_ok = is_valid(repaired);
    EXPECT_TRUE(repaired_ok);

    const string expected = "a????z";
    EXPECT_EQ(expected, repaired);
}
// Checks starts_with_bom() on the three bytes ef bb bf (expected true) and
// on a string that begins with other bytes (expected false).
TEST(CPP11APITests, test_starts_with_bom)
{
    string bom_only = {static_cast<char>(0xef),
                       static_cast<char>(0xbb),
                       static_cast<char>(0xbf)};
    bool has_bom = starts_with_bom(bom_only);
    EXPECT_TRUE(has_bom);

    string plain_text = "\xf0\x90\x8d\x86\xe6\x97\xa5\xd1\x88";
    bool plain_has_bom = starts_with_bom(plain_text);
    EXPECT_FALSE(plain_has_bom);
}
#endif // C++ 11 or later
|