summary | refs | log | tree | commit | diff
path: root/src/commodity.cc
diff options
context:
space:
mode:
author John Wiegley <johnw@newartisans.com> 2009-02-12 02:34:39 -0400
committer John Wiegley <johnw@newartisans.com> 2009-02-12 02:34:39 -0400
commit 6f2e3b88649543d625b76ac5542dd8838b28f0b1 (patch)
tree af76d46718d4b2b3e51c05c3c53a09987505f02f /src/commodity.cc
parent 9c9320bc586bc80922921945bfe6bb704c36c624 (diff)
download fork-ledger-6f2e3b88649543d625b76ac5542dd8838b28f0b1.tar.gz
fork-ledger-6f2e3b88649543d625b76ac5542dd8838b28f0b1.tar.bz2
fork-ledger-6f2e3b88649543d625b76ac5542dd8838b28f0b1.zip
Properly handle UTF-8 characters in commodity strings.
Diffstat (limited to 'src/commodity.cc')
-rw-r--r-- src/commodity.cc 50
1 files changed, 49 insertions, 1 deletions
diff --git a/src/commodity.cc b/src/commodity.cc
index 5169e3af..2dfba880 100644
--- a/src/commodity.cc
+++ b/src/commodity.cc
@@ -571,7 +571,55 @@ void commodity_t::parse_symbol(std::istream& in, string& symbol)
else
throw_(amount_error, "Quoted commodity symbol lacks closing quote");
} else {
- READ_INTO(in, buf, 255, c, ! invalid_chars[static_cast<unsigned char>(c)]);
+ char * _p = buf;
+ c = in.peek();
+ while (_p - buf < 255 && in.good() && ! in.eof() && c != '\n') {
+ int bytes = 0;
+ int size = _p - buf;
+
+ unsigned char d = c;
+
+ // Check for the start of a UTF-8 multi-byte encoded string
+ if (d >= 192 && d <= 223 && size < 254)
+ bytes = 2;
+ else if (d >= 224 && d <= 239 && size < 253)
+ bytes = 3;
+ else if (d >= 240 && d <= 247 && size < 252)
+ bytes = 4;
+ else if (d >= 248 && d <= 251 && size < 251)
+ bytes = 5;
+ else if (d >= 252 && d <= 253 && size < 250)
+ bytes = 6;
+ else if (d >= 254) // UTF-8 encoding error
+ break;
+
+ if (bytes > 0) { // we're looking at a UTF-8 encoding
+ for (int i = 0; i < bytes; i++) {
+ in.get(c);
+ if (in.bad() || in.eof())
+ break;
+ *_p++ = c;
+ }
+ }
+ else if (invalid_chars[static_cast<unsigned char>(c)]) {
+ break;
+ }
+ else {
+ in.get(c);
+ if (in.eof())
+ break;
+ if (c == '\\') {
+ in.get(c);
+ if (in.eof())
+ break;
+ }
+ *_p++ = c;
+ }
+
+ c = in.peek();
+ }
+ *_p = '\0';
+
if (is_reserved_token(buf))
buf[0] = '\0';
}