diff options
author | John Wiegley <johnw@newartisans.com> | 2009-02-12 02:34:39 -0400 |
---|---|---|
committer | John Wiegley <johnw@newartisans.com> | 2009-02-12 02:34:39 -0400 |
commit | 6f2e3b88649543d625b76ac5542dd8838b28f0b1 (patch) | |
tree | af76d46718d4b2b3e51c05c3c53a09987505f02f /src/commodity.cc | |
parent | 9c9320bc586bc80922921945bfe6bb704c36c624 (diff) | |
download | fork-ledger-6f2e3b88649543d625b76ac5542dd8838b28f0b1.tar.gz fork-ledger-6f2e3b88649543d625b76ac5542dd8838b28f0b1.tar.bz2 fork-ledger-6f2e3b88649543d625b76ac5542dd8838b28f0b1.zip |
Properly handle UTF-8 characters in commodity strings.
Diffstat (limited to 'src/commodity.cc')
-rw-r--r-- | src/commodity.cc | 50 |
1 files changed, 49 insertions, 1 deletions
diff --git a/src/commodity.cc b/src/commodity.cc
index 5169e3af..2dfba880 100644
--- a/src/commodity.cc
+++ b/src/commodity.cc
@@ -571,7 +571,55 @@ void commodity_t::parse_symbol(std::istream& in, string& symbol)
     else
       throw_(amount_error, "Quoted commodity symbol lacks closing quote");
   } else {
-    READ_INTO(in, buf, 255, c, ! invalid_chars[static_cast<unsigned char>(c)]);
+    char * _p = buf;
+    c = in.peek();
+    while (_p - buf < 255 && in.good() && ! in.eof() && c != '\n') {
+      int bytes = 0;
+      int size = _p - buf;
+
+      unsigned char d = c;
+
+      // Check for the start of a UTF-8 multi-byte encoded string
+      if (d >= 192 && d <= 223 && size < 254)
+        bytes = 2;
+      else if (d >= 224 && d <= 239 && size < 253)
+        bytes = 3;
+      else if (d >= 240 && d <= 247 && size < 252)
+        bytes = 4;
+      else if (d >= 248 && d <= 251 && size < 251)
+        bytes = 5;
+      else if (d >= 252 && d <= 253 && size < 250)
+        bytes = 6;
+      else if (d >= 254) // UTF-8 encoding error
+        break;
+
+      if (bytes > 0) {         // we're looking at a UTF-8 encoding
+        for (int i = 0; i < bytes; i++) {
+          in.get(c);
+          if (in.bad() || in.eof())
+            break;
+          *_p++ = c;
+        }
+      }
+      else if (invalid_chars[static_cast<unsigned char>(c)]) {
+        break;
+      }
+      else {
+        in.get(c);
+        if (in.eof())
+          break;
+        if (c == '\\') {
+          in.get(c);
+          if (in.eof())
+            break;
+        }
+        *_p++ = c;
+      }
+
+      c = in.peek();
+    }
+    *_p = '\0';
+
     if (is_reserved_token(buf))
       buf[0] = '\0';
   }