diff options
author | John Wiegley <johnw@newartisans.com> | 2009-02-12 02:34:39 -0400 |
---|---|---|
committer | John Wiegley <johnw@newartisans.com> | 2009-02-12 02:34:39 -0400 |
commit | 6f2e3b88649543d625b76ac5542dd8838b28f0b1 (patch) | |
tree | af76d46718d4b2b3e51c05c3c53a09987505f02f | |
parent | 9c9320bc586bc80922921945bfe6bb704c36c624 (diff) | |
download | fork-ledger-6f2e3b88649543d625b76ac5542dd8838b28f0b1.tar.gz fork-ledger-6f2e3b88649543d625b76ac5542dd8838b28f0b1.tar.bz2 fork-ledger-6f2e3b88649543d625b76ac5542dd8838b28f0b1.zip |
Properly handle UTF-8 characters in commodity strings.
-rw-r--r-- | Makefile.am | 1 | ||||
-rw-r--r-- | doc/sample.dat | 46 | ||||
-rw-r--r-- | src/amount.cc | 15 | ||||
-rw-r--r-- | src/amount.h | 1 | ||||
-rw-r--r-- | src/balance.cc | 12 | ||||
-rw-r--r-- | src/commodity.cc | 50 | ||||
-rw-r--r-- | src/format.h | 48 | ||||
-rw-r--r-- | src/report.cc | 5 | ||||
-rw-r--r-- | src/system.hh | 3 | ||||
-rw-r--r-- | src/utils.h | 76 | ||||
-rw-r--r-- | src/value.cc | 2 |
11 files changed, 136 insertions, 123 deletions
diff --git a/Makefile.am b/Makefile.am index b4c21869..22110b57 100644 --- a/Makefile.am +++ b/Makefile.am @@ -84,6 +84,7 @@ pkginclude_HEADERS = \ src/mask.h \ src/stream.h \ src/pstream.h \ + src/unistring.h \ \ src/amount.h \ src/commodity.h \ diff --git a/doc/sample.dat b/doc/sample.dat index b35991fe..5b34157e 100644 --- a/doc/sample.dat +++ b/doc/sample.dat @@ -1,42 +1,42 @@ N $ = account =~ /^Expenses:Books/ - (Liabilities:Taxes) -0.10 + (Liabilities:Taxes) -0.10 ~ Monthly - Assets:Bank:Checking $500.00 - Income:Salary + Assets:Bank:Checking $500.00 + Income:Salary 2004/05/01 * Checking balance - Assets:Bank:Checking $1,000.00 - Equity:Opening Balances + Assets:Bank:Checking $1,000.00 + Equity:Opening Balances 2004/05/03=2004/05/01 * Investment balance - Assets:Brokerage 50 AAPL @ $30.00 - Equity:Opening Balances + Assets:Brokerage 50 AAPL @ $30.00 + Equity:Opening Balances 2004/05/14 * Páy dày - Assets:Bank:Checking $500.00 - Income:Salary + Assets:Bank:Checking 500.00€ + Income:Salary 2004/05/14 * Another dày in which there is Páying - Asséts:Bánk:Chécking:Asséts:Bánk:Chécking $500.00 - Income:Salary + Asséts:Bánk:Chécking:Asséts:Bánk:Chécking $500.00 + Income:Salary 2004/05/14 * Another dày in which there is Páying - Русский язык:Русский язык:Русский язык:Русский язык $1000.00 - Income:Salary + Русский язык:Русский язык:Русский язык:Русский язык $1000.00 + Income:Salary 2004/05/27 Book Store - Expenses:Books $20.00 - Liabilities:MasterCard + Expenses:Books $20.00 + Liabilities:MasterCard 2004/05/27 (100) Credit card company - ; This is an entry note! - ; Sample: Value - Liabilities:MasterCard $20.00 - ; This is a transaction note! - ; Sample: Another Value - ; :MyTag: - Assets:Bank:Checking - ; :AnotherTag: + ; This is an entry note! + ; Sample: Value + Liabilities:MasterCard $20.00 + ; This is a transaction note! + ; Sample: Another Value + ; :MyTag: + Assets:Bank:Checking + ; :AnotherTag: diff --git a/src/amount.cc b/src/amount.cc index 3fb8ddaf..ffeaeea6 100644 --- a/src/amount.cc +++ b/src/amount.cc @@ -31,6 +31,7 @@ #include "amount.h" #include "commodity.h" +#include "unistring.h" namespace ledger { @@ -1001,6 +1002,20 @@ void amount_t::print(std::ostream& _out) const _out << out.str(); } +void amount_t::right_justify(std::ostream& out, int width) const +{ + std::ostringstream buf; + buf << *this; + + unistring temp(buf.str()); + + int spacing = width - int(temp.length()); + while (spacing-- > 0) + out << ' '; + + out << temp.extract(); +} + bool amount_t::valid() const { if (quantity) { diff --git a/src/amount.h b/src/amount.h index 0943a4c0..a9077a4b 100644 --- a/src/amount.h +++ b/src/amount.h @@ -640,6 +640,7 @@ public: of its commodity's display precision. */ void print(std::ostream& out) const; + void right_justify(std::ostream& out, int width) const; /*@}*/ diff --git a/src/balance.cc b/src/balance.cc index c9ca148d..7f8ed572 100644 --- a/src/balance.cc +++ b/src/balance.cc @@ -240,17 +240,11 @@ void balance_t::print(std::ostream& out, first = false; width = first_width; } - - out.width(width); - out.fill(' '); - out << std::right << *amount; + amount->right_justify(out, width); } - if (first) { - out.width(first_width); - out.fill(' '); - out << std::right << "0"; - } + if (first) + amount_t(0L).right_justify(out, first_width); } } // namespace ledger diff --git a/src/commodity.cc b/src/commodity.cc index 5169e3af..2dfba880 100644 --- a/src/commodity.cc +++ b/src/commodity.cc @@ -571,7 +571,55 @@ void commodity_t::parse_symbol(std::istream& in, string& symbol) else throw_(amount_error, "Quoted commodity symbol lacks closing quote"); } else { - READ_INTO(in, buf, 255, c, ! invalid_chars[static_cast<unsigned char>(c)]); + char * _p = buf; + c = in.peek(); + while (_p - buf < 255 && in.good() && ! in.eof() && c != '\n') { + int bytes = 0; + int size = _p - buf; + + unsigned char d = c; + + // Check for the start of a UTF-8 multi-byte encoded string + if (d >= 192 && d <= 223 && size < 254) + bytes = 2; + else if (d >= 224 && d <= 239 && size < 253) + bytes = 3; + else if (d >= 240 && d <= 247 && size < 252) + bytes = 4; + else if (d >= 248 && d <= 251 && size < 251) + bytes = 5; + else if (d >= 252 && d <= 253 && size < 250) + bytes = 6; + else if (d >= 254) // UTF-8 encoding error + break; + + if (bytes > 0) { // we're looking at a UTF-8 encoding + for (int i = 0; i < bytes; i++) { + in.get(c); + if (in.bad() || in.eof()) + break; + *_p++ = c; + } + } + else if (invalid_chars[static_cast<unsigned char>(c)]) { + break; + } + else { + in.get(c); + if (in.eof()) + break; + if (c == '\\') { + in.get(c); + if (in.eof()) + break; + } + *_p++ = c; + } + + c = in.peek(); + } + *_p = '\0'; + if (is_reserved_token(buf)) buf[0] = '\0'; } diff --git a/src/format.h b/src/format.h index 49350104..d1716dbf 100644 --- a/src/format.h +++ b/src/format.h @@ -47,58 +47,12 @@ #define _FORMAT_H #include "expr.h" +#include "unistring.h" namespace ledger { DECLARE_EXCEPTION(format_error, std::runtime_error); -#if defined(SUPPORT_UNICODE) - -/** - * @class unistring - * - * @brief Abstract working with UTF-32 encoded Unicode strings - * - * The input to the string is a UTF8 encoded ledger::string, which can - * then have its true length be taken, or characters extracted. - */ -class unistring -{ - std::vector<uint32_t> utf32chars; - -public: - unistring(const string& input) - { - TRACE_CTOR(unistring, ""); - - const char * p = input.c_str(); - std::size_t len = input.length(); - - VERIFY(utf8::is_valid(p, p + len)); - - utf8::utf8to32(p, p + len, std::back_inserter(utf32chars)); - } - ~unistring() { - TRACE_DTOR(unistring); - } - - std::size_t length() const { - return utf32chars.size(); - } - - string extract(const std::size_t begin = 0, - const std::size_t len = 0) const - { - string utf8result; - utf8::utf32to8(utf32chars.begin() + begin, - utf32chars.begin() + begin + (len ? len : length()), - std::back_inserter(utf8result)); - return utf8result; - } -}; - -#endif // SUPPORT_UNICODE - class report_t; /** diff --git a/src/report.cc b/src/report.cc index e319c8aa..7d8353d9 100644 --- a/src/report.cc +++ b/src/report.cc @@ -68,7 +68,8 @@ report_t::report_t(session_t& _session) : session(_session) " %12(amount)%(comment | \"\")\n%/\n"); HANDLER(balance_format_).on( - "%20(strip(display_total)) %(depth_spacer)%-(partial_account)\n"); + "%20(print_balance(strip(display_total), 20))" + " %(depth_spacer)%-(partial_account)\n"); HANDLER(equity_format_).on("\n%D %Y%C%P\n%/ %-34W %12t\n"); @@ -191,7 +192,7 @@ value_t report_t::fn_print_balance(call_scope_t& args) std::ostringstream out; args[0].strip_annotations(what_to_keep()) - .print(out, *first_width, *latter_width, + .print(out, *first_width, latter_width ? *latter_width : -1, HANDLED(date_format_) ? HANDLER(date_format_).str() : optional<string>()); diff --git a/src/system.hh b/src/system.hh index bffd041c..96b574e5 100644 --- a/src/system.hh +++ b/src/system.hh @@ -138,10 +138,7 @@ typedef std::ostream::pos_type ostream_pos_type; #include <gmp.h> #include <mpfr.h> #include "sha1.h" -#define SUPPORT_UNICODE 1 -#if defined(SUPPORT_UNICODE) #include "utf8.h" -#endif #ifdef HAVE_LIBEDIT #include <editline/readline.h> diff --git a/src/utils.h b/src/utils.h index d4011442..93cb9a4a 100644 --- a/src/utils.h +++ b/src/utils.h @@ -546,50 +546,52 @@ inline char * next_element(char * buf, bool variable = false) { inline char peek_next_nonws(std::istream& in) { char c = in.peek(); - while (! in.eof() && std::isspace(c)) { + while (in.good() && ! in.eof() && std::isspace(c)) { in.get(c); c = in.peek(); } return c; } -#define READ_INTO(str, targ, size, var, cond) { \ - char * _p = targ; \ - var = str.peek(); \ - while (! str.eof() && var != '\n' && (cond) && _p - targ < size) { \ - str.get(var); \ - if (str.eof()) \ - break; \ - if (var == '\\') { \ - str.get(var); \ - if (in.eof()) \ - break; \ - } \ - *_p++ = var; \ - var = str.peek(); \ - } \ - *_p = '\0'; \ -} +#define READ_INTO(str, targ, size, var, cond) { \ + char * _p = targ; \ + var = str.peek(); \ + while (str.good() && ! str.eof() && var != '\n' && \ + (cond) && _p - targ < size) { \ + str.get(var); \ + if (str.eof()) \ + break; \ + if (var == '\\') { \ + str.get(var); \ + if (in.eof()) \ + break; \ + } \ + *_p++ = var; \ + var = str.peek(); \ + } \ + *_p = '\0'; \ + } -#define READ_INTO_(str, targ, size, var, idx, cond) { \ - char * _p = targ; \ - var = str.peek(); \ - while (! str.eof() && var != '\n' && (cond) && _p - targ < size) { \ - str.get(var); \ - if (str.eof()) \ - break; \ - idx++; \ - if (var == '\\') { \ - str.get(var); \ - if (in.eof()) \ - break; \ - idx++; \ - } \ - *_p++ = var; \ - var = str.peek(); \ - } \ - *_p = '\0'; \ -} +#define READ_INTO_(str, targ, size, var, idx, cond) { \ + char * _p = targ; \ + var = str.peek(); \ + while (str.good() && ! str.eof() && var != '\n' && \ + (cond) && _p - targ < size) { \ + str.get(var); \ + if (str.eof()) \ + break; \ + idx++; \ + if (var == '\\') { \ + str.get(var); \ + if (in.eof()) \ + break; \ + idx++; \ + } \ + *_p++ = var; \ + var = str.peek(); \ + } \ + *_p = '\0'; \ + } } // namespace ledger diff --git a/src/value.cc b/src/value.cc index 482dca21..a39eada2 100644 --- a/src/value.cc +++ b/src/value.cc @@ -1271,7 +1271,7 @@ void value_t::print(std::ostream& out, break; case AMOUNT: - out << as_amount(); + as_amount().right_justify(out, first_width); break; case STRING: |