diff options
author | John Wiegley <johnw@newartisans.com> | 2009-01-23 19:49:22 -0400 |
---|---|---|
committer | John Wiegley <johnw@newartisans.com> | 2009-01-23 19:50:00 -0400 |
commit | 900a92e1158cb178335d16ff0912f5fc5701da32 (patch) | |
tree | 7da8de9f4a95a18659434c1968c8af43ee34f435 /src | |
parent | f52e04c2bac1d4900bfe8963f369178f7f76023f (diff) | |
download | fork-ledger-900a92e1158cb178335d16ff0912f5fc5701da32.tar.gz fork-ledger-900a92e1158cb178335d16ff0912f5fc5701da32.tar.bz2 fork-ledger-900a92e1158cb178335d16ff0912f5fc5701da32.zip |
Added support for Unicode text in Ledger files, thanks to 'utfcpp', which can
be located at http://utfcpp.sourceforge.net.
Diffstat (limited to 'src')
-rw-r--r-- | src/format.cc | 81 | ||||
-rw-r--r-- | src/format.h | 52 | ||||
-rw-r--r-- | src/report.cc | 2 | ||||
-rw-r--r-- | src/session.cc | 4 | ||||
-rw-r--r-- | src/textual.cc | 28 |
5 files changed, 114 insertions, 53 deletions
diff --git a/src/format.cc b/src/format.cc index d1f606d3..a4596761 100644 --- a/src/format.cc +++ b/src/format.cc @@ -308,58 +308,60 @@ void format_t::format(std::ostream& out_str, scope_t& scope) break; } - string temp = out.str(); - - DEBUG("format.expr", "output = \"" << temp << "\""); + unistring temp(out.str()); + string result; if (! elem->has_flags(ELEMENT_FORMATTED) && - elem->max_width > 0 && elem->max_width < temp.length()) - out_str << truncate(temp, elem->max_width); - else - out_str << temp; + elem->max_width > 0 && elem->max_width < temp.length()) { + result = truncate(temp, elem->max_width); + } else { + result = temp.extract(); + for (int i = 0; i < (int)elem->min_width - (int)temp.length(); i++) + result += " "; + } + + out_str << result; } } -string format_t::truncate(const string& str, unsigned int width, +string format_t::truncate(const unistring& ustr, unsigned int width, const bool is_account) { - const unsigned int len = str.length(); - if (len <= width) - return str; - assert(width < 4095); - char buf[4096]; + const unsigned int len = ustr.length(); + if (len <= width) + return ustr.extract(); + + std::ostringstream buf; switch (elision_style) { case TRUNCATE_LEADING: // This method truncates at the beginning. - std::strncpy(buf, str.c_str() + (len - width), width); - buf[0] = '.'; - buf[1] = '.'; + buf << ".." << ustr.extract(len - width, width); break; case TRUNCATE_MIDDLE: // This method truncates in the middle. - std::strncpy(buf, str.c_str(), width / 2); - std::strncpy(buf + width / 2, - str.c_str() + (len - (width / 2 + width % 2)), - width / 2 + width % 2); - buf[width / 2 - 1] = '.'; - buf[width / 2] = '.'; + buf << ustr.extract(0, width / 2) + << ".." + << ustr.extract(len - (width / 2 + width % 2), + width / 2 + width % 2); break; case ABBREVIATE: if (is_account) { std::list<string> parts; string::size_type beg = 0; - for (string::size_type pos = str.find(':'); + string strcopy(ustr.extract()); + for (string::size_type pos = strcopy.find(':'); pos != string::npos; - beg = pos + 1, pos = str.find(':', beg)) - parts.push_back(string(str, beg, pos - beg)); - parts.push_back(string(str, beg)); + beg = pos + 1, pos = strcopy.find(':', beg)) + parts.push_back(string(strcopy, beg, pos - beg)); + parts.push_back(string(strcopy, beg)); + + std::ostringstream result; - string result; unsigned int newlen = len; for (std::list<string>::iterator i = parts.begin(); i != parts.end(); @@ -367,28 +369,26 @@ string format_t::truncate(const string& str, unsigned int width, // Don't contract the last element std::list<string>::iterator x = i; if (++x == parts.end()) { - result += *i; + result << *i; break; } if (newlen > width) { - result += string(*i, 0, abbrev_length); - result += ":"; - newlen -= (*i).length() - abbrev_length; + unistring temp(*i); + result << temp.extract(0, abbrev_length) << ":"; + newlen -= temp.length() - abbrev_length; } else { - result += *i; - result += ":"; + result << *i << ":"; } } if (newlen > width) { // Even abbreviated its too big to show the last account, so // abbreviate all but the last and truncate at the beginning. - std::strncpy(buf, result.c_str() + (result.length() - width), width); - buf[0] = '.'; - buf[1] = '.'; + unistring temp(result.str()); + buf << ".." << temp.extract(temp.length() - width, width); } else { - std::strcpy(buf, result.c_str()); + buf << result.str(); } break; } @@ -396,14 +396,11 @@ string format_t::truncate(const string& str, unsigned int width, case TRUNCATE_TRAILING: // This method truncates at the end (the default). - std::strncpy(buf, str.c_str(), width - 2); - buf[width - 2] = '.'; - buf[width - 1] = '.'; + buf << ustr.extract(0, width -2) << ".."; break; } - buf[width] = '\0'; - return buf; + return buf.str(); } } // namespace ledger diff --git a/src/format.h b/src/format.h index 13a2fff2..fbfe452e 100644 --- a/src/format.h +++ b/src/format.h @@ -32,13 +32,63 @@ #ifndef _FORMAT_H #define _FORMAT_H +#define SUPPORT_UNICODE 1 + #include "journal.h" #include "expr.h" +#if defined(SUPPORT_UNICODE) +#include "utf8.h" +#endif namespace ledger { DECLARE_EXCEPTION(format_error, std::runtime_error); +#if defined(SUPPORT_UNICODE) +/** + * @class unistring + * + * @brief Abstract working with UTF-32 encoded Unicode strings + * + * The input to the string is a UTF8 encoded ledger::string, which can + * then have its true length be taken, or characters extracted. + */ +class unistring +{ + std::vector<uint32_t> utf32chars; + +public: + unistring(const string& input) + { + TRACE_CTOR(unistring, ""); + + const char * p = input.c_str(); + std::size_t len = input.length(); + + VERIFY(utf8::is_valid(p, p + len)); + + utf8::utf8to32(p, p + len, std::back_inserter(utf32chars)); + } + ~unistring() { + TRACE_DTOR(unistring); + } + + std::size_t length() const { + return utf32chars.size(); + } + + string extract(const std::size_t begin = 0, + const std::size_t len = 0) const + { + string utf8result; + utf8::utf32to8(utf32chars.begin() + begin, + utf32chars.begin() + begin + (len ? len : length()), + std::back_inserter(utf8result)); + return utf8result; + } +}; +#endif + class report_t; class format_t : public noncopyable @@ -135,7 +185,7 @@ public: elem->dump(out); } - static string truncate(const string& str, unsigned int width, + static string truncate(const unistring& str, unsigned int width, const bool is_account = false); }; diff --git a/src/report.cc b/src/report.cc index 2dfc9d76..4b45577b 100644 --- a/src/report.cc +++ b/src/report.cc @@ -789,6 +789,8 @@ expr_t::ptr_op_t report_t::lookup(const string& name) case 't': if (std::strcmp(p, "total_expr") == 0) return MAKE_FUNCTOR(report_t::get_total_expr); + else if (std::strcmp(p, "truncate") == 0) + return MAKE_FUNCTOR(report_t::get_total_expr); break; } diff --git a/src/session.cc b/src/session.cc index 9a3cccd5..8e78e9e9 100644 --- a/src/session.cc +++ b/src/session.cc @@ -71,9 +71,9 @@ void release_session_context() session_t::session_t() : register_format - ("%-.9(date) %-.20(payee) %-.23(account) %!12(print_balance(amount_expr, 12, 67)) " + ("%-.9(date) %-.20(payee) %-.23(account(23)) %!12(print_balance(amount_expr, 12, 67)) " "%!12(print_balance(display_total, 12, 80, true))\n%/" - "%31|%-.23(account) %!12(print_balance(amount_expr, 12, 67)) " + "%31|%-.23(account(23)) %!12(print_balance(amount_expr, 12, 67)) " "%!12(print_balance(display_total, 12, 80, true))\n"), wide_register_format ("%-.9D %-.35P %-.39A %22.108t %!22.132T\n%/" diff --git a/src/textual.cc b/src/textual.cc index e1a08f69..4600f3f3 100644 --- a/src/textual.cc +++ b/src/textual.cc @@ -596,10 +596,16 @@ static inline void parse_symbol(char *& p, string& symbol) bool textual_parser_t::test(std::istream& in) const { - char buf[5]; + char buf[12]; + char * p; - in.read(buf, 5); - if (std::strncmp(buf, "<?xml", 5) == 0) { + in.read(buf, 11); + if (utf8::is_bom(buf)) + p = &buf[3]; + else + p = buf; + + if (std::strncmp(p, "<?xml", 5) == 0) { #if defined(HAVE_EXPAT) || defined(HAVE_XMLPARSE) throw parse_error("Ledger file contains XML data, but format was not recognized"); #else @@ -688,10 +694,11 @@ unsigned int textual_parser_t::parse(std::istream& in, { TRACE_START(parsing_total, 1, "Total time spent parsing text:"); - static bool added_auto_entry_hook = false; - static char line[MAX_LINE + 1]; - unsigned int count = 0; - unsigned int errors = 0; + static bool added_auto_entry_hook = false; + static char linebuf[MAX_LINE + 1]; + char * line; + unsigned int count = 0; + unsigned int errors = 0; std::list<account_t *> account_stack; auto_entry_finalizer_t auto_entry_finalizer(&journal); @@ -714,10 +721,15 @@ unsigned int textual_parser_t::parse(std::istream& in, while (in.good() && ! in.eof()) { try { - in.getline(line, MAX_LINE); + in.getline(linebuf, MAX_LINE); if (in.eof()) break; + if (linenum == 1 && utf8::is_bom(linebuf)) + line = &linebuf[3]; + else + line = linebuf; + int len = std::strlen(line); if (line[len - 1] == '\r') line[--len] = '\0'; |