diff options
-rw-r--r-- | .gitmodules | 3 | ||||
-rw-r--r-- | Makefile.am | 3 | ||||
-rw-r--r-- | doc/LICENSE-utfcpp | 23 | ||||
-rw-r--r-- | doc/sample.dat | 10 | ||||
m--------- | lib/utfcpp | 0 | ||||
-rw-r--r-- | src/format.cc | 81 | ||||
-rw-r--r-- | src/format.h | 52 | ||||
-rw-r--r-- | src/report.cc | 2 | ||||
-rw-r--r-- | src/session.cc | 4 | ||||
-rw-r--r-- | src/textual.cc | 28 |
10 files changed, 151 insertions, 55 deletions
diff --git a/.gitmodules b/.gitmodules index b685b019..33b949c0 100644 --- a/.gitmodules +++ b/.gitmodules @@ -10,3 +10,6 @@ [submodule "lib/libofx"] path = lib/libofx url = git://newartisans.com/libofx.git +[submodule "lib/utfcpp"] + path = lib/utfcpp + url = git://github.com/jwiegley/utfcpp.git diff --git a/Makefile.am b/Makefile.am index c360bf96..5c8384a9 100644 --- a/Makefile.am +++ b/Makefile.am @@ -5,7 +5,8 @@ EXTRA_DIST = autogen.sh contrib lib_LTLIBRARIES = libamounts.la libledger.la -libamounts_la_CPPFLAGS = -I$(srcdir)/src -I$(srcdir)/lib +libamounts_la_CPPFLAGS = -I$(srcdir)/src -I$(srcdir)/lib \ + -I$(srcdir)/lib/utfcpp/source if HAVE_GDTOA libamounts_la_CPPFLAGS += -I$(top_builddir)/lib/gdtoa -I$(srcdir)/lib/gdtoa endif diff --git a/doc/LICENSE-utfcpp b/doc/LICENSE-utfcpp new file mode 100644 index 00000000..1751a003 --- /dev/null +++ b/doc/LICENSE-utfcpp @@ -0,0 +1,23 @@ +Copyright 2006 Nemanja Trifunovic + +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/doc/sample.dat b/doc/sample.dat index 907f19ab..a579dd0a 100644 --- a/doc/sample.dat +++ b/doc/sample.dat @@ -15,10 +15,18 @@ N $ Assets:Brokerage 50 AAPL @ $30.00 Equity:Opening Balances -2004/05/14 * Pay day +2004/05/14 * Páy dày Assets:Bank:Checking $500.00 Income:Salary +2004/05/14 * Another dày in which there is Páying + Asséts:Bánk:Chécking:Asséts:Bánk:Chécking $500.00 + Income:Salary + +2004/05/14 * Another dày in which there is Páying + Русский язык:Русский язык:Русский язык:Русский язык $1000.00 + Income:Salary + 2004/05/27 Book Store Expenses:Books $20.00 Liabilities:MasterCard diff --git a/lib/utfcpp b/lib/utfcpp new file mode 160000 +Subproject 54676a423c356bf128f9c8fc0e7ea68cbcb7587 diff --git a/src/format.cc b/src/format.cc index d1f606d3..a4596761 100644 --- a/src/format.cc +++ b/src/format.cc @@ -308,58 +308,60 @@ void format_t::format(std::ostream& out_str, scope_t& scope) break; } - string temp = out.str(); - - DEBUG("format.expr", "output = \"" << temp << "\""); + unistring temp(out.str()); + string result; if (! elem->has_flags(ELEMENT_FORMATTED) && - elem->max_width > 0 && elem->max_width < temp.length()) - out_str << truncate(temp, elem->max_width); - else - out_str << temp; + elem->max_width > 0 && elem->max_width < temp.length()) { + result = truncate(temp, elem->max_width); + } else { + result = temp.extract(); + for (int i = 0; i < (int)elem->min_width - (int)temp.length(); i++) + result += " "; + } + + out_str << result; } } -string format_t::truncate(const string& str, unsigned int width, +string format_t::truncate(const unistring& ustr, unsigned int width, const bool is_account) { - const unsigned int len = str.length(); - if (len <= width) - return str; - assert(width < 4095); - char buf[4096]; + const unsigned int len = ustr.length(); + if (len <= width) + return ustr.extract(); + + std::ostringstream buf; switch (elision_style) { case TRUNCATE_LEADING: // This method truncates at the beginning. - std::strncpy(buf, str.c_str() + (len - width), width); - buf[0] = '.'; - buf[1] = '.'; + buf << ".." << ustr.extract(len - width, width); break; case TRUNCATE_MIDDLE: // This method truncates in the middle. - std::strncpy(buf, str.c_str(), width / 2); - std::strncpy(buf + width / 2, - str.c_str() + (len - (width / 2 + width % 2)), - width / 2 + width % 2); - buf[width / 2 - 1] = '.'; - buf[width / 2] = '.'; + buf << ustr.extract(0, width / 2) + << ".." + << ustr.extract(len - (width / 2 + width % 2), + width / 2 + width % 2); break; case ABBREVIATE: if (is_account) { std::list<string> parts; string::size_type beg = 0; - for (string::size_type pos = str.find(':'); + string strcopy(ustr.extract()); + for (string::size_type pos = strcopy.find(':'); pos != string::npos; - beg = pos + 1, pos = str.find(':', beg)) - parts.push_back(string(str, beg, pos - beg)); - parts.push_back(string(str, beg)); + beg = pos + 1, pos = strcopy.find(':', beg)) + parts.push_back(string(strcopy, beg, pos - beg)); + parts.push_back(string(strcopy, beg)); + + std::ostringstream result; - string result; unsigned int newlen = len; for (std::list<string>::iterator i = parts.begin(); i != parts.end(); @@ -367,28 +369,26 @@ string format_t::truncate(const string& str, unsigned int width, // Don't contract the last element std::list<string>::iterator x = i; if (++x == parts.end()) { - result += *i; + result << *i; break; } if (newlen > width) { - result += string(*i, 0, abbrev_length); - result += ":"; - newlen -= (*i).length() - abbrev_length; + unistring temp(*i); + result << temp.extract(0, abbrev_length) << ":"; + newlen -= temp.length() - abbrev_length; } else { - result += *i; - result += ":"; + result << *i << ":"; } } if (newlen > width) { // Even abbreviated its too big to show the last account, so // abbreviate all but the last and truncate at the beginning. - std::strncpy(buf, result.c_str() + (result.length() - width), width); - buf[0] = '.'; - buf[1] = '.'; + unistring temp(result.str()); + buf << ".." << temp.extract(temp.length() - width, width); } else { - std::strcpy(buf, result.c_str()); + buf << result.str(); } break; } @@ -396,14 +396,11 @@ string format_t::truncate(const string& str, unsigned int width, case TRUNCATE_TRAILING: // This method truncates at the end (the default). - std::strncpy(buf, str.c_str(), width - 2); - buf[width - 2] = '.'; - buf[width - 1] = '.'; + buf << ustr.extract(0, width -2) << ".."; break; } - buf[width] = '\0'; - return buf; + return buf.str(); } } // namespace ledger diff --git a/src/format.h b/src/format.h index 13a2fff2..fbfe452e 100644 --- a/src/format.h +++ b/src/format.h @@ -32,13 +32,63 @@ #ifndef _FORMAT_H #define _FORMAT_H +#define SUPPORT_UNICODE 1 + #include "journal.h" #include "expr.h" +#if defined(SUPPORT_UNICODE) +#include "utf8.h" +#endif namespace ledger { DECLARE_EXCEPTION(format_error, std::runtime_error); +#if defined(SUPPORT_UNICODE) +/** + * @class unistring + * + * @brief Abstract working with UTF-32 encoded Unicode strings + * + * The input to the string is a UTF8 encoded ledger::string, which can + * then have its true length be taken, or characters extracted. + */ +class unistring +{ + std::vector<uint32_t> utf32chars; + +public: + unistring(const string& input) + { + TRACE_CTOR(unistring, ""); + + const char * p = input.c_str(); + std::size_t len = input.length(); + + VERIFY(utf8::is_valid(p, p + len)); + + utf8::utf8to32(p, p + len, std::back_inserter(utf32chars)); + } + ~unistring() { + TRACE_DTOR(unistring); + } + + std::size_t length() const { + return utf32chars.size(); + } + + string extract(const std::size_t begin = 0, + const std::size_t len = 0) const + { + string utf8result; + utf8::utf32to8(utf32chars.begin() + begin, + utf32chars.begin() + begin + (len ? len : length()), + std::back_inserter(utf8result)); + return utf8result; + } +}; +#endif + class report_t; class format_t : public noncopyable @@ -135,7 +185,7 @@ public: elem->dump(out); } - static string truncate(const string& str, unsigned int width, + static string truncate(const unistring& str, unsigned int width, const bool is_account = false); }; diff --git a/src/report.cc b/src/report.cc index 2dfc9d76..4b45577b 100644 --- a/src/report.cc +++ b/src/report.cc @@ -789,6 +789,8 @@ expr_t::ptr_op_t report_t::lookup(const string& name) case 't': if (std::strcmp(p, "total_expr") == 0) return MAKE_FUNCTOR(report_t::get_total_expr); + else if (std::strcmp(p, "truncate") == 0) + return MAKE_FUNCTOR(report_t::get_total_expr); break; } diff --git a/src/session.cc b/src/session.cc index 9a3cccd5..8e78e9e9 100644 --- a/src/session.cc +++ b/src/session.cc @@ -71,9 +71,9 @@ void release_session_context() session_t::session_t() : register_format - ("%-.9(date) %-.20(payee) %-.23(account) %!12(print_balance(amount_expr, 12, 67)) " + ("%-.9(date) %-.20(payee) %-.23(account(23)) %!12(print_balance(amount_expr, 12, 67)) " "%!12(print_balance(display_total, 12, 80, true))\n%/" - "%31|%-.23(account) %!12(print_balance(amount_expr, 12, 67)) " + "%31|%-.23(account(23)) %!12(print_balance(amount_expr, 12, 67)) " "%!12(print_balance(display_total, 12, 80, true))\n"), wide_register_format ("%-.9D %-.35P %-.39A %22.108t %!22.132T\n%/" diff --git a/src/textual.cc b/src/textual.cc index e1a08f69..4600f3f3 100644 --- a/src/textual.cc +++ b/src/textual.cc @@ -596,10 +596,16 @@ static inline void parse_symbol(char *& p, string& symbol) bool textual_parser_t::test(std::istream& in) const { - char buf[5]; + char buf[12]; + char * p; - in.read(buf, 5); - if (std::strncmp(buf, "<?xml", 5) == 0) { + in.read(buf, 11); + if (utf8::is_bom(buf)) + p = &buf[3]; + else + p = buf; + + if (std::strncmp(p, "<?xml", 5) == 0) { #if defined(HAVE_EXPAT) || defined(HAVE_XMLPARSE) throw parse_error("Ledger file contains XML data, but format was not recognized"); #else @@ -688,10 +694,11 @@ unsigned int textual_parser_t::parse(std::istream& in, { TRACE_START(parsing_total, 1, "Total time spent parsing text:"); - static bool added_auto_entry_hook = false; - static char line[MAX_LINE + 1]; - unsigned int count = 0; - unsigned int errors = 0; + static bool added_auto_entry_hook = false; + static char linebuf[MAX_LINE + 1]; + char * line; + unsigned int count = 0; + unsigned int errors = 0; std::list<account_t *> account_stack; auto_entry_finalizer_t auto_entry_finalizer(&journal); @@ -714,10 +721,15 @@ unsigned int textual_parser_t::parse(std::istream& in, while (in.good() && ! in.eof()) { try { - in.getline(line, MAX_LINE); + in.getline(linebuf, MAX_LINE); if (in.eof()) break; + if (linenum == 1 && utf8::is_bom(linebuf)) + line = &linebuf[3]; + else + line = linebuf; + int len = std::strlen(line); if (line[len - 1] == '\r') line[--len] = '\0'; |