summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- .gitmodules 3
-rw-r--r-- Makefile.am 3
-rw-r--r-- doc/LICENSE-utfcpp 23
-rw-r--r-- doc/sample.dat 10
m--------- lib/utfcpp 0
-rw-r--r-- src/format.cc 81
-rw-r--r-- src/format.h 52
-rw-r--r-- src/report.cc 2
-rw-r--r-- src/session.cc 4
-rw-r--r-- src/textual.cc 28
10 files changed, 151 insertions, 55 deletions
diff --git a/.gitmodules b/.gitmodules
index b685b019..33b949c0 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -10,3 +10,6 @@
[submodule "lib/libofx"]
path = lib/libofx
url = git://newartisans.com/libofx.git
+[submodule "lib/utfcpp"]
+ path = lib/utfcpp
+ url = git://github.com/jwiegley/utfcpp.git
diff --git a/Makefile.am b/Makefile.am
index c360bf96..5c8384a9 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -5,7 +5,8 @@ EXTRA_DIST = autogen.sh contrib
lib_LTLIBRARIES = libamounts.la libledger.la
-libamounts_la_CPPFLAGS = -I$(srcdir)/src -I$(srcdir)/lib
+libamounts_la_CPPFLAGS = -I$(srcdir)/src -I$(srcdir)/lib \
+ -I$(srcdir)/lib/utfcpp/source
if HAVE_GDTOA
libamounts_la_CPPFLAGS += -I$(top_builddir)/lib/gdtoa -I$(srcdir)/lib/gdtoa
endif
diff --git a/doc/LICENSE-utfcpp b/doc/LICENSE-utfcpp
new file mode 100644
index 00000000..1751a003
--- /dev/null
+++ b/doc/LICENSE-utfcpp
@@ -0,0 +1,23 @@
+Copyright 2006 Nemanja Trifunovic
+
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
diff --git a/doc/sample.dat b/doc/sample.dat
index 907f19ab..a579dd0a 100644
--- a/doc/sample.dat
+++ b/doc/sample.dat
@@ -15,10 +15,18 @@ N $
Assets:Brokerage 50 AAPL @ $30.00
Equity:Opening Balances
-2004/05/14 * Pay day
+2004/05/14 * Páy dày
Assets:Bank:Checking $500.00
Income:Salary
+2004/05/14 * Another dày in which there is Páying
+ Asséts:Bánk:Chécking:Asséts:Bánk:Chécking $500.00
+ Income:Salary
+
+2004/05/14 * Another dày in which there is Páying
+ Русский язык:Русский язык:Русский язык:Русский язык $1000.00
+ Income:Salary
+
2004/05/27 Book Store
Expenses:Books $20.00
Liabilities:MasterCard
diff --git a/lib/utfcpp b/lib/utfcpp
new file mode 160000
+Subproject 54676a423c356bf128f9c8fc0e7ea68cbcb7587
diff --git a/src/format.cc b/src/format.cc
index d1f606d3..a4596761 100644
--- a/src/format.cc
+++ b/src/format.cc
@@ -308,58 +308,60 @@ void format_t::format(std::ostream& out_str, scope_t& scope)
break;
}
- string temp = out.str();
-
- DEBUG("format.expr", "output = \"" << temp << "\"");
+ unistring temp(out.str());
+ string result;
if (! elem->has_flags(ELEMENT_FORMATTED) &&
- elem->max_width > 0 && elem->max_width < temp.length())
- out_str << truncate(temp, elem->max_width);
- else
- out_str << temp;
+ elem->max_width > 0 && elem->max_width < temp.length()) {
+ result = truncate(temp, elem->max_width);
+ } else {
+ result = temp.extract();
+ for (int i = 0; i < (int)elem->min_width - (int)temp.length(); i++)
+ result += " ";
+ }
+
+ out_str << result;
}
}
-string format_t::truncate(const string& str, unsigned int width,
+string format_t::truncate(const unistring& ustr, unsigned int width,
const bool is_account)
{
- const unsigned int len = str.length();
- if (len <= width)
- return str;
-
assert(width < 4095);
- char buf[4096];
+ const unsigned int len = ustr.length();
+ if (len <= width)
+ return ustr.extract();
+
+ std::ostringstream buf;
switch (elision_style) {
case TRUNCATE_LEADING:
// This method truncates at the beginning.
- std::strncpy(buf, str.c_str() + (len - width), width);
- buf[0] = '.';
- buf[1] = '.';
+ buf << ".." << ustr.extract(len - width, width);
break;
case TRUNCATE_MIDDLE:
// This method truncates in the middle.
- std::strncpy(buf, str.c_str(), width / 2);
- std::strncpy(buf + width / 2,
- str.c_str() + (len - (width / 2 + width % 2)),
- width / 2 + width % 2);
- buf[width / 2 - 1] = '.';
- buf[width / 2] = '.';
+ buf << ustr.extract(0, width / 2)
+ << ".."
+ << ustr.extract(len - (width / 2 + width % 2),
+ width / 2 + width % 2);
break;
case ABBREVIATE:
if (is_account) {
std::list<string> parts;
string::size_type beg = 0;
- for (string::size_type pos = str.find(':');
+ string strcopy(ustr.extract());
+ for (string::size_type pos = strcopy.find(':');
pos != string::npos;
- beg = pos + 1, pos = str.find(':', beg))
- parts.push_back(string(str, beg, pos - beg));
- parts.push_back(string(str, beg));
+ beg = pos + 1, pos = strcopy.find(':', beg))
+ parts.push_back(string(strcopy, beg, pos - beg));
+ parts.push_back(string(strcopy, beg));
+
+ std::ostringstream result;
- string result;
unsigned int newlen = len;
for (std::list<string>::iterator i = parts.begin();
i != parts.end();
@@ -367,28 +369,26 @@ string format_t::truncate(const string& str, unsigned int width,
// Don't contract the last element
std::list<string>::iterator x = i;
if (++x == parts.end()) {
- result += *i;
+ result << *i;
break;
}
if (newlen > width) {
- result += string(*i, 0, abbrev_length);
- result += ":";
- newlen -= (*i).length() - abbrev_length;
+ unistring temp(*i);
+ result << temp.extract(0, abbrev_length) << ":";
+ newlen -= temp.length() - abbrev_length;
} else {
- result += *i;
- result += ":";
+ result << *i << ":";
}
}
if (newlen > width) {
// Even abbreviated its too big to show the last account, so
// abbreviate all but the last and truncate at the beginning.
- std::strncpy(buf, result.c_str() + (result.length() - width), width);
- buf[0] = '.';
- buf[1] = '.';
+ unistring temp(result.str());
+ buf << ".." << temp.extract(temp.length() - width, width);
} else {
- std::strcpy(buf, result.c_str());
+ buf << result.str();
}
break;
}
@@ -396,14 +396,11 @@ string format_t::truncate(const string& str, unsigned int width,
case TRUNCATE_TRAILING:
// This method truncates at the end (the default).
- std::strncpy(buf, str.c_str(), width - 2);
- buf[width - 2] = '.';
- buf[width - 1] = '.';
+ buf << ustr.extract(0, width -2) << "..";
break;
}
- buf[width] = '\0';
- return buf;
+ return buf.str();
}
} // namespace ledger
diff --git a/src/format.h b/src/format.h
index 13a2fff2..fbfe452e 100644
--- a/src/format.h
+++ b/src/format.h
@@ -32,13 +32,63 @@
#ifndef _FORMAT_H
#define _FORMAT_H
+#define SUPPORT_UNICODE 1
+
#include "journal.h"
#include "expr.h"
+#if defined(SUPPORT_UNICODE)
+#include "utf8.h"
+#endif
namespace ledger {
DECLARE_EXCEPTION(format_error, std::runtime_error);
+#if defined(SUPPORT_UNICODE)
+/**
+ * @class unistring
+ *
+ * @brief Abstracts working with UTF-32 encoded Unicode strings
+ *
+ * The input is a UTF-8 encoded ledger::string.  It is decoded once into
+ * a vector of UTF-32 values, so that length() reports the number of
+ * decoded characters rather than the number of bytes, and extract() can
+ * slice on character boundaries and return the slice as UTF-8 again.
+ */
+class unistring
+{
+  std::vector<uint32_t> utf32chars;
+
+public:
+  /**
+   * Decodes @p input into the internal UTF-32 buffer.
+   *
+   * The bytes are checked with VERIFY(utf8::is_valid(...)) before being
+   * converted.  The constructor is intentionally left implicit so that a
+   * plain string can be passed where a unistring is expected.
+   */
+  unistring(const string& input)
+  {
+    TRACE_CTOR(unistring, "");
+
+    const char * p = input.c_str();
+    std::size_t len = input.length();
+
+    VERIFY(utf8::is_valid(p, p + len));
+
+    utf8::utf8to32(p, p + len, std::back_inserter(utf32chars));
+  }
+  ~unistring() {
+    TRACE_DTOR(unistring);
+  }
+
+  /** @return the number of decoded UTF-32 characters (not bytes). */
+  std::size_t length() const {
+    return utf32chars.size();
+  }
+
+  /**
+   * Re-encodes a run of characters as UTF-8.
+   *
+   * @param begin  index of the first character; values past the end are
+   *               clamped to length(), yielding an empty string
+   * @param len    number of characters to take; 0 (the default) means
+   *               "everything from @p begin to the end".  Requests that
+   *               would run past the end are shortened to what is left.
+   * @return the selected characters encoded as UTF-8
+   */
+  string extract(const std::size_t begin = 0,
+                 const std::size_t len = 0) const
+  {
+    // Clamp both ends.  Previously the end iterator was computed as
+    // begin + (len ? len : length()), which walks past the end of the
+    // vector whenever begin > 0 with the default len, or whenever
+    // begin + len exceeds length().
+    const std::size_t total = length();
+    const std::size_t first = begin < total ? begin : total;
+    const std::size_t avail = total - first;
+    const std::size_t count = (len != 0 && len < avail) ? len : avail;
+
+    string utf8result;
+    utf8::utf32to8(utf32chars.begin() + first,
+                   utf32chars.begin() + first + count,
+                   std::back_inserter(utf8result));
+    return utf8result;
+  }
+};
+#endif
+
class report_t;
class format_t : public noncopyable
@@ -135,7 +185,7 @@ public:
elem->dump(out);
}
- static string truncate(const string& str, unsigned int width,
+ static string truncate(const unistring& str, unsigned int width,
const bool is_account = false);
};
diff --git a/src/report.cc b/src/report.cc
index 2dfc9d76..4b45577b 100644
--- a/src/report.cc
+++ b/src/report.cc
@@ -789,6 +789,8 @@ expr_t::ptr_op_t report_t::lookup(const string& name)
case 't':
if (std::strcmp(p, "total_expr") == 0)
return MAKE_FUNCTOR(report_t::get_total_expr);
+ else if (std::strcmp(p, "truncate") == 0)
+ return MAKE_FUNCTOR(report_t::get_total_expr);
break;
}
diff --git a/src/session.cc b/src/session.cc
index 9a3cccd5..8e78e9e9 100644
--- a/src/session.cc
+++ b/src/session.cc
@@ -71,9 +71,9 @@ void release_session_context()
session_t::session_t()
: register_format
- ("%-.9(date) %-.20(payee) %-.23(account) %!12(print_balance(amount_expr, 12, 67)) "
+ ("%-.9(date) %-.20(payee) %-.23(account(23)) %!12(print_balance(amount_expr, 12, 67)) "
"%!12(print_balance(display_total, 12, 80, true))\n%/"
- "%31|%-.23(account) %!12(print_balance(amount_expr, 12, 67)) "
+ "%31|%-.23(account(23)) %!12(print_balance(amount_expr, 12, 67)) "
"%!12(print_balance(display_total, 12, 80, true))\n"),
wide_register_format
("%-.9D %-.35P %-.39A %22.108t %!22.132T\n%/"
diff --git a/src/textual.cc b/src/textual.cc
index e1a08f69..4600f3f3 100644
--- a/src/textual.cc
+++ b/src/textual.cc
@@ -596,10 +596,16 @@ static inline void parse_symbol(char *& p, string& symbol)
bool textual_parser_t::test(std::istream& in) const
{
- char buf[5];
+ char buf[12];
+ char * p;
- in.read(buf, 5);
- if (std::strncmp(buf, "<?xml", 5) == 0) {
+ in.read(buf, 11);
+ if (utf8::is_bom(buf))
+ p = &buf[3];
+ else
+ p = buf;
+
+ if (std::strncmp(p, "<?xml", 5) == 0) {
#if defined(HAVE_EXPAT) || defined(HAVE_XMLPARSE)
throw parse_error("Ledger file contains XML data, but format was not recognized");
#else
@@ -688,10 +694,11 @@ unsigned int textual_parser_t::parse(std::istream& in,
{
TRACE_START(parsing_total, 1, "Total time spent parsing text:");
- static bool added_auto_entry_hook = false;
- static char line[MAX_LINE + 1];
- unsigned int count = 0;
- unsigned int errors = 0;
+ static bool added_auto_entry_hook = false;
+ static char linebuf[MAX_LINE + 1];
+ char * line;
+ unsigned int count = 0;
+ unsigned int errors = 0;
std::list<account_t *> account_stack;
auto_entry_finalizer_t auto_entry_finalizer(&journal);
@@ -714,10 +721,15 @@ unsigned int textual_parser_t::parse(std::istream& in,
while (in.good() && ! in.eof()) {
try {
- in.getline(line, MAX_LINE);
+ in.getline(linebuf, MAX_LINE);
if (in.eof())
break;
+ if (linenum == 1 && utf8::is_bom(linebuf))
+ line = &linebuf[3];
+ else
+ line = linebuf;
+
int len = std::strlen(line);
if (line[len - 1] == '\r')
line[--len] = '\0';