summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: John Wiegley <johnw@newartisans.com> 2009-01-23 19:49:22 -0400
committer: John Wiegley <johnw@newartisans.com> 2009-01-23 19:50:00 -0400
commit: 900a92e1158cb178335d16ff0912f5fc5701da32 (patch)
tree: 7da8de9f4a95a18659434c1968c8af43ee34f435 /src
parent: f52e04c2bac1d4900bfe8963f369178f7f76023f (diff)
download: fork-ledger-900a92e1158cb178335d16ff0912f5fc5701da32.tar.gz
fork-ledger-900a92e1158cb178335d16ff0912f5fc5701da32.tar.bz2
fork-ledger-900a92e1158cb178335d16ff0912f5fc5701da32.zip
Added support for Unicode text in Ledger files, thanks to 'utfcpp', which can
be located at http://utfcpp.sourceforge.net.
Diffstat (limited to 'src')
-rw-r--r-- src/format.cc 81
-rw-r--r-- src/format.h 52
-rw-r--r-- src/report.cc 2
-rw-r--r-- src/session.cc 4
-rw-r--r-- src/textual.cc 28
5 files changed, 114 insertions, 53 deletions
diff --git a/src/format.cc b/src/format.cc
index d1f606d3..a4596761 100644
--- a/src/format.cc
+++ b/src/format.cc
@@ -308,58 +308,60 @@ void format_t::format(std::ostream& out_str, scope_t& scope)
break;
}
- string temp = out.str();
-
- DEBUG("format.expr", "output = \"" << temp << "\"");
+ unistring temp(out.str());
+ string result;
if (! elem->has_flags(ELEMENT_FORMATTED) &&
- elem->max_width > 0 && elem->max_width < temp.length())
- out_str << truncate(temp, elem->max_width);
- else
- out_str << temp;
+ elem->max_width > 0 && elem->max_width < temp.length()) {
+ result = truncate(temp, elem->max_width);
+ } else {
+ result = temp.extract();
+ for (int i = 0; i < (int)elem->min_width - (int)temp.length(); i++)
+ result += " ";
+ }
+
+ out_str << result;
}
}
-string format_t::truncate(const string& str, unsigned int width,
+string format_t::truncate(const unistring& ustr, unsigned int width,
const bool is_account)
{
- const unsigned int len = str.length();
- if (len <= width)
- return str;
-
assert(width < 4095);
- char buf[4096];
+ const unsigned int len = ustr.length();
+ if (len <= width)
+ return ustr.extract();
+
+ std::ostringstream buf;
switch (elision_style) {
case TRUNCATE_LEADING:
// This method truncates at the beginning.
- std::strncpy(buf, str.c_str() + (len - width), width);
- buf[0] = '.';
- buf[1] = '.';
+ buf << ".." << ustr.extract(len - width, width);
break;
case TRUNCATE_MIDDLE:
// This method truncates in the middle.
- std::strncpy(buf, str.c_str(), width / 2);
- std::strncpy(buf + width / 2,
- str.c_str() + (len - (width / 2 + width % 2)),
- width / 2 + width % 2);
- buf[width / 2 - 1] = '.';
- buf[width / 2] = '.';
+ buf << ustr.extract(0, width / 2)
+ << ".."
+ << ustr.extract(len - (width / 2 + width % 2),
+ width / 2 + width % 2);
break;
case ABBREVIATE:
if (is_account) {
std::list<string> parts;
string::size_type beg = 0;
- for (string::size_type pos = str.find(':');
+ string strcopy(ustr.extract());
+ for (string::size_type pos = strcopy.find(':');
pos != string::npos;
- beg = pos + 1, pos = str.find(':', beg))
- parts.push_back(string(str, beg, pos - beg));
- parts.push_back(string(str, beg));
+ beg = pos + 1, pos = strcopy.find(':', beg))
+ parts.push_back(string(strcopy, beg, pos - beg));
+ parts.push_back(string(strcopy, beg));
+
+ std::ostringstream result;
- string result;
unsigned int newlen = len;
for (std::list<string>::iterator i = parts.begin();
i != parts.end();
@@ -367,28 +369,26 @@ string format_t::truncate(const string& str, unsigned int width,
// Don't contract the last element
std::list<string>::iterator x = i;
if (++x == parts.end()) {
- result += *i;
+ result << *i;
break;
}
if (newlen > width) {
- result += string(*i, 0, abbrev_length);
- result += ":";
- newlen -= (*i).length() - abbrev_length;
+ unistring temp(*i);
+ result << temp.extract(0, abbrev_length) << ":";
+ newlen -= temp.length() - abbrev_length;
} else {
- result += *i;
- result += ":";
+ result << *i << ":";
}
}
if (newlen > width) {
// Even abbreviated its too big to show the last account, so
// abbreviate all but the last and truncate at the beginning.
- std::strncpy(buf, result.c_str() + (result.length() - width), width);
- buf[0] = '.';
- buf[1] = '.';
+ unistring temp(result.str());
+ buf << ".." << temp.extract(temp.length() - width, width);
} else {
- std::strcpy(buf, result.c_str());
+ buf << result.str();
}
break;
}
@@ -396,14 +396,11 @@ string format_t::truncate(const string& str, unsigned int width,
case TRUNCATE_TRAILING:
// This method truncates at the end (the default).
- std::strncpy(buf, str.c_str(), width - 2);
- buf[width - 2] = '.';
- buf[width - 1] = '.';
+ buf << ustr.extract(0, width -2) << "..";
break;
}
- buf[width] = '\0';
- return buf;
+ return buf.str();
}
} // namespace ledger
diff --git a/src/format.h b/src/format.h
index 13a2fff2..fbfe452e 100644
--- a/src/format.h
+++ b/src/format.h
@@ -32,13 +32,63 @@
#ifndef _FORMAT_H
#define _FORMAT_H
+#define SUPPORT_UNICODE 1
+
#include "journal.h"
#include "expr.h"
+#if defined(SUPPORT_UNICODE)
+#include "utf8.h"
+#endif
namespace ledger {
DECLARE_EXCEPTION(format_error, std::runtime_error);
+#if defined(SUPPORT_UNICODE)
+/**
+ * @class unistring
+ *
+ * @brief Abstract working with UTF-32 encoded Unicode strings
+ *
+ * The input to the string is a UTF8 encoded ledger::string, which can
+ * then have its true length be taken, or characters extracted.
+ */
+class unistring
+{
+ std::vector<uint32_t> utf32chars;
+
+public:
+ unistring(const string& input)
+ {
+ TRACE_CTOR(unistring, "");
+
+ const char * p = input.c_str();
+ std::size_t len = input.length();
+
+ VERIFY(utf8::is_valid(p, p + len));
+
+ utf8::utf8to32(p, p + len, std::back_inserter(utf32chars));
+ }
+ ~unistring() {
+ TRACE_DTOR(unistring);
+ }
+
+ std::size_t length() const {
+ return utf32chars.size();
+ }
+
+ string extract(const std::size_t begin = 0,
+ const std::size_t len = 0) const
+ {
+ string utf8result;
+ utf8::utf32to8(utf32chars.begin() + begin,
+ utf32chars.begin() + begin + (len ? len : length()),
+ std::back_inserter(utf8result));
+ return utf8result;
+ }
+};
+#endif
+
class report_t;
class format_t : public noncopyable
@@ -135,7 +185,7 @@ public:
elem->dump(out);
}
- static string truncate(const string& str, unsigned int width,
+ static string truncate(const unistring& str, unsigned int width,
const bool is_account = false);
};
diff --git a/src/report.cc b/src/report.cc
index 2dfc9d76..4b45577b 100644
--- a/src/report.cc
+++ b/src/report.cc
@@ -789,6 +789,8 @@ expr_t::ptr_op_t report_t::lookup(const string& name)
case 't':
if (std::strcmp(p, "total_expr") == 0)
return MAKE_FUNCTOR(report_t::get_total_expr);
+ else if (std::strcmp(p, "truncate") == 0)
+ return MAKE_FUNCTOR(report_t::get_total_expr);
break;
}
diff --git a/src/session.cc b/src/session.cc
index 9a3cccd5..8e78e9e9 100644
--- a/src/session.cc
+++ b/src/session.cc
@@ -71,9 +71,9 @@ void release_session_context()
session_t::session_t()
: register_format
- ("%-.9(date) %-.20(payee) %-.23(account) %!12(print_balance(amount_expr, 12, 67)) "
+ ("%-.9(date) %-.20(payee) %-.23(account(23)) %!12(print_balance(amount_expr, 12, 67)) "
"%!12(print_balance(display_total, 12, 80, true))\n%/"
- "%31|%-.23(account) %!12(print_balance(amount_expr, 12, 67)) "
+ "%31|%-.23(account(23)) %!12(print_balance(amount_expr, 12, 67)) "
"%!12(print_balance(display_total, 12, 80, true))\n"),
wide_register_format
("%-.9D %-.35P %-.39A %22.108t %!22.132T\n%/"
diff --git a/src/textual.cc b/src/textual.cc
index e1a08f69..4600f3f3 100644
--- a/src/textual.cc
+++ b/src/textual.cc
@@ -596,10 +596,16 @@ static inline void parse_symbol(char *& p, string& symbol)
bool textual_parser_t::test(std::istream& in) const
{
- char buf[5];
+ char buf[12];
+ char * p;
- in.read(buf, 5);
- if (std::strncmp(buf, "<?xml", 5) == 0) {
+ in.read(buf, 11);
+ if (utf8::is_bom(buf))
+ p = &buf[3];
+ else
+ p = buf;
+
+ if (std::strncmp(p, "<?xml", 5) == 0) {
#if defined(HAVE_EXPAT) || defined(HAVE_XMLPARSE)
throw parse_error("Ledger file contains XML data, but format was not recognized");
#else
@@ -688,10 +694,11 @@ unsigned int textual_parser_t::parse(std::istream& in,
{
TRACE_START(parsing_total, 1, "Total time spent parsing text:");
- static bool added_auto_entry_hook = false;
- static char line[MAX_LINE + 1];
- unsigned int count = 0;
- unsigned int errors = 0;
+ static bool added_auto_entry_hook = false;
+ static char linebuf[MAX_LINE + 1];
+ char * line;
+ unsigned int count = 0;
+ unsigned int errors = 0;
std::list<account_t *> account_stack;
auto_entry_finalizer_t auto_entry_finalizer(&journal);
@@ -714,10 +721,15 @@ unsigned int textual_parser_t::parse(std::istream& in,
while (in.good() && ! in.eof()) {
try {
- in.getline(line, MAX_LINE);
+ in.getline(linebuf, MAX_LINE);
if (in.eof())
break;
+ if (linenum == 1 && utf8::is_bom(linebuf))
+ line = &linebuf[3];
+ else
+ line = linebuf;
+
int len = std::strlen(line);
if (line[len - 1] == '\r')
line[--len] = '\0';