summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: John Wiegley <johnw@newartisans.com> 2009-02-12 02:34:39 -0400
committer: John Wiegley <johnw@newartisans.com> 2009-02-12 02:34:39 -0400
commit: 6f2e3b88649543d625b76ac5542dd8838b28f0b1 (patch)
tree: af76d46718d4b2b3e51c05c3c53a09987505f02f
parent: 9c9320bc586bc80922921945bfe6bb704c36c624 (diff)
download: fork-ledger-6f2e3b88649543d625b76ac5542dd8838b28f0b1.tar.gz
fork-ledger-6f2e3b88649543d625b76ac5542dd8838b28f0b1.tar.bz2
fork-ledger-6f2e3b88649543d625b76ac5542dd8838b28f0b1.zip
Properly handle UTF-8 characters in commodity strings.
-rw-r--r-- Makefile.am 1
-rw-r--r-- doc/sample.dat 46
-rw-r--r-- src/amount.cc 15
-rw-r--r-- src/amount.h 1
-rw-r--r-- src/balance.cc 12
-rw-r--r-- src/commodity.cc 50
-rw-r--r-- src/format.h 48
-rw-r--r-- src/report.cc 5
-rw-r--r-- src/system.hh 3
-rw-r--r-- src/utils.h 76
-rw-r--r-- src/value.cc 2
11 files changed, 136 insertions, 123 deletions
diff --git a/Makefile.am b/Makefile.am
index b4c21869..22110b57 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -84,6 +84,7 @@ pkginclude_HEADERS = \
src/mask.h \
src/stream.h \
src/pstream.h \
+ src/unistring.h \
\
src/amount.h \
src/commodity.h \
diff --git a/doc/sample.dat b/doc/sample.dat
index b35991fe..5b34157e 100644
--- a/doc/sample.dat
+++ b/doc/sample.dat
@@ -1,42 +1,42 @@
N $
= account =~ /^Expenses:Books/
- (Liabilities:Taxes) -0.10
+ (Liabilities:Taxes) -0.10
~ Monthly
- Assets:Bank:Checking $500.00
- Income:Salary
+ Assets:Bank:Checking $500.00
+ Income:Salary
2004/05/01 * Checking balance
- Assets:Bank:Checking $1,000.00
- Equity:Opening Balances
+ Assets:Bank:Checking $1,000.00
+ Equity:Opening Balances
2004/05/03=2004/05/01 * Investment balance
- Assets:Brokerage 50 AAPL @ $30.00
- Equity:Opening Balances
+ Assets:Brokerage 50 AAPL @ $30.00
+ Equity:Opening Balances
2004/05/14 * Páy dày
- Assets:Bank:Checking $500.00
- Income:Salary
+ Assets:Bank:Checking 500.00€
+ Income:Salary
2004/05/14 * Another dày in which there is Páying
- Asséts:Bánk:Chécking:Asséts:Bánk:Chécking $500.00
- Income:Salary
+ Asséts:Bánk:Chécking:Asséts:Bánk:Chécking $500.00
+ Income:Salary
2004/05/14 * Another dày in which there is Páying
- Русский язык:Русский язык:Русский язык:Русский язык $1000.00
- Income:Salary
+ Русский язык:Русский язык:Русский язык:Русский язык $1000.00
+ Income:Salary
2004/05/27 Book Store
- Expenses:Books $20.00
- Liabilities:MasterCard
+ Expenses:Books $20.00
+ Liabilities:MasterCard
2004/05/27 (100) Credit card company
- ; This is an entry note!
- ; Sample: Value
- Liabilities:MasterCard $20.00
- ; This is a transaction note!
- ; Sample: Another Value
- ; :MyTag:
- Assets:Bank:Checking
- ; :AnotherTag:
+ ; This is an entry note!
+ ; Sample: Value
+ Liabilities:MasterCard $20.00
+ ; This is a transaction note!
+ ; Sample: Another Value
+ ; :MyTag:
+ Assets:Bank:Checking
+ ; :AnotherTag:
diff --git a/src/amount.cc b/src/amount.cc
index 3fb8ddaf..ffeaeea6 100644
--- a/src/amount.cc
+++ b/src/amount.cc
@@ -31,6 +31,7 @@
#include "amount.h"
#include "commodity.h"
+#include "unistring.h"
namespace ledger {
@@ -1001,6 +1002,20 @@ void amount_t::print(std::ostream& _out) const
_out << out.str();
}
+void amount_t::right_justify(std::ostream& out, int width) const
+{
+ std::ostringstream buf;
+ buf << *this;
+
+ unistring temp(buf.str());
+
+ int spacing = width - int(temp.length());
+ while (spacing-- > 0)
+ out << ' ';
+
+ out << temp.extract();
+}
+
bool amount_t::valid() const
{
if (quantity) {
diff --git a/src/amount.h b/src/amount.h
index 0943a4c0..a9077a4b 100644
--- a/src/amount.h
+++ b/src/amount.h
@@ -640,6 +640,7 @@ public:
of its commodity's display precision.
*/
void print(std::ostream& out) const;
+ void right_justify(std::ostream& out, int width) const;
/*@}*/
diff --git a/src/balance.cc b/src/balance.cc
index c9ca148d..7f8ed572 100644
--- a/src/balance.cc
+++ b/src/balance.cc
@@ -240,17 +240,11 @@ void balance_t::print(std::ostream& out,
first = false;
width = first_width;
}
-
- out.width(width);
- out.fill(' ');
- out << std::right << *amount;
+ amount->right_justify(out, width);
}
- if (first) {
- out.width(first_width);
- out.fill(' ');
- out << std::right << "0";
- }
+ if (first)
+ amount_t(0L).right_justify(out, first_width);
}
} // namespace ledger
diff --git a/src/commodity.cc b/src/commodity.cc
index 5169e3af..2dfba880 100644
--- a/src/commodity.cc
+++ b/src/commodity.cc
@@ -571,7 +571,55 @@ void commodity_t::parse_symbol(std::istream& in, string& symbol)
else
throw_(amount_error, "Quoted commodity symbol lacks closing quote");
} else {
- READ_INTO(in, buf, 255, c, ! invalid_chars[static_cast<unsigned char>(c)]);
+ char * _p = buf;
+ c = in.peek();
+ while (_p - buf < 255 && in.good() && ! in.eof() && c != '\n') {
+ int bytes = 0;
+ int size = _p - buf;
+
+ unsigned char d = c;
+
+ // Check for the start of a UTF-8 multi-byte encoded string
+ if (d >= 192 && d <= 223 && size < 254)
+ bytes = 2;
+ else if (d >= 224 && d <= 239 && size < 253)
+ bytes = 3;
+ else if (d >= 240 && d <= 247 && size < 252)
+ bytes = 4;
+ else if (d >= 248 && d <= 251 && size < 251)
+ bytes = 5;
+ else if (d >= 252 && d <= 253 && size < 250)
+ bytes = 6;
+ else if (d >= 254) // UTF-8 encoding error
+ break;
+
+ if (bytes > 0) { // we're looking at a UTF-8 encoding
+ for (int i = 0; i < bytes; i++) {
+ in.get(c);
+ if (in.bad() || in.eof())
+ break;
+ *_p++ = c;
+ }
+ }
+ else if (invalid_chars[static_cast<unsigned char>(c)]) {
+ break;
+ }
+ else {
+ in.get(c);
+ if (in.eof())
+ break;
+ if (c == '\\') {
+ in.get(c);
+ if (in.eof())
+ break;
+ }
+ *_p++ = c;
+ }
+
+ c = in.peek();
+ }
+ *_p = '\0';
+
if (is_reserved_token(buf))
buf[0] = '\0';
}
diff --git a/src/format.h b/src/format.h
index 49350104..d1716dbf 100644
--- a/src/format.h
+++ b/src/format.h
@@ -47,58 +47,12 @@
#define _FORMAT_H
#include "expr.h"
+#include "unistring.h"
namespace ledger {
DECLARE_EXCEPTION(format_error, std::runtime_error);
-#if defined(SUPPORT_UNICODE)
-
-/**
- * @class unistring
- *
- * @brief Abstract working with UTF-32 encoded Unicode strings
- *
- * The input to the string is a UTF8 encoded ledger::string, which can
- * then have its true length be taken, or characters extracted.
- */
-class unistring
-{
- std::vector<uint32_t> utf32chars;
-
-public:
- unistring(const string& input)
- {
- TRACE_CTOR(unistring, "");
-
- const char * p = input.c_str();
- std::size_t len = input.length();
-
- VERIFY(utf8::is_valid(p, p + len));
-
- utf8::utf8to32(p, p + len, std::back_inserter(utf32chars));
- }
- ~unistring() {
- TRACE_DTOR(unistring);
- }
-
- std::size_t length() const {
- return utf32chars.size();
- }
-
- string extract(const std::size_t begin = 0,
- const std::size_t len = 0) const
- {
- string utf8result;
- utf8::utf32to8(utf32chars.begin() + begin,
- utf32chars.begin() + begin + (len ? len : length()),
- std::back_inserter(utf8result));
- return utf8result;
- }
-};
-
-#endif // SUPPORT_UNICODE
-
class report_t;
/**
diff --git a/src/report.cc b/src/report.cc
index e319c8aa..7d8353d9 100644
--- a/src/report.cc
+++ b/src/report.cc
@@ -68,7 +68,8 @@ report_t::report_t(session_t& _session) : session(_session)
" %12(amount)%(comment | \"\")\n%/\n");
HANDLER(balance_format_).on(
- "%20(strip(display_total)) %(depth_spacer)%-(partial_account)\n");
+ "%20(print_balance(strip(display_total), 20))"
+ " %(depth_spacer)%-(partial_account)\n");
HANDLER(equity_format_).on("\n%D %Y%C%P\n%/ %-34W %12t\n");
@@ -191,7 +192,7 @@ value_t report_t::fn_print_balance(call_scope_t& args)
std::ostringstream out;
args[0].strip_annotations(what_to_keep())
- .print(out, *first_width, *latter_width,
+ .print(out, *first_width, latter_width ? *latter_width : -1,
HANDLED(date_format_) ?
HANDLER(date_format_).str() : optional<string>());
diff --git a/src/system.hh b/src/system.hh
index bffd041c..96b574e5 100644
--- a/src/system.hh
+++ b/src/system.hh
@@ -138,10 +138,7 @@ typedef std::ostream::pos_type ostream_pos_type;
#include <gmp.h>
#include <mpfr.h>
#include "sha1.h"
-#define SUPPORT_UNICODE 1
-#if defined(SUPPORT_UNICODE)
#include "utf8.h"
-#endif
#ifdef HAVE_LIBEDIT
#include <editline/readline.h>
diff --git a/src/utils.h b/src/utils.h
index d4011442..93cb9a4a 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -546,50 +546,52 @@ inline char * next_element(char * buf, bool variable = false) {
inline char peek_next_nonws(std::istream& in) {
char c = in.peek();
- while (! in.eof() && std::isspace(c)) {
+ while (in.good() && ! in.eof() && std::isspace(c)) {
in.get(c);
c = in.peek();
}
return c;
}
-#define READ_INTO(str, targ, size, var, cond) { \
- char * _p = targ; \
- var = str.peek(); \
- while (! str.eof() && var != '\n' && (cond) && _p - targ < size) { \
- str.get(var); \
- if (str.eof()) \
- break; \
- if (var == '\\') { \
- str.get(var); \
- if (in.eof()) \
- break; \
- } \
- *_p++ = var; \
- var = str.peek(); \
- } \
- *_p = '\0'; \
-}
+#define READ_INTO(str, targ, size, var, cond) { \
+ char * _p = targ; \
+ var = str.peek(); \
+ while (str.good() && ! str.eof() && var != '\n' && \
+ (cond) && _p - targ < size) { \
+ str.get(var); \
+ if (str.eof()) \
+ break; \
+ if (var == '\\') { \
+ str.get(var); \
+ if (in.eof()) \
+ break; \
+ } \
+ *_p++ = var; \
+ var = str.peek(); \
+ } \
+ *_p = '\0'; \
+ }
-#define READ_INTO_(str, targ, size, var, idx, cond) { \
- char * _p = targ; \
- var = str.peek(); \
- while (! str.eof() && var != '\n' && (cond) && _p - targ < size) { \
- str.get(var); \
- if (str.eof()) \
- break; \
- idx++; \
- if (var == '\\') { \
- str.get(var); \
- if (in.eof()) \
- break; \
- idx++; \
- } \
- *_p++ = var; \
- var = str.peek(); \
- } \
- *_p = '\0'; \
-}
+#define READ_INTO_(str, targ, size, var, idx, cond) { \
+ char * _p = targ; \
+ var = str.peek(); \
+ while (str.good() && ! str.eof() && var != '\n' && \
+ (cond) && _p - targ < size) { \
+ str.get(var); \
+ if (str.eof()) \
+ break; \
+ idx++; \
+ if (var == '\\') { \
+ str.get(var); \
+ if (in.eof()) \
+ break; \
+ idx++; \
+ } \
+ *_p++ = var; \
+ var = str.peek(); \
+ } \
+ *_p = '\0'; \
+ }
} // namespace ledger
diff --git a/src/value.cc b/src/value.cc
index 482dca21..a39eada2 100644
--- a/src/value.cc
+++ b/src/value.cc
@@ -1271,7 +1271,7 @@ void value_t::print(std::ostream& out,
break;
case AMOUNT:
- out << as_amount();
+ as_amount().right_justify(out, first_width);
break;
case STRING: