From 900a92e1158cb178335d16ff0912f5fc5701da32 Mon Sep 17 00:00:00 2001
From: John Wiegley
Date: Fri, 23 Jan 2009 19:49:22 -0400
Subject: Added support for Unicode text in Ledger files, thanks to 'utfcpp', which can be located at http://utfcpp.sourceforge.net.

---
 src/format.h | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 79 insertions(+), 1 deletion(-)

(limited to 'src/format.h')

diff --git a/src/format.h b/src/format.h
index 13a2fff2..fbfe452e 100644
--- a/src/format.h
+++ b/src/format.h
@@ -32,13 +32,91 @@
 #ifndef _FORMAT_H
 #define _FORMAT_H
 
+#define SUPPORT_UNICODE 1
+
 #include "journal.h"
 #include "expr.h"
+#if defined(SUPPORT_UNICODE)
+#include "utf8.h"
+#endif
 
 namespace ledger {
 
 DECLARE_EXCEPTION(format_error, std::runtime_error);
 
+#if defined(SUPPORT_UNICODE)
+/**
+ * @class unistring
+ *
+ * @brief Abstract working with UTF-32 encoded Unicode strings
+ *
+ * The input to the string is a UTF8 encoded ledger::string, which can
+ * then have its true length be taken, or characters extracted.
+ */
+class unistring
+{
+  // One decoded UTF-32 code point per element.
+  std::vector<utf8::uint32_t> utf32chars;
+
+public:
+  /**
+   * Decode the UTF-8 bytes of @p input into UTF-32 code points.
+   *
+   * Not marked explicit; presumably so that callers of
+   * format_t::truncate() can keep passing a plain string.
+   */
+  unistring(const string& input)
+  {
+    TRACE_CTOR(unistring, "");
+
+    const char * p = input.c_str();
+    std::size_t len = input.length();
+
+    VERIFY(utf8::is_valid(p, p + len));
+
+    utf8::utf8to32(p, p + len, std::back_inserter(utf32chars));
+  }
+  ~unistring() {
+    TRACE_DTOR(unistring);
+  }
+
+  /** Number of code points (not bytes) held by the string. */
+  std::size_t length() const {
+    return utf32chars.size();
+  }
+
+  /**
+   * Re-encode a run of code points as UTF-8.
+   *
+   * @param begin  Index of the first code point to emit.
+   * @param len    Number of code points to emit; 0 means "through the
+   *               end of the string".
+   * @return The selected code points as UTF-8.  The range is clamped
+   *         to the string, so a @p begin at or past the end yields
+   *         an empty result.
+   */
+  string extract(const std::size_t begin = 0,
+                 const std::size_t len   = 0) const
+  {
+    string utf8result;
+
+    const std::size_t total = length();
+    if (begin >= total)
+      return utf8result;
+
+    // Clamp the run so the end iterator can never pass the end of
+    // utf32chars, e.g. when begin > 0 and len is left at its default.
+    const std::size_t avail = total - begin;
+    const std::size_t count = (len == 0 || len > avail) ? avail : len;
+
+    utf8::utf32to8(utf32chars.begin() + begin,
+                   utf32chars.begin() + begin + count,
+                   std::back_inserter(utf8result));
+    return utf8result;
+  }
+};
+#endif
+
 class report_t;
 
 class format_t : public noncopyable
@@ -135,7 +213,7 @@ public:
       elem->dump(out);
   }
 
-  static string truncate(const string& str, unsigned int width,
+  static string truncate(const unistring& str, unsigned int width,
                          const bool is_account = false);
 };
 
-- 
cgit v1.2.3