diff options
author | John Wiegley <johnw@newartisans.com> | 2009-11-07 08:32:44 -0500 |
---|---|---|
committer | John Wiegley <johnw@newartisans.com> | 2009-11-07 08:34:13 -0500 |
commit | c8641a6de65670b8833992c94c51a586a6434a74 (patch) | |
tree | eb59642cd3296a98ec4c7a73ca319b1c57c2f7ad /src | |
parent | 95a068f5e4b0e5c06fd9824f7f999248e28fee7b (diff) | |
download | fork-ledger-c8641a6de65670b8833992c94c51a586a6434a74.tar.gz fork-ledger-c8641a6de65670b8833992c94c51a586a6434a74.tar.bz2 fork-ledger-c8641a6de65670b8833992c94c51a586a6434a74.zip |
Added support for Boost.Regex w/ ICU
This allows for correct searching of UTF-8 encoded strings, such as using
the lower-case version of a Russian word to find mixed-case occurrences of it.
Diffstat (limited to 'src')
-rw-r--r-- | src/derive.cc | 6 | ||||
-rw-r--r-- | src/mask.cc | 6 | ||||
-rw-r--r-- | src/mask.h | 43 | ||||
-rw-r--r-- | src/post.cc | 2 | ||||
-rw-r--r-- | src/report.cc | 2 | ||||
-rw-r--r-- | src/system.hh.in | 4 | ||||
-rw-r--r-- | src/unistring.h | 7 |
7 files changed, 56 insertions, 14 deletions
diff --git a/src/derive.cc b/src/derive.cc index d3a7a37d..081b96b2 100644 --- a/src/derive.cc +++ b/src/derive.cc @@ -307,7 +307,7 @@ namespace { DEBUG("derive.xact", "Setting note from match: " << *added->note); #endif } else { - added->payee = tmpl.payee_mask.expr.str(); + added->payee = tmpl.payee_mask.str(); DEBUG("derive.xact", "Setting payee from template: " << added->payee); } @@ -403,14 +403,14 @@ namespace { account_t * acct = NULL; if (! acct) { - acct = journal.find_account_re(post.account_mask->expr.str()); + acct = journal.find_account_re(post.account_mask->str()); #if defined(DEBUG_ON) if (acct) DEBUG("derive.xact", "Found account as a regular expression"); #endif } if (! acct) { - acct = journal.find_account(post.account_mask->expr.str()); + acct = journal.find_account(post.account_mask->str()); #if defined(DEBUG_ON) if (acct) DEBUG("derive.xact", "Found (or created) account by name"); diff --git a/src/mask.cc b/src/mask.cc index 135f6669..c1e66ced 100644 --- a/src/mask.cc +++ b/src/mask.cc @@ -43,7 +43,11 @@ mask_t::mask_t(const string& pat) : expr() mask_t& mask_t::operator=(const string& pat) { - expr.assign(pat.c_str(), regex::perl | regex::icase); +#if defined(HAVE_BOOST_REGEX_UNICODE) + expr = boost::make_u32regex(pat.c_str(), boost::regex::perl | boost::regex::icase); +#else + expr.assign(pat.c_str(), boost::regex::perl | boost::regex::icase); +#endif VERIFY(valid()); return *this; } @@ -45,6 +45,9 @@ #define _MASK_H #include "utils.h" +#if defined(HAVE_BOOST_REGEX_UNICODE) +#include "unistring.h" +#endif namespace ledger { @@ -56,7 +59,11 @@ namespace ledger { class mask_t { public: +#if defined(HAVE_BOOST_REGEX_UNICODE) + boost::u32regex expr; +#else boost::regex expr; +#endif explicit mask_t(const string& pattern); @@ -76,17 +83,41 @@ public: return expr == other.expr; } - bool match(const string& str) const { + bool match(const string& text) const { +#if defined(HAVE_BOOST_REGEX_UNICODE) DEBUG("mask.match", - "Matching: \"" << str << "\" 
=~ /" << expr.str() << "/ = " - << (boost::regex_search(str, expr) ? "true" : "false")); - return boost::regex_search(str, expr); + "Matching: \"" << text << "\" =~ /" << str() << "/ = " + << (boost::u32regex_search(text, expr) ? "true" : "false")); + return boost::u32regex_search(text, expr); +#else + DEBUG("mask.match", + "Matching: \"" << text << "\" =~ /" << str() << "/ = " + << (boost::regex_search(text, expr) ? "true" : "false")); + return boost::regex_search(text, expr); +#endif } bool empty() const { return expr.empty(); } + string str() const { + if (! empty()) { +#if defined(HAVE_BOOST_REGEX_UNICODE) + assert(sizeof(boost::uint32_t) == sizeof(UChar32)); + unistring ustr; + std::basic_string<UChar32> expr_str = expr.str(); + std::copy(expr_str.begin(), expr_str.end(), + std::back_inserter(ustr.utf32chars)); + return ustr.extract(); +#else + return expr.str(); +#endif + } else { + return empty_string; + } + } + bool valid() const { if (expr.status() != 0) { DEBUG("ledger.validate", "mask_t: expr.status() != 0"); @@ -108,7 +139,7 @@ private: ar & temp; *this = temp; } else { - temp = expr.str(); + temp = str(); ar & temp; } } @@ -116,7 +147,7 @@ private: }; inline std::ostream& operator<<(std::ostream& out, const mask_t& mask) { - out << mask.expr.str(); + out << mask.str(); return out; } diff --git a/src/post.cc b/src/post.cc index 4f45592f..0fd763a9 100644 --- a/src/post.cc +++ b/src/post.cc @@ -246,7 +246,7 @@ namespace { if (env.value_at(0).is_string()) account = master->find_account(env.get<string>(0), false); else if (env.value_at(0).is_mask()) - account = master->find_account_re(env.get<mask_t>(0).expr.str()); + account = master->find_account_re(env.get<mask_t>(0).str()); } else { account = env->reported_account(); } diff --git a/src/report.cc b/src/report.cc index 77548cce..fbe8d37c 100644 --- a/src/report.cc +++ b/src/report.cc @@ -322,7 +322,7 @@ value_t report_t::fn_account_total(call_scope_t& args) acct = session.journal->find_account(name, 
false); } else if (args[0].is_mask()) { - name = args[0].as_mask().expr.str(); + name = args[0].as_mask().str(); acct = session.journal->find_account_re(name); } else { diff --git a/src/system.hh.in b/src/system.hh.in index b0b8f1eb..12f257eb 100644 --- a/src/system.hh.in +++ b/src/system.hh.in @@ -164,7 +164,11 @@ typedef std::ostream::pos_type ostream_pos_type; #include <boost/random/uniform_int.hpp> #include <boost/random/uniform_real.hpp> #include <boost/random/variate_generator.hpp> +#if defined(HAVE_BOOST_REGEX_UNICODE) +#include <boost/regex/icu.hpp> +#else #include <boost/regex.hpp> +#endif // HAVE_BOOST_REGEX_UNICODE #include <boost/variant.hpp> #include <boost/version.hpp> diff --git a/src/unistring.h b/src/unistring.h index 268f60e3..bc55b016 100644 --- a/src/unistring.h +++ b/src/unistring.h @@ -59,12 +59,15 @@ namespace ledger { */ class unistring { +public: std::vector<boost::uint32_t> utf32chars; -public: + unistring() { + TRACE_CTOR(unistring, ""); + } unistring(const std::string& input) { - TRACE_CTOR(unistring, ""); + TRACE_CTOR(unistring, "std::string"); const char * p = input.c_str(); std::size_t len = input.length(); |