summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: John Wiegley <johnw@newartisans.com>, 2009-11-07 08:32:44 -0500
committer: John Wiegley <johnw@newartisans.com>, 2009-11-07 08:34:13 -0500
commit: c8641a6de65670b8833992c94c51a586a6434a74 (patch)
tree: eb59642cd3296a98ec4c7a73ca319b1c57c2f7ad /src
parent: 95a068f5e4b0e5c06fd9824f7f999248e28fee7b (diff)
download: fork-ledger-c8641a6de65670b8833992c94c51a586a6434a74.tar.gz
fork-ledger-c8641a6de65670b8833992c94c51a586a6434a74.tar.bz2
fork-ledger-c8641a6de65670b8833992c94c51a586a6434a74.zip
Added support for Boost.Regex w/ ICU
This allows correct case-insensitive searching of UTF-8 encoded strings; for example, a lower-case Russian search term now matches mixed-case words.
Diffstat (limited to 'src')
-rw-r--r-- src/derive.cc 6
-rw-r--r-- src/mask.cc 6
-rw-r--r-- src/mask.h 43
-rw-r--r-- src/post.cc 2
-rw-r--r-- src/report.cc 2
-rw-r--r-- src/system.hh.in 4
-rw-r--r-- src/unistring.h 7
7 files changed, 56 insertions, 14 deletions
diff --git a/src/derive.cc b/src/derive.cc
index d3a7a37d..081b96b2 100644
--- a/src/derive.cc
+++ b/src/derive.cc
@@ -307,7 +307,7 @@ namespace {
DEBUG("derive.xact", "Setting note from match: " << *added->note);
#endif
} else {
- added->payee = tmpl.payee_mask.expr.str();
+ added->payee = tmpl.payee_mask.str();
DEBUG("derive.xact", "Setting payee from template: " << added->payee);
}
@@ -403,14 +403,14 @@ namespace {
account_t * acct = NULL;
if (! acct) {
- acct = journal.find_account_re(post.account_mask->expr.str());
+ acct = journal.find_account_re(post.account_mask->str());
#if defined(DEBUG_ON)
if (acct)
DEBUG("derive.xact", "Found account as a regular expression");
#endif
}
if (! acct) {
- acct = journal.find_account(post.account_mask->expr.str());
+ acct = journal.find_account(post.account_mask->str());
#if defined(DEBUG_ON)
if (acct)
DEBUG("derive.xact", "Found (or created) account by name");
diff --git a/src/mask.cc b/src/mask.cc
index 135f6669..c1e66ced 100644
--- a/src/mask.cc
+++ b/src/mask.cc
@@ -43,7 +43,11 @@ mask_t::mask_t(const string& pat) : expr()
mask_t& mask_t::operator=(const string& pat)
{
- expr.assign(pat.c_str(), regex::perl | regex::icase);
+#if defined(HAVE_BOOST_REGEX_UNICODE)
+ expr = boost::make_u32regex(pat.c_str(), boost::regex::perl | boost::regex::icase);
+#else
+ expr.assign(pat.c_str(), boost::regex::perl | boost::regex::icase);
+#endif
VERIFY(valid());
return *this;
}
diff --git a/src/mask.h b/src/mask.h
index 32d27f42..62df9b63 100644
--- a/src/mask.h
+++ b/src/mask.h
@@ -45,6 +45,9 @@
#define _MASK_H
#include "utils.h"
+#if defined(HAVE_BOOST_REGEX_UNICODE)
+#include "unistring.h"
+#endif
namespace ledger {
@@ -56,7 +59,11 @@ namespace ledger {
class mask_t
{
public:
+#if defined(HAVE_BOOST_REGEX_UNICODE)
+ boost::u32regex expr;
+#else
boost::regex expr;
+#endif
explicit mask_t(const string& pattern);
@@ -76,17 +83,41 @@ public:
return expr == other.expr;
}
- bool match(const string& str) const {
+ bool match(const string& text) const {
+#if defined(HAVE_BOOST_REGEX_UNICODE)
DEBUG("mask.match",
- "Matching: \"" << str << "\" =~ /" << expr.str() << "/ = "
- << (boost::regex_search(str, expr) ? "true" : "false"));
- return boost::regex_search(str, expr);
+ "Matching: \"" << text << "\" =~ /" << str() << "/ = "
+ << (boost::u32regex_search(text, expr) ? "true" : "false"));
+ return boost::u32regex_search(text, expr);
+#else
+ DEBUG("mask.match",
+ "Matching: \"" << text << "\" =~ /" << str() << "/ = "
+ << (boost::regex_search(text, expr) ? "true" : "false"));
+ return boost::regex_search(text, expr);
+#endif
}
bool empty() const {
return expr.empty();
}
+ string str() const {
+ if (! empty()) {
+#if defined(HAVE_BOOST_REGEX_UNICODE)
+ assert(sizeof(boost::uint32_t) == sizeof(UChar32));
+ unistring ustr;
+ std::basic_string<UChar32> expr_str = expr.str();
+ std::copy(expr_str.begin(), expr_str.end(),
+ std::back_inserter(ustr.utf32chars));
+ return ustr.extract();
+#else
+ return expr.str();
+#endif
+ } else {
+ return empty_string;
+ }
+ }
+
bool valid() const {
if (expr.status() != 0) {
DEBUG("ledger.validate", "mask_t: expr.status() != 0");
@@ -108,7 +139,7 @@ private:
ar & temp;
*this = temp;
} else {
- temp = expr.str();
+ temp = str();
ar & temp;
}
}
@@ -116,7 +147,7 @@ private:
};
inline std::ostream& operator<<(std::ostream& out, const mask_t& mask) {
- out << mask.expr.str();
+ out << mask.str();
return out;
}
diff --git a/src/post.cc b/src/post.cc
index 4f45592f..0fd763a9 100644
--- a/src/post.cc
+++ b/src/post.cc
@@ -246,7 +246,7 @@ namespace {
if (env.value_at(0).is_string())
account = master->find_account(env.get<string>(0), false);
else if (env.value_at(0).is_mask())
- account = master->find_account_re(env.get<mask_t>(0).expr.str());
+ account = master->find_account_re(env.get<mask_t>(0).str());
} else {
account = env->reported_account();
}
diff --git a/src/report.cc b/src/report.cc
index 77548cce..fbe8d37c 100644
--- a/src/report.cc
+++ b/src/report.cc
@@ -322,7 +322,7 @@ value_t report_t::fn_account_total(call_scope_t& args)
acct = session.journal->find_account(name, false);
}
else if (args[0].is_mask()) {
- name = args[0].as_mask().expr.str();
+ name = args[0].as_mask().str();
acct = session.journal->find_account_re(name);
}
else {
diff --git a/src/system.hh.in b/src/system.hh.in
index b0b8f1eb..12f257eb 100644
--- a/src/system.hh.in
+++ b/src/system.hh.in
@@ -164,7 +164,11 @@ typedef std::ostream::pos_type ostream_pos_type;
#include <boost/random/uniform_int.hpp>
#include <boost/random/uniform_real.hpp>
#include <boost/random/variate_generator.hpp>
+#if defined(HAVE_BOOST_REGEX_UNICODE)
+#include <boost/regex/icu.hpp>
+#else
#include <boost/regex.hpp>
+#endif // HAVE_BOOST_REGEX_UNICODE
#include <boost/variant.hpp>
#include <boost/version.hpp>
diff --git a/src/unistring.h b/src/unistring.h
index 268f60e3..bc55b016 100644
--- a/src/unistring.h
+++ b/src/unistring.h
@@ -59,12 +59,15 @@ namespace ledger {
*/
class unistring
{
+public:
std::vector<boost::uint32_t> utf32chars;
-public:
+ unistring() {
+ TRACE_CTOR(unistring, "");
+ }
unistring(const std::string& input)
{
- TRACE_CTOR(unistring, "");
+ TRACE_CTOR(unistring, "std::string");
const char * p = input.c_str();
std::size_t len = input.length();