diff options
author | John Wiegley <johnw@newartisans.com> | 2009-11-07 08:32:44 -0500 |
---|---|---|
committer | John Wiegley <johnw@newartisans.com> | 2009-11-07 08:34:13 -0500 |
commit | c8641a6de65670b8833992c94c51a586a6434a74 (patch) | |
tree | eb59642cd3296a98ec4c7a73ca319b1c57c2f7ad /src/mask.h | |
parent | 95a068f5e4b0e5c06fd9824f7f999248e28fee7b (diff) | |
download | fork-ledger-c8641a6de65670b8833992c94c51a586a6434a74.tar.gz fork-ledger-c8641a6de65670b8833992c94c51a586a6434a74.tar.bz2 fork-ledger-c8641a6de65670b8833992c94c51a586a6434a74.zip |
Added support for Boost.Regex w/ ICU
This allows for correct searching of UTF-8 encoded strings, such as
using the lower-case form of a Russian word to find mixed-case occurrences of it.
Diffstat (limited to 'src/mask.h')
-rw-r--r-- | src/mask.h | 43 |
1 files changed, 37 insertions, 6 deletions
@@ -45,6 +45,9 @@ #define _MASK_H #include "utils.h" +#if defined(HAVE_BOOST_REGEX_UNICODE) +#include "unistring.h" +#endif namespace ledger { @@ -56,7 +59,11 @@ namespace ledger { class mask_t { public: +#if defined(HAVE_BOOST_REGEX_UNICODE) + boost::u32regex expr; +#else boost::regex expr; +#endif explicit mask_t(const string& pattern); @@ -76,17 +83,41 @@ public: return expr == other.expr; } - bool match(const string& str) const { + bool match(const string& text) const { +#if defined(HAVE_BOOST_REGEX_UNICODE) DEBUG("mask.match", - "Matching: \"" << str << "\" =~ /" << expr.str() << "/ = " - << (boost::regex_search(str, expr) ? "true" : "false")); - return boost::regex_search(str, expr); + "Matching: \"" << text << "\" =~ /" << str() << "/ = " + << (boost::u32regex_search(text, expr) ? "true" : "false")); + return boost::u32regex_search(text, expr); +#else + DEBUG("mask.match", + "Matching: \"" << text << "\" =~ /" << str() << "/ = " + << (boost::regex_search(text, expr) ? "true" : "false")); + return boost::regex_search(text, expr); +#endif } bool empty() const { return expr.empty(); } + string str() const { + if (! empty()) { +#if defined(HAVE_BOOST_REGEX_UNICODE) + assert(sizeof(boost::uint32_t) == sizeof(UChar32)); + unistring ustr; + std::basic_string<UChar32> expr_str = expr.str(); + std::copy(expr_str.begin(), expr_str.end(), + std::back_inserter(ustr.utf32chars)); + return ustr.extract(); +#else + return expr.str(); +#endif + } else { + return empty_string; + } + } + bool valid() const { if (expr.status() != 0) { DEBUG("ledger.validate", "mask_t: expr.status() != 0"); @@ -108,7 +139,7 @@ private: ar & temp; *this = temp; } else { - temp = expr.str(); + temp = str(); ar & temp; } } @@ -116,7 +147,7 @@ private: }; inline std::ostream& operator<<(std::ostream& out, const mask_t& mask) { - out << mask.expr.str(); + out << mask.str(); return out; } |