summary | refs | log | tree | commit | diff
path: root/src/mask.h
diff options
context:
space:
mode:
author: John Wiegley <johnw@newartisans.com> 2009-11-07 08:32:44 -0500
committer: John Wiegley <johnw@newartisans.com> 2009-11-07 08:34:13 -0500
commit: c8641a6de65670b8833992c94c51a586a6434a74 (patch)
tree: eb59642cd3296a98ec4c7a73ca319b1c57c2f7ad /src/mask.h
parent: 95a068f5e4b0e5c06fd9824f7f999248e28fee7b (diff)
download: fork-ledger-c8641a6de65670b8833992c94c51a586a6434a74.tar.gz
fork-ledger-c8641a6de65670b8833992c94c51a586a6434a74.tar.bz2
fork-ledger-c8641a6de65670b8833992c94c51a586a6434a74.zip
Added support for Boost.Regex w/ ICU
This allows UTF-8 encoded strings to be searched correctly; for example, a lower-case Russian search term can now match mixed-case words.
Diffstat (limited to 'src/mask.h')
-rw-r--r-- src/mask.h 43
1 files changed, 37 insertions, 6 deletions
diff --git a/src/mask.h b/src/mask.h
index 32d27f42..62df9b63 100644
--- a/src/mask.h
+++ b/src/mask.h
@@ -45,6 +45,9 @@
#define _MASK_H
#include "utils.h"
+#if defined(HAVE_BOOST_REGEX_UNICODE)
+#include "unistring.h"
+#endif
namespace ledger {
@@ -56,7 +59,11 @@ namespace ledger {
class mask_t
{
public:
+#if defined(HAVE_BOOST_REGEX_UNICODE)
+ boost::u32regex expr;
+#else
boost::regex expr;
+#endif
explicit mask_t(const string& pattern);
@@ -76,17 +83,41 @@ public:
return expr == other.expr;
}
- bool match(const string& str) const {
+ bool match(const string& text) const {
+#if defined(HAVE_BOOST_REGEX_UNICODE)
DEBUG("mask.match",
- "Matching: \"" << str << "\" =~ /" << expr.str() << "/ = "
- << (boost::regex_search(str, expr) ? "true" : "false"));
- return boost::regex_search(str, expr);
+ "Matching: \"" << text << "\" =~ /" << str() << "/ = "
+ << (boost::u32regex_search(text, expr) ? "true" : "false"));
+ return boost::u32regex_search(text, expr);
+#else
+ DEBUG("mask.match",
+ "Matching: \"" << text << "\" =~ /" << str() << "/ = "
+ << (boost::regex_search(text, expr) ? "true" : "false"));
+ return boost::regex_search(text, expr);
+#endif
}
bool empty() const {
return expr.empty();
}
+ string str() const {
+ if (! empty()) {
+#if defined(HAVE_BOOST_REGEX_UNICODE)
+ assert(sizeof(boost::uint32_t) == sizeof(UChar32));
+ unistring ustr;
+ std::basic_string<UChar32> expr_str = expr.str();
+ std::copy(expr_str.begin(), expr_str.end(),
+ std::back_inserter(ustr.utf32chars));
+ return ustr.extract();
+#else
+ return expr.str();
+#endif
+ } else {
+ return empty_string;
+ }
+ }
+
bool valid() const {
if (expr.status() != 0) {
DEBUG("ledger.validate", "mask_t: expr.status() != 0");
@@ -108,7 +139,7 @@ private:
ar & temp;
*this = temp;
} else {
- temp = expr.str();
+ temp = str();
ar & temp;
}
}
@@ -116,7 +147,7 @@ private:
};
inline std::ostream& operator<<(std::ostream& out, const mask_t& mask) {
- out << mask.expr.str();
+ out << mask.str();
return out;
}