summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author John Wiegley <johnw@newartisans.com> 2009-11-07 08:32:44 -0500
committer John Wiegley <johnw@newartisans.com> 2009-11-07 08:34:13 -0500
commit c8641a6de65670b8833992c94c51a586a6434a74 (patch)
tree eb59642cd3296a98ec4c7a73ca319b1c57c2f7ad
parent 95a068f5e4b0e5c06fd9824f7f999248e28fee7b (diff)
download fork-ledger-c8641a6de65670b8833992c94c51a586a6434a74.tar.gz
fork-ledger-c8641a6de65670b8833992c94c51a586a6434a74.tar.bz2
fork-ledger-c8641a6de65670b8833992c94c51a586a6434a74.zip
Added support for Boost.Regex w/ ICU
This allows UTF-8 encoded strings to be searched correctly; for example, a lower-case Russian word given as the pattern now matches mixed-case occurrences of that word.
-rwxr-xr-x acprep 20
-rw-r--r-- doc/sample.dat 2
-rw-r--r-- lib/Makefile 39
-rw-r--r-- src/derive.cc 6
-rw-r--r-- src/mask.cc 6
-rw-r--r-- src/mask.h 43
-rw-r--r-- src/post.cc 2
-rw-r--r-- src/report.cc 2
-rw-r--r-- src/system.hh.in 4
-rw-r--r-- src/unistring.h 7
-rw-r--r-- tools/configure.ac 23
11 files changed, 127 insertions, 27 deletions
diff --git a/acprep b/acprep
index 80de4c73..6d4a223a 100755
--- a/acprep
+++ b/acprep
@@ -751,6 +751,10 @@ class PrepareBuild(CommandLineApp):
self.sys_include_dirs.insert(0, '/usr/local/stow/cppunit/include')
self.sys_library_dirs.insert(0, '/usr/local/stow/cppunit/lib')
+ if exists('/usr/local/stow/icu/include'):
+ self.sys_include_dirs.insert(0, '/usr/local/stow/icu/include')
+ self.sys_library_dirs.insert(0, '/usr/local/stow/icu/lib')
+
self.CXXFLAGS.append('-march=nocona')
self.CXXFLAGS.append('-msse3')
self.CPPFLAGS.append('-D_GLIBCXX_FULLY_DYNAMIC_STRING=1')
@@ -979,6 +983,14 @@ class PrepareBuild(CommandLineApp):
self.sys_include_dirs.insert(0, '/usr/local/stow/cppunit-debug/include')
self.sys_library_dirs.insert(0, '/usr/local/stow/cppunit-debug/lib')
+ if exists('/usr/local/stow/icu-debug/include'):
+ if '/usr/local/stow/icu/include' in self.sys_include_dirs:
+ self.sys_include_dirs.remove('/usr/local/stow/icu/include')
+ self.sys_library_dirs.remove('/usr/local/stow/icu/lib')
+
+ self.sys_include_dirs.insert(0, '/usr/local/stow/icu-debug/include')
+ self.sys_library_dirs.insert(0, '/usr/local/stow/icu-debug/lib')
+
if exists('/opt/local/lib/libboost_regex-d.a'):
self.envvars['BOOST_HOME'] = '/opt/local'
self.envvars['BOOST_SUFFIX'] = '-d'
@@ -988,9 +1000,9 @@ class PrepareBuild(CommandLineApp):
self.sys_include_dirs.append('/opt/local/include/boost')
- elif exists('/usr/local/lib/libboost_regex-xgcc44-sd-1_40.a'):
+ elif exists('/usr/local/lib/libboost_regex-xgcc44-d-1_40.a'):
self.envvars['BOOST_HOME'] = '/usr/local'
- self.envvars['BOOST_SUFFIX'] = '-xgcc44-sd-1_40'
+ self.envvars['BOOST_SUFFIX'] = '-xgcc44-d-1_40'
self.log.info('Setting BOOST_SUFFIX => %s' %
self.envvars['BOOST_SUFFIX'])
@@ -1005,9 +1017,9 @@ class PrepareBuild(CommandLineApp):
self.sys_include_dirs.append('/opt/local/include/boost')
- elif exists('/usr/local/lib/libboost_regex-xgcc44-s-1_40.a'):
+ elif exists('/usr/local/lib/libboost_regex-xgcc44-1_40.a'):
self.envvars['BOOST_HOME'] = '/usr/local'
- self.envvars['BOOST_SUFFIX'] = '-xgcc44-s-1_40'
+ self.envvars['BOOST_SUFFIX'] = '-xgcc44-1_40'
self.log.info('Setting BOOST_SUFFIX => %s' %
self.envvars['BOOST_SUFFIX'])
diff --git a/doc/sample.dat b/doc/sample.dat
index 002d20ee..e773d6df 100644
--- a/doc/sample.dat
+++ b/doc/sample.dat
@@ -24,7 +24,7 @@ N $
Income:Salary
2004/05/14 * Another dày in which there is Páying
- Русский язык:Русский язык:Русский язык:Русский язык $1000.00
+ Русский язык:Активы:Русский язык:Русский язык $1000.00
Income:Salary
2004/05/27 Book Store
diff --git a/lib/Makefile b/lib/Makefile
index 07cf77ea..3a9c3214 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -2,34 +2,57 @@
# This is only important if you intend to produce a Ledger binary for
# installation.
-#ARCH_CFLAGS = -g -arch i386 -arch ppc -isysroot /Developer/SDKs/MacOSX10.5.sdk
-#ARCH_LDFLAGS = -g -arch i386 -arch ppc -Wl,-syslibroot,/Developer/SDKs/MacOSX10.5.sdk
-
STOW_ROOT = /usr/local/stow
BOOST_SOURCE = boost
BOOST_VERSION = 1_40_0
-# architecture=combined
+icu-release:
+ -(cd icu/source; make distclean)
+ (cd icu/source; sh autogen.sh; \
+ ./configure CPPFLAGS="" \
+ CFLAGS="$(ARCH_CFLAGS)" \
+ LDFLAGS="$(ARCH_LDFLAGS)" \
+ CC="$(CC)" CXX="$(CXX)" LD="$(LD)" \
+ --enable-static \
+ --prefix=$(STOW_ROOT)/icu && \
+ make install)
+
+icu-debug:
+ -(cd icu/source; make distclean)
+ (cd icu/source; sh autogen.sh; \
+ ./configure CPPFLAGS="-D_GLIBCXX_DEBUG=1" \
+ CFLAGS="-g $(ARCH_CFLAGS)" \
+ LDFLAGS="-g $(ARCH_LDFLAGS)" \
+ CC="$(CC)" CXX="$(CXX)" LD="$(LD)" \
+ --enable-static --enable-debug \
+ --prefix=$(STOW_ROOT)/icu-debug && \
+ make install)
+
+icu-build: icu-release icu-debug
+
boost-release:
(cd $(BOOST_SOURCE) && \
bjam release --prefix=$(STOW_ROOT)/boost_$(BOOST_VERSION) \
--build-dir=$(HOME)/Products/boost_$(BOOST_VERSION) \
- --toolset=darwin --build-type=complete --layout=versioned install)
+ --toolset=darwin --build-type=complete --layout=versioned \
+ -sHAVE_ICU=1 -sICU_PATH=/usr/local/stow/icu install)
boost-debug:
(cd $(BOOST_SOURCE) && \
bjam debug --prefix=$(STOW_ROOT)/boost_$(BOOST_VERSION) \
--build-dir=$(HOME)/Products/boost_$(BOOST_VERSION) \
--toolset=darwin --build-type=complete --layout=versioned \
- define=_GLIBCXX_DEBUG=1 install)
+ define=_GLIBCXX_DEBUG=1 \
+ -sHAVE_ICU=1 -sICU_PATH=/usr/local/stow/icu-debug install)
boost-build: boost-release boost-debug
cppunit-release:
-(cd cppunit; make distclean)
(cd cppunit; sh autogen.sh; \
- ./configure CFLAGS="$(ARCH_CFLAGS)" \
+ ./configure CPPFLAGS="" \
+ CFLAGS="$(ARCH_CFLAGS)" \
LDFLAGS="$(ARCH_LDFLAGS)" \
CC="$(CC)" CXX="$(CXX)" LD="$(LD)" \
--prefix=$(STOW_ROOT)/cppunit && \
@@ -47,4 +70,4 @@ cppunit-debug:
cppunit-build: cppunit-release cppunit-debug
-build-all: boost-build cppunit-build
+all: boost-build cppunit-build
diff --git a/src/derive.cc b/src/derive.cc
index d3a7a37d..081b96b2 100644
--- a/src/derive.cc
+++ b/src/derive.cc
@@ -307,7 +307,7 @@ namespace {
DEBUG("derive.xact", "Setting note from match: " << *added->note);
#endif
} else {
- added->payee = tmpl.payee_mask.expr.str();
+ added->payee = tmpl.payee_mask.str();
DEBUG("derive.xact", "Setting payee from template: " << added->payee);
}
@@ -403,14 +403,14 @@ namespace {
account_t * acct = NULL;
if (! acct) {
- acct = journal.find_account_re(post.account_mask->expr.str());
+ acct = journal.find_account_re(post.account_mask->str());
#if defined(DEBUG_ON)
if (acct)
DEBUG("derive.xact", "Found account as a regular expression");
#endif
}
if (! acct) {
- acct = journal.find_account(post.account_mask->expr.str());
+ acct = journal.find_account(post.account_mask->str());
#if defined(DEBUG_ON)
if (acct)
DEBUG("derive.xact", "Found (or created) account by name");
diff --git a/src/mask.cc b/src/mask.cc
index 135f6669..c1e66ced 100644
--- a/src/mask.cc
+++ b/src/mask.cc
@@ -43,7 +43,11 @@ mask_t::mask_t(const string& pat) : expr()
mask_t& mask_t::operator=(const string& pat)
{
- expr.assign(pat.c_str(), regex::perl | regex::icase);
+#if defined(HAVE_BOOST_REGEX_UNICODE)
+ expr = boost::make_u32regex(pat.c_str(), boost::regex::perl | boost::regex::icase);
+#else
+ expr.assign(pat.c_str(), boost::regex::perl | boost::regex::icase);
+#endif
VERIFY(valid());
return *this;
}
diff --git a/src/mask.h b/src/mask.h
index 32d27f42..62df9b63 100644
--- a/src/mask.h
+++ b/src/mask.h
@@ -45,6 +45,9 @@
#define _MASK_H
#include "utils.h"
+#if defined(HAVE_BOOST_REGEX_UNICODE)
+#include "unistring.h"
+#endif
namespace ledger {
@@ -56,7 +59,11 @@ namespace ledger {
class mask_t
{
public:
+#if defined(HAVE_BOOST_REGEX_UNICODE)
+ boost::u32regex expr;
+#else
boost::regex expr;
+#endif
explicit mask_t(const string& pattern);
@@ -76,17 +83,41 @@ public:
return expr == other.expr;
}
- bool match(const string& str) const {
+ bool match(const string& text) const {
+#if defined(HAVE_BOOST_REGEX_UNICODE)
DEBUG("mask.match",
- "Matching: \"" << str << "\" =~ /" << expr.str() << "/ = "
- << (boost::regex_search(str, expr) ? "true" : "false"));
- return boost::regex_search(str, expr);
+ "Matching: \"" << text << "\" =~ /" << str() << "/ = "
+ << (boost::u32regex_search(text, expr) ? "true" : "false"));
+ return boost::u32regex_search(text, expr);
+#else
+ DEBUG("mask.match",
+ "Matching: \"" << text << "\" =~ /" << str() << "/ = "
+ << (boost::regex_search(text, expr) ? "true" : "false"));
+ return boost::regex_search(text, expr);
+#endif
}
bool empty() const {
return expr.empty();
}
+ string str() const {
+ if (! empty()) {
+#if defined(HAVE_BOOST_REGEX_UNICODE)
+ assert(sizeof(boost::uint32_t) == sizeof(UChar32));
+ unistring ustr;
+ std::basic_string<UChar32> expr_str = expr.str();
+ std::copy(expr_str.begin(), expr_str.end(),
+ std::back_inserter(ustr.utf32chars));
+ return ustr.extract();
+#else
+ return expr.str();
+#endif
+ } else {
+ return empty_string;
+ }
+ }
+
bool valid() const {
if (expr.status() != 0) {
DEBUG("ledger.validate", "mask_t: expr.status() != 0");
@@ -108,7 +139,7 @@ private:
ar & temp;
*this = temp;
} else {
- temp = expr.str();
+ temp = str();
ar & temp;
}
}
@@ -116,7 +147,7 @@ private:
};
inline std::ostream& operator<<(std::ostream& out, const mask_t& mask) {
- out << mask.expr.str();
+ out << mask.str();
return out;
}
diff --git a/src/post.cc b/src/post.cc
index 4f45592f..0fd763a9 100644
--- a/src/post.cc
+++ b/src/post.cc
@@ -246,7 +246,7 @@ namespace {
if (env.value_at(0).is_string())
account = master->find_account(env.get<string>(0), false);
else if (env.value_at(0).is_mask())
- account = master->find_account_re(env.get<mask_t>(0).expr.str());
+ account = master->find_account_re(env.get<mask_t>(0).str());
} else {
account = env->reported_account();
}
diff --git a/src/report.cc b/src/report.cc
index 77548cce..fbe8d37c 100644
--- a/src/report.cc
+++ b/src/report.cc
@@ -322,7 +322,7 @@ value_t report_t::fn_account_total(call_scope_t& args)
acct = session.journal->find_account(name, false);
}
else if (args[0].is_mask()) {
- name = args[0].as_mask().expr.str();
+ name = args[0].as_mask().str();
acct = session.journal->find_account_re(name);
}
else {
diff --git a/src/system.hh.in b/src/system.hh.in
index b0b8f1eb..12f257eb 100644
--- a/src/system.hh.in
+++ b/src/system.hh.in
@@ -164,7 +164,11 @@ typedef std::ostream::pos_type ostream_pos_type;
#include <boost/random/uniform_int.hpp>
#include <boost/random/uniform_real.hpp>
#include <boost/random/variate_generator.hpp>
+#if defined(HAVE_BOOST_REGEX_UNICODE)
+#include <boost/regex/icu.hpp>
+#else
#include <boost/regex.hpp>
+#endif // HAVE_BOOST_REGEX_UNICODE
#include <boost/variant.hpp>
#include <boost/version.hpp>
diff --git a/src/unistring.h b/src/unistring.h
index 268f60e3..bc55b016 100644
--- a/src/unistring.h
+++ b/src/unistring.h
@@ -59,12 +59,15 @@ namespace ledger {
*/
class unistring
{
+public:
std::vector<boost::uint32_t> utf32chars;
-public:
+ unistring() {
+ TRACE_CTOR(unistring, "");
+ }
unistring(const std::string& input)
{
- TRACE_CTOR(unistring, "");
+ TRACE_CTOR(unistring, "std::string");
const char * p = input.c_str();
std::size_t len = input.length();
diff --git a/tools/configure.ac b/tools/configure.ac
index 22b4b96a..747d940d 100644
--- a/tools/configure.ac
+++ b/tools/configure.ac
@@ -193,6 +193,29 @@ else
AC_MSG_FAILURE("Could not find boost_regex library (set CPPFLAGS and LDFLAGS?)")
fi
+AC_CACHE_CHECK(
+ [if boost_regex w/ICU is available],
+ [boost_regex_icu_avail_cv_],
+ [boost_regex_icu_save_libs=$LIBS
+ LIBS="-licuuc $LIBS"
+ AC_LANG_PUSH(C++)
+ AC_LINK_IFELSE(
+ [AC_LANG_PROGRAM(
+ [[#include <boost/regex/icu.hpp>
+ using namespace boost;]],
+ [[std::string text = "Активы";
+ u32regex r = make_u32regex("активы", regex::perl | regex::icase);
+ return u32regex_search(text, r) ? 0 : 1;]])],
+ [boost_regex_icu_avail_cv_=true],
+ [boost_regex_icu_avail_cv_=false])
+ AC_LANG_POP
+ LIBS=$boost_regex_icu_save_libs])
+
+if [test x$boost_regex_icu_avail_cv_ = xtrue ]; then
+ AC_DEFINE([HAVE_BOOST_REGEX_UNICODE], [1], [If the boost_regex library w/ICU is available])
+ LIBS="-licuuc $LIBS"
+fi
+
# check for boost_date_time
AC_CACHE_CHECK(
[if boost_date_time is available],