From 0cf9fbfbf8d7eb1c30369938c2239e31d44c9a4e Mon Sep 17 00:00:00 2001 From: John Wiegley Date: Sat, 7 Nov 2009 06:38:57 -0500 Subject: acprep's --boost option now takes an argument --- acprep | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'acprep') diff --git a/acprep b/acprep index b2c39b7c..80de4c73 100755 --- a/acprep +++ b/acprep @@ -234,7 +234,7 @@ class PrepareBuild(CommandLineApp): type='int', action='store', dest='jobs', default=1, help='Allow N make jobs at once') op.add_option('', '--boost', metavar='SUFFIX', - action="callback", + action="callback", type="string", callback=self.option_boost, help='Set Boost library suffix (ex: "--boost=-mt")') op.add_option('', '--force', action="callback", -- cgit v1.2.3 From c8641a6de65670b8833992c94c51a586a6434a74 Mon Sep 17 00:00:00 2001 From: John Wiegley Date: Sat, 7 Nov 2009 08:32:44 -0500 Subject: Added support for Boost.Regex w/ ICU This allows for correct searching of UTF-8 encoded strings, such as lower-case versions of Russian words to find mixed-case words. 
--- acprep | 20 ++++++++++++++++---- doc/sample.dat | 2 +- lib/Makefile | 39 +++++++++++++++++++++++++++++++-------- src/derive.cc | 6 +++--- src/mask.cc | 6 +++++- src/mask.h | 43 +++++++++++++++++++++++++++++++++++++------ src/post.cc | 2 +- src/report.cc | 2 +- src/system.hh.in | 4 ++++ src/unistring.h | 7 +++++-- tools/configure.ac | 23 +++++++++++++++++++++++ 11 files changed, 127 insertions(+), 27 deletions(-) (limited to 'acprep') diff --git a/acprep b/acprep index 80de4c73..6d4a223a 100755 --- a/acprep +++ b/acprep @@ -751,6 +751,10 @@ class PrepareBuild(CommandLineApp): self.sys_include_dirs.insert(0, '/usr/local/stow/cppunit/include') self.sys_library_dirs.insert(0, '/usr/local/stow/cppunit/lib') + if exists('/usr/local/stow/icu/include'): + self.sys_include_dirs.insert(0, '/usr/local/stow/icu/include') + self.sys_library_dirs.insert(0, '/usr/local/stow/icu/lib') + self.CXXFLAGS.append('-march=nocona') self.CXXFLAGS.append('-msse3') self.CPPFLAGS.append('-D_GLIBCXX_FULLY_DYNAMIC_STRING=1') @@ -979,6 +983,14 @@ class PrepareBuild(CommandLineApp): self.sys_include_dirs.insert(0, '/usr/local/stow/cppunit-debug/include') self.sys_library_dirs.insert(0, '/usr/local/stow/cppunit-debug/lib') + if exists('/usr/local/stow/icu-debug/include'): + if '/usr/local/stow/icu/include' in self.sys_include_dirs: + self.sys_include_dirs.remove('/usr/local/stow/icu/include') + self.sys_library_dirs.remove('/usr/local/stow/icu/lib') + + self.sys_include_dirs.insert(0, '/usr/local/stow/icu-debug/include') + self.sys_library_dirs.insert(0, '/usr/local/stow/icu-debug/lib') + if exists('/opt/local/lib/libboost_regex-d.a'): self.envvars['BOOST_HOME'] = '/opt/local' self.envvars['BOOST_SUFFIX'] = '-d' @@ -988,9 +1000,9 @@ class PrepareBuild(CommandLineApp): self.sys_include_dirs.append('/opt/local/include/boost') - elif exists('/usr/local/lib/libboost_regex-xgcc44-sd-1_40.a'): + elif exists('/usr/local/lib/libboost_regex-xgcc44-d-1_40.a'): self.envvars['BOOST_HOME'] = '/usr/local' - 
self.envvars['BOOST_SUFFIX'] = '-xgcc44-sd-1_40' + self.envvars['BOOST_SUFFIX'] = '-xgcc44-d-1_40' self.log.info('Setting BOOST_SUFFIX => %s' % self.envvars['BOOST_SUFFIX']) @@ -1005,9 +1017,9 @@ class PrepareBuild(CommandLineApp): self.sys_include_dirs.append('/opt/local/include/boost') - elif exists('/usr/local/lib/libboost_regex-xgcc44-s-1_40.a'): + elif exists('/usr/local/lib/libboost_regex-xgcc44-1_40.a'): self.envvars['BOOST_HOME'] = '/usr/local' - self.envvars['BOOST_SUFFIX'] = '-xgcc44-s-1_40' + self.envvars['BOOST_SUFFIX'] = '-xgcc44-1_40' self.log.info('Setting BOOST_SUFFIX => %s' % self.envvars['BOOST_SUFFIX']) diff --git a/doc/sample.dat b/doc/sample.dat index 002d20ee..e773d6df 100644 --- a/doc/sample.dat +++ b/doc/sample.dat @@ -24,7 +24,7 @@ N $ Income:Salary 2004/05/14 * Another dày in which there is Páying - Русский язык:Русский язык:Русский язык:Русский язык $1000.00 + Русский язык:Активы:Русский язык:Русский язык $1000.00 Income:Salary 2004/05/27 Book Store diff --git a/lib/Makefile b/lib/Makefile index 07cf77ea..3a9c3214 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -2,34 +2,57 @@ # This is only important if you intend to produce a Ledger binary for # installation. 
-#ARCH_CFLAGS = -g -arch i386 -arch ppc -isysroot /Developer/SDKs/MacOSX10.5.sdk -#ARCH_LDFLAGS = -g -arch i386 -arch ppc -Wl,-syslibroot,/Developer/SDKs/MacOSX10.5.sdk - STOW_ROOT = /usr/local/stow BOOST_SOURCE = boost BOOST_VERSION = 1_40_0 -# architecture=combined +icu-release: + -(cd icu/source; make distclean) + (cd icu/source; sh autogen.sh; \ + ./configure CPPFLAGS="" \ + CFLAGS="$(ARCH_CFLAGS)" \ + LDFLAGS="$(ARCH_LDFLAGS)" \ + CC="$(CC)" CXX="$(CXX)" LD="$(LD)" \ + --enable-static \ + --prefix=$(STOW_ROOT)/icu && \ + make install) + +icu-debug: + -(cd icu/source; make distclean) + (cd icu/source; sh autogen.sh; \ + ./configure CPPFLAGS="-D_GLIBCXX_DEBUG=1" \ + CFLAGS="-g $(ARCH_CFLAGS)" \ + LDFLAGS="-g $(ARCH_LDFLAGS)" \ + CC="$(CC)" CXX="$(CXX)" LD="$(LD)" \ + --enable-static --enable-debug \ + --prefix=$(STOW_ROOT)/icu-debug && \ + make install) + +icu-build: icu-release icu-debug + boost-release: (cd $(BOOST_SOURCE) && \ bjam release --prefix=$(STOW_ROOT)/boost_$(BOOST_VERSION) \ --build-dir=$(HOME)/Products/boost_$(BOOST_VERSION) \ - --toolset=darwin --build-type=complete --layout=versioned install) + --toolset=darwin --build-type=complete --layout=versioned \ + -sHAVE_ICU=1 -sICU_PATH=/usr/local/stow/icu install) boost-debug: (cd $(BOOST_SOURCE) && \ bjam debug --prefix=$(STOW_ROOT)/boost_$(BOOST_VERSION) \ --build-dir=$(HOME)/Products/boost_$(BOOST_VERSION) \ --toolset=darwin --build-type=complete --layout=versioned \ - define=_GLIBCXX_DEBUG=1 install) + define=_GLIBCXX_DEBUG=1 \ + -sHAVE_ICU=1 -sICU_PATH=/usr/local/stow/icu-debug install) boost-build: boost-release boost-debug cppunit-release: -(cd cppunit; make distclean) (cd cppunit; sh autogen.sh; \ - ./configure CFLAGS="$(ARCH_CFLAGS)" \ + ./configure CPPFLAGS="" \ + CFLAGS="$(ARCH_CFLAGS)" \ LDFLAGS="$(ARCH_LDFLAGS)" \ CC="$(CC)" CXX="$(CXX)" LD="$(LD)" \ --prefix=$(STOW_ROOT)/cppunit && \ @@ -47,4 +70,4 @@ cppunit-debug: cppunit-build: cppunit-release cppunit-debug -build-all: boost-build 
cppunit-build +all: boost-build cppunit-build diff --git a/src/derive.cc b/src/derive.cc index d3a7a37d..081b96b2 100644 --- a/src/derive.cc +++ b/src/derive.cc @@ -307,7 +307,7 @@ namespace { DEBUG("derive.xact", "Setting note from match: " << *added->note); #endif } else { - added->payee = tmpl.payee_mask.expr.str(); + added->payee = tmpl.payee_mask.str(); DEBUG("derive.xact", "Setting payee from template: " << added->payee); } @@ -403,14 +403,14 @@ namespace { account_t * acct = NULL; if (! acct) { - acct = journal.find_account_re(post.account_mask->expr.str()); + acct = journal.find_account_re(post.account_mask->str()); #if defined(DEBUG_ON) if (acct) DEBUG("derive.xact", "Found account as a regular expression"); #endif } if (! acct) { - acct = journal.find_account(post.account_mask->expr.str()); + acct = journal.find_account(post.account_mask->str()); #if defined(DEBUG_ON) if (acct) DEBUG("derive.xact", "Found (or created) account by name"); diff --git a/src/mask.cc b/src/mask.cc index 135f6669..c1e66ced 100644 --- a/src/mask.cc +++ b/src/mask.cc @@ -43,7 +43,11 @@ mask_t::mask_t(const string& pat) : expr() mask_t& mask_t::operator=(const string& pat) { - expr.assign(pat.c_str(), regex::perl | regex::icase); +#if defined(HAVE_BOOST_REGEX_UNICODE) + expr = boost::make_u32regex(pat.c_str(), boost::regex::perl | boost::regex::icase); +#else + expr.assign(pat.c_str(), boost::regex::perl | boost::regex::icase); +#endif VERIFY(valid()); return *this; } diff --git a/src/mask.h b/src/mask.h index 32d27f42..62df9b63 100644 --- a/src/mask.h +++ b/src/mask.h @@ -45,6 +45,9 @@ #define _MASK_H #include "utils.h" +#if defined(HAVE_BOOST_REGEX_UNICODE) +#include "unistring.h" +#endif namespace ledger { @@ -56,7 +59,11 @@ namespace ledger { class mask_t { public: +#if defined(HAVE_BOOST_REGEX_UNICODE) + boost::u32regex expr; +#else boost::regex expr; +#endif explicit mask_t(const string& pattern); @@ -76,17 +83,41 @@ public: return expr == other.expr; } - bool match(const 
string& str) const { + bool match(const string& text) const { +#if defined(HAVE_BOOST_REGEX_UNICODE) DEBUG("mask.match", - "Matching: \"" << str << "\" =~ /" << expr.str() << "/ = " - << (boost::regex_search(str, expr) ? "true" : "false")); - return boost::regex_search(str, expr); + "Matching: \"" << text << "\" =~ /" << str() << "/ = " + << (boost::u32regex_search(text, expr) ? "true" : "false")); + return boost::u32regex_search(text, expr); +#else + DEBUG("mask.match", + "Matching: \"" << text << "\" =~ /" << str() << "/ = " + << (boost::regex_search(text, expr) ? "true" : "false")); + return boost::regex_search(text, expr); +#endif } bool empty() const { return expr.empty(); } + string str() const { + if (! empty()) { +#if defined(HAVE_BOOST_REGEX_UNICODE) + assert(sizeof(boost::uint32_t) == sizeof(UChar32)); + unistring ustr; + std::basic_string<UChar32> expr_str = expr.str(); + std::copy(expr_str.begin(), expr_str.end(), + std::back_inserter(ustr.utf32chars)); + return ustr.extract(); +#else + return expr.str(); +#endif + } else { + return empty_string; + } + } + bool valid() const { if (expr.status() != 0) { DEBUG("ledger.validate", "mask_t: expr.status() != 0"); @@ -108,7 +139,7 @@ private: ar & temp; *this = temp; } else { - temp = expr.str(); + temp = str(); ar & temp; } } @@ -116,7 +147,7 @@ private: }; inline std::ostream& operator<<(std::ostream& out, const mask_t& mask) { - out << mask.expr.str(); + out << mask.str(); return out; } diff --git a/src/post.cc b/src/post.cc index 4f45592f..0fd763a9 100644 --- a/src/post.cc +++ b/src/post.cc @@ -246,7 +246,7 @@ namespace { if (env.value_at(0).is_string()) account = master->find_account(env.get<string>(0), false); else if (env.value_at(0).is_mask()) - account = master->find_account_re(env.get<mask_t>(0).expr.str()); + account = master->find_account_re(env.get<mask_t>(0).str()); } else { account = env->reported_account(); } diff --git a/src/report.cc b/src/report.cc index 77548cce..fbe8d37c 100644 --- a/src/report.cc +++ b/src/report.cc @@ 
-322,7 +322,7 @@ value_t report_t::fn_account_total(call_scope_t& args) acct = session.journal->find_account(name, false); } else if (args[0].is_mask()) { - name = args[0].as_mask().expr.str(); + name = args[0].as_mask().str(); acct = session.journal->find_account_re(name); } else { diff --git a/src/system.hh.in b/src/system.hh.in index b0b8f1eb..12f257eb 100644 --- a/src/system.hh.in +++ b/src/system.hh.in @@ -164,7 +164,11 @@ typedef std::ostream::pos_type ostream_pos_type; #include #include #include +#if defined(HAVE_BOOST_REGEX_UNICODE) +#include <boost/regex/icu.hpp> +#else #include <boost/regex.hpp> +#endif // HAVE_BOOST_REGEX_UNICODE #include #include diff --git a/src/unistring.h b/src/unistring.h index 268f60e3..bc55b016 100644 --- a/src/unistring.h +++ b/src/unistring.h @@ -59,12 +59,15 @@ namespace ledger { */ class unistring { +public: std::vector<boost::uint32_t> utf32chars; -public: + unistring() { + TRACE_CTOR(unistring, ""); + } unistring(const std::string& input) { - TRACE_CTOR(unistring, ""); + TRACE_CTOR(unistring, "std::string"); const char * p = input.c_str(); std::size_t len = input.length(); diff --git a/tools/configure.ac b/tools/configure.ac index 22b4b96a..747d940d 100644 --- a/tools/configure.ac +++ b/tools/configure.ac @@ -193,6 +193,29 @@ else AC_MSG_FAILURE("Could not find boost_regex library (set CPPFLAGS and LDFLAGS?)") fi +AC_CACHE_CHECK( + [if boost_regex w/ICU is available], + [boost_regex_icu_avail_cv_], + [boost_regex_icu_save_libs=$LIBS + LIBS="-licuuc $LIBS" + AC_LANG_PUSH(C++) + AC_LINK_IFELSE( + [AC_LANG_PROGRAM( + [[#include <boost/regex/icu.hpp> + using namespace boost;]], + [[std::string text = "Активы"; + u32regex r = make_u32regex("активы", regex::perl | regex::icase); + return u32regex_search(text, r) ? 
0 : 1;]])], + [boost_regex_icu_avail_cv_=true], + [boost_regex_icu_avail_cv_=false]) + AC_LANG_POP + LIBS=$boost_regex_icu_save_libs]) + +if [test x$boost_regex_icu_avail_cv_ = xtrue ]; then + AC_DEFINE([HAVE_BOOST_REGEX_UNICODE], [1], [If the boost_regex library w/ICU is available]) + LIBS="-licuuc $LIBS" +fi + # check for boost_date_time AC_CACHE_CHECK( [if boost_date_time is available], -- cgit v1.2.3 From 01255bdf6c621156c7a77e6ee9cfa46fb2c6f115 Mon Sep 17 00:00:00 2001 From: John Wiegley Date: Sat, 7 Nov 2009 08:59:19 -0500 Subject: Use static Boost when available --- acprep | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'acprep') diff --git a/acprep b/acprep index 6d4a223a..7335e17d 100755 --- a/acprep +++ b/acprep @@ -1000,6 +1000,14 @@ class PrepareBuild(CommandLineApp): self.sys_include_dirs.append('/opt/local/include/boost') + elif exists('/usr/local/lib/libboost_regex-xgcc44-sd-1_40.a'): + self.envvars['BOOST_HOME'] = '/usr/local' + self.envvars['BOOST_SUFFIX'] = '-xgcc44-sd-1_40' + self.log.info('Setting BOOST_SUFFIX => %s' % + self.envvars['BOOST_SUFFIX']) + + self.sys_include_dirs.append('/usr/local/include/boost-1_40') + elif exists('/usr/local/lib/libboost_regex-xgcc44-d-1_40.a'): self.envvars['BOOST_HOME'] = '/usr/local' self.envvars['BOOST_SUFFIX'] = '-xgcc44-d-1_40' -- cgit v1.2.3