diff options
author | John Wiegley <johnw@newartisans.com> | 2009-01-24 04:49:58 -0400 |
---|---|---|
committer | John Wiegley <johnw@newartisans.com> | 2009-01-24 04:49:58 -0400 |
commit | a69649fb7f5ca7e20713ec260c5f989ae82d446f (patch) | |
tree | 41929e6fc4d4d20ffcf0bbbfa681dbe3e3624e21 | |
parent | dff450ab3dbcb0819029c0bd5aee8dd78703a864 (diff) | |
download | ledger-a69649fb7f5ca7e20713ec260c5f989ae82d446f.tar.gz ledger-a69649fb7f5ca7e20713ec260c5f989ae82d446f.tar.bz2 ledger-a69649fb7f5ca7e20713ec260c5f989ae82d446f.zip |
Switched over to using irrxml for parsing XML, rather than expat.
-rw-r--r-- | .gitmodules | 3 | ||||
-rw-r--r-- | Makefile.am | 22 | ||||
-rw-r--r-- | README | 3 | ||||
-rwxr-xr-x | acprep | 3 | ||||
-rw-r--r-- | configure.ac | 55 | ||||
-rw-r--r-- | doc/LICENSE-irrxml | 25 | ||||
-rw-r--r-- | doc/README | 6 | ||||
-rw-r--r-- | doc/ledger.texi | 3 | ||||
m--------- | lib/irrxml | 0 | ||||
-rw-r--r-- | src/gnucash.cc | 32 | ||||
-rw-r--r-- | src/main.cc | 4 | ||||
-rw-r--r-- | src/session.h | 2 | ||||
-rw-r--r-- | src/system.hh | 9 | ||||
-rw-r--r-- | src/textual.cc | 10 | ||||
-rw-r--r-- | src/xml.cc | 89 | ||||
-rw-r--r-- | src/xml.h | 38 |
16 files changed, 151 insertions, 153 deletions
diff --git a/.gitmodules b/.gitmodules index 33b949c0..e68ec8fd 100644 --- a/.gitmodules +++ b/.gitmodules @@ -13,3 +13,6 @@ [submodule "lib/utfcpp"] path = lib/utfcpp url = git://github.com/jwiegley/utfcpp.git +[submodule "lib/irrxml"] + path = lib/irrxml + url = git://github.com/jwiegley/irrxml.git diff --git a/Makefile.am b/Makefile.am index b87affd3..0c00d947 100644 --- a/Makefile.am +++ b/Makefile.am @@ -6,7 +6,8 @@ EXTRA_DIST = autogen.sh contrib lib_LTLIBRARIES = libamounts.la libledger.la libamounts_la_CPPFLAGS = -I$(srcdir)/src -I$(srcdir)/lib \ - -I$(srcdir)/lib/utfcpp/source + -I$(srcdir)/lib/utfcpp/source \ + -I$(srcdir)/lib/irrxml/src if HAVE_GDTOA libamounts_la_CPPFLAGS += -I$(top_builddir)/lib/gdtoa -I$(srcdir)/lib/gdtoa endif @@ -31,15 +32,10 @@ libamounts_la_SOURCES = \ src/format.cc \ src/option.cc \ \ - lib/sha1.cpp + lib/sha1.cpp \ + \ + lib/irrxml/src/irrXML.cpp -if HAVE_EXPAT -libamounts_la_CPPFLAGS += -DHAVE_EXPAT=1 -else -if HAVE_XMLPARSE -libamounts_la_CPPFLAGS += -DHAVE_XMLPARSE=1 -endif -endif if HAVE_LIBOFX libamounts_la_CPPFLAGS += -DHAVE_LIBOFX=1 endif @@ -64,6 +60,7 @@ libledger_la_SOURCES = \ src/qif.cc \ src/xml.cc \ src/csv.cc \ + src/gnucash.cc \ \ src/session.cc \ src/report.cc \ @@ -75,13 +72,6 @@ libledger_la_SOURCES = \ src/reconcile.cc \ src/quotes.cc -if HAVE_EXPAT -libledger_la_SOURCES += src/gnucash.cc -else -if HAVE_XMLPARSE -libledger_la_SOURCES += src/gnucash.cc -endif -endif if HAVE_LIBOFX libledger_la_SOURCES += src/ofx.cc endif @@ -42,7 +42,6 @@ Now, if you wish to proceed in this venture, you'll need a few dependencies: - CppUnit 1.12.1 -- if you're building DEVEL - Optionally, Python 2.4 or higher - - Optionally, libexpat or libxml2 - Optionally, libofx * MacPorts @@ -50,7 +49,7 @@ Now, if you wish to proceed in this venture, you'll need a few dependencies: If you build stuff using MacPorts, as I do, here is what you would run: sudo port install boost +complete+python25 - sudo port install gmp pcre cppunit expat libofx + sudo port install gmp pcre cppunit libofx * Ubuntu @@ -204,9 +204,6 @@ while [ -n "$1" ]; do #LDFLAGS="-Wl,-read_only_relocs,suppress" #LIBS="" - #if [ -f /opt/local/lib/libexpat.a ]; then - # LIBS="$LIBS /opt/local/lib/libexpat.a" - #fi #if [ -f /opt/local/lib/libgmp.a ]; then # LIBS="$LIBS /opt/local/lib/libgmp.a" #fi diff --git a/configure.ac b/configure.ac index 7dd911c2..d7eddf99 100644 --- a/configure.ac +++ b/configure.ac @@ -126,61 +126,6 @@ else AC_MSG_FAILURE("Could not find gmp library (set CPPFLAGS and LDFLAGS?)") fi -# check for expat or xmlparse -AC_CACHE_CHECK( - [if libexpat is available], - [libexpat_avail_cv_], - [libexpat_save_libs=$LIBS - LIBS="-lexpat $LIBS" - AC_LANG_PUSH(C++) - AC_TRY_LINK( - [#include <stdio.h> - extern "C" { - #include <expat.h> // expat XML parser - }], - [XML_Parser parser = XML_ParserCreate(NULL); - return parser != NULL;], - [libexpat_avail_cv_=true], - [libexpat_avail_cv_=false]) - AC_LANG_POP - LIBS=$libexpat_save_libs]) - -if [test x$libexpat_avail_cv_ = xtrue ]; then - AM_CONDITIONAL(HAVE_EXPAT, true) - LIBS="-lexpat $LIBS" -else - AM_CONDITIONAL(HAVE_EXPAT, false) -fi - -if [test x$libexpat_avail_cv_ = xfalse ]; then - AC_CACHE_CHECK( - [if libxmlparse is available], - [libxmlparse_avail_cv_], - [libxmlparse_save_libs=$LIBS - LIBS="-lxmlparse -lxmltok $LIBS" - AC_LANG_PUSH(C++) - AC_TRY_LINK( - [#include <stdio.h> - extern "C" { - #include <xmlparse.h> // expat XML parser - }], - [XML_Parser parser = XML_ParserCreate(NULL); - return parser != NULL;], - [libxmlparse_avail_cv_=true], - [libxmlparse_avail_cv_=false]) - AC_LANG_POP - LIBS=$libxmlparse_save_libs]) - - if [test x$libxmlparse_avail_cv_ = xtrue ]; then - AM_CONDITIONAL(HAVE_XMLPARSE, true) - LIBS="-lxmlparse -lxmltok $LIBS" - else - AM_CONDITIONAL(HAVE_XMLPARSE, false) - fi -else - AM_CONDITIONAL(HAVE_XMLPARSE, false) -fi - # check for boost_regex AC_CACHE_CHECK( [if boost_regex is available], diff --git a/doc/LICENSE-irrxml b/doc/LICENSE-irrxml new file mode 100644 index 00000000..cb04a6f8 --- /dev/null +++ b/doc/LICENSE-irrxml @@ -0,0 +1,25 @@ +The license of irrXML is based on the zlib/libpng license. +Even though this license does not require you to mention that you are +using the Irrlicht Engine in your product, an acknowledgement +would be highly appreciated. + +The irrXML License +=========================== + +Copyright (C) 2002-2005 Nikolaus Gebhardt + +This software is provided 'as-is', without any express or implied +warranty. In no event will the authors be held liable for any damages +arising from the use of this software. + +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it +freely, subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. @@ -38,12 +38,6 @@ will not work), and at least these two libraries installed: (On some GNU/Linux systems, the packages you need to install are called "gmp-dev" and "pcre-dev"). -If you wish to read Gnucash data files, you will also need two XML -libraries, which may or may not be available in a single package (it -depends on your distribution): - - libexpat, or libxmlparse and libxmltok - Once you have determined where the headers and libraries for the above packages are installed, run the script "configure", passing those paths. If you installed everything under /usr/local, you can probably diff --git a/doc/ledger.texi b/doc/ledger.texi index abcb8283..7fe5de54 100644 --- a/doc/ledger.texi +++ b/doc/ledger.texi @@ -3942,7 +3942,6 @@ example: That is the extent of the XML data format used by Ledger. It will output such data if the @command{xml} command is used, and can read -the same data as long as the @file{expat} library was available -when Ledger was built. +the same data. @bye diff --git a/lib/irrxml b/lib/irrxml new file mode 160000 +Subproject e0f5ec13193e413ddcfcf70bcb8886c0c6a8a60 diff --git a/src/gnucash.cc b/src/gnucash.cc index 60c72ed4..ed3a8457 100644 --- a/src/gnucash.cc +++ b/src/gnucash.cc @@ -41,10 +41,12 @@ typedef std::pair<const string, account_t *> accounts_pair; typedef std::map<account_t *, commodity_t *> account_comm_map; typedef std::pair<account_t *, commodity_t *> account_comm_pair; +#if 0 + static journal_t * curr_journal; static account_t * master_account; static account_t * curr_account; -static string curr_account_id; +static string curr_account_id; static entry_t * curr_entry; static commodity_t * entry_comm; static commodity_t * curr_comm; @@ -54,7 +56,7 @@ static XML_Parser current_parser; static accounts_map accounts_by_id; static account_comm_map account_comms; static unsigned int count; -static string have_error; +static string have_error; static std::istream * instreamp; static unsigned int offset; @@ -350,14 +352,28 @@ static void dataHandler(void *, const char *s, int len) } } +#endif + bool gnucash_parser_t::test(std::istream& in) const { - char buf[5]; - in.read(buf, 5); + char buf[80]; + char * p; + + in.read(buf, 11); + if (utf8::is_bom(buf)) + p = &buf[3]; + else + p = buf; + + if (std::strncmp(p, "<?xml", 5) != 0) { + in.clear(); + in.seekg(0, std::ios::beg); + return false; + } + in.clear(); in.seekg(0, std::ios::beg); - - return std::strncmp(buf, "<?xml", 5) == 0; + return true; } unsigned int gnucash_parser_t::parse(std::istream& in, @@ -366,6 +382,7 @@ unsigned int gnucash_parser_t::parse(std::istream& in, account_t * master, const path * original_file) { +#if 0 char buf[BUFSIZ]; #if 0 @@ -428,6 +445,9 @@ unsigned int gnucash_parser_t::parse(std::istream& in, curr_account_id.clear(); return count; +#else + return 0; +#endif } } // namespace ledger diff --git a/src/main.cc b/src/main.cc index d49cfbeb..2f51a5bf 100644 --- a/src/main.cc +++ b/src/main.cc @@ -39,10 +39,8 @@ #include "textual.h" #include "qif.h" -#if defined(HAVE_EXPAT) || defined(HAVE_XMLPARSE) #include "xml.h" #include "gnucash.h" -#endif #ifdef HAVE_LIBOFX #include "ofx.h" #endif @@ -469,10 +467,8 @@ int main(int argc, char * argv[], char * envp[]) #if 0 session->register_parser(new ledger::journal_t::binary_parser_t); #endif -#if defined(HAVE_EXPAT) || defined(HAVE_XMLPARSE) session->register_parser(new ledger::xml_parser_t); session->register_parser(new ledger::gnucash_parser_t); -#endif #ifdef HAVE_LIBOFX session->register_parser(new ledger::ofx_parser_t); #endif diff --git a/src/session.h b/src/session.h index a0637b2e..84e19531 100644 --- a/src/session.h +++ b/src/session.h @@ -188,9 +188,7 @@ public: This program is made available under the terms of the BSD Public License.\n\ See LICENSE file included with the distribution for details and disclaimer.\n"; std::cout << "\n(modules: gmp, pcre"; -#if defined(HAVE_EXPAT) || defined(HAVE_XMLPARSE) std::cout << ", xml"; -#endif #ifdef HAVE_LIBOFX std::cout << ", ofx"; #endif diff --git a/src/system.hh b/src/system.hh index 0dc4d33d..b44678d6 100644 --- a/src/system.hh +++ b/src/system.hh @@ -122,13 +122,8 @@ typedef std::ostream::pos_type ostream_pos_type; #include <gmp.h> -extern "C" { -#if defined(HAVE_EXPAT) -#include <expat.h> // expat XML parser -#elif defined(HAVE_XMLPARSE) -#include <xmlparse.h> // expat XML parser -#endif -} +#include "irrXML.h" // XML parser +#include "CXMLReaderImpl.h" #if defined(HAVE_LIBOFX) #include <libofx.h> diff --git a/src/textual.cc b/src/textual.cc index 4600f3f3..95c90708 100644 --- a/src/textual.cc +++ b/src/textual.cc @@ -605,13 +605,9 @@ bool textual_parser_t::test(std::istream& in) const else p = buf; - if (std::strncmp(p, "<?xml", 5) == 0) { -#if defined(HAVE_EXPAT) || defined(HAVE_XMLPARSE) - throw parse_error("Ledger file contains XML data, but format was not recognized"); -#else - throw parse_error("Ledger file contains XML data, but no XML support present"); -#endif - } + if (std::strncmp(p, "<?xml", 5) == 0) + throw_(parse_error, + "Ledger file contains XML data, but format was not recognized"); in.clear(); in.seekg(0, std::ios::beg); @@ -35,10 +35,8 @@ namespace ledger { -#if defined(HAVE_EXPAT) || defined(HAVE_XMLPARSE) - -static XML_Parser current_parser; -static unsigned int count; +static irr::io::IrrXMLReader * current_parser; +static unsigned int count; static journal_t * curr_journal; static entry_t * curr_entry; @@ -51,7 +49,7 @@ static string data; static bool ignore; static string have_error; -static void startElement(void *userData, const char *name, const char **attrs) +static void startElement(const char *name) { if (ignore) return; @@ -68,15 +66,15 @@ static void startElement(void *userData, const char *name, const char **attrs) curr_entry->xacts.back()->set_state(curr_state); } else if (std::strcmp(name, "commodity") == 0) { - if (string(attrs[0]) == "flags") - comm_flags = attrs[1]; + if (const char * p = current_parser->getAttributeValue("flags")) + comm_flags = p; } else if (std::strcmp(name, "total") == 0) { ignore = true; } } -static void endElement(void *userData, const char *name) +static void endElement(const char *name) { if (ignore) { if (std::strcmp(name, "total") == 0) @@ -181,27 +179,32 @@ static void endElement(void *userData, const char *name) } } -static void dataHandler(void *userData, const char *s, int len) -{ - if (! ignore) - data = string(s, len); -} - bool xml_parser_t::test(std::istream& in) const { - char buf[80]; + char buf[80]; + char * p; - in.getline(buf, 79); - if (std::strncmp(buf, "<?xml", 5) != 0) { + DEBUG("xml.parse", "Testing whether the file is XML..."); + + in.read(buf, 10); + if (utf8::is_bom(buf)) + p = &buf[3]; + else + p = buf; + + if (std::strncmp(p, "<?xml", 5) != 0) { in.clear(); in.seekg(0, std::ios::beg); + DEBUG("xml.parse", "Does not begin with <?xml"); return false; } + in.getline(buf, 79); // skip rest of <?xml line in.getline(buf, 79); if (! std::strstr(buf, "<ledger")) { in.clear(); in.seekg(0, std::ios::beg); + DEBUG("xml.parse", "Next line does not begin with <ledger"); return false; } @@ -211,11 +214,13 @@ bool xml_parser_t::test(std::istream& in) const } unsigned int xml_parser_t::parse(std::istream& in, - session_t& session, - journal_t& journal, + session_t& session, + journal_t& journal, account_t * master, const path * original_file) { + TRACE_START(xml_parsing_total, 1, "Total time spent parsing XML:"); + char buf[BUFSIZ]; count = 0; @@ -224,47 +229,55 @@ unsigned int xml_parser_t::parse(std::istream& in, curr_comm = NULL; ignore = false; - XML_Parser parser = XML_ParserCreate(NULL); + irr::io::IrrXMLReader * parser = + new irr::io::CXMLReaderImpl<char, irr::io::IXMLBase>(new CStreamReadCallBack(in)); current_parser = parser; - XML_SetElementHandler(parser, startElement, endElement); - XML_SetCharacterDataHandler(parser, dataHandler); + while (parser->read()) { + switch (parser->getNodeType()) { + case irr::io::EXN_TEXT: + DEBUG("xml.parse", "Read text: " << parser->getNodeData()); + if (! ignore) { + DEBUG("xml.parse", " but ignoring it"); + data = parser->getNodeData(); + } + break; + + case irr::io::EXN_ELEMENT: + DEBUG("xml.parse", "Read element: " << parser->getNodeName()); + startElement(parser->getNodeName()); + break; + case irr::io::EXN_ELEMENT_END: + DEBUG("xml.parse", "End element: " << parser->getNodeName()); + endElement(parser->getNodeName()); + break; - while (! in.eof()) { - in.getline(buf, BUFSIZ - 1); - std::strcat(buf, "\n"); - bool result; - try { - result = XML_Parse(parser, buf, std::strlen(buf), in.eof()); - } - catch (const std::exception& err) { - //unsigned long line = XML_GetCurrentLineNumber(parser) - offset++; - XML_ParserFree(parser); - throw parse_error(err.what()); + default: // ignore: COMMENT, CDATA, UNKNOWN + break; } if (! have_error.empty()) { - //unsigned long line = XML_GetCurrentLineNumber(parser) - offset++; parse_error err(have_error); std::cerr << "Error: " << err.what() << std::endl; have_error = ""; } +#if 0 if (! result) { - //unsigned long line = XML_GetCurrentLineNumber(parser) - offset++; const char * err = XML_ErrorString(XML_GetErrorCode(parser)); XML_ParserFree(parser); throw parse_error(err); } +#endif } - XML_ParserFree(parser); + delete parser; + + TRACE_FINISH(xml_parsing_total, 1); return count; } -#endif // defined(HAVE_EXPAT) || defined(HAVE_XMLPARSE) - void xml_write_amount(std::ostream& out, const amount_t& amount, const int depth = 0) { @@ -38,7 +38,37 @@ namespace ledger { -#if defined(HAVE_EXPAT) || defined(HAVE_XMLPARSE) +class CStreamReadCallBack : public irr::io::IFileReadCallBack +{ + std::istream& in; + std::size_t size; + +public: + //! construct from filename + CStreamReadCallBack(std::istream& _in) : in(_in), size(0) { + TRACE_CTOR(CStreamReadCallBack, "std::istream&"); + } + virtual ~CStreamReadCallBack() { + TRACE_DTOR(CStreamReadCallBack); + } + + virtual int read(void * buffer, int sizeToRead) + { + in.read(static_cast<char *>(buffer), sizeToRead); + return in.gcount(); + } + + virtual int getSize() + { + if (size == 0) { + std::ifstream::pos_type pos = in.tellg(); + in.seekg(0, std::ios_base::end); + size = in.tellg() - pos; + in.seekg(pos, std::ios_base::beg); + } + return size; + } +}; class xml_parser_t : public journal_t::parser_t { @@ -46,14 +76,12 @@ class xml_parser_t : public journal_t::parser_t virtual bool test(std::istream& in) const; virtual unsigned int parse(std::istream& in, - session_t& session, - journal_t& journal, + session_t& session, + journal_t& journal, account_t * master = NULL, const path * original_file = NULL); }; -#endif - class format_xml_entries : public format_entries { bool show_totals; |