diff options
author | John Wiegley <johnw@newartisans.com> | 2009-01-24 04:49:58 -0400 |
---|---|---|
committer | John Wiegley <johnw@newartisans.com> | 2009-01-24 04:49:58 -0400 |
commit | a69649fb7f5ca7e20713ec260c5f989ae82d446f (patch) | |
tree | 41929e6fc4d4d20ffcf0bbbfa681dbe3e3624e21 /src | |
parent | dff450ab3dbcb0819029c0bd5aee8dd78703a864 (diff) | |
download | fork-ledger-a69649fb7f5ca7e20713ec260c5f989ae82d446f.tar.gz fork-ledger-a69649fb7f5ca7e20713ec260c5f989ae82d446f.tar.bz2 fork-ledger-a69649fb7f5ca7e20713ec260c5f989ae82d446f.zip |
Switched over to using irrxml for parsing XML, rather than expat.
Diffstat (limited to 'src')
-rw-r--r-- | src/gnucash.cc | 32 | ||||
-rw-r--r-- | src/main.cc | 4 | ||||
-rw-r--r-- | src/session.h | 2 | ||||
-rw-r--r-- | src/system.hh | 9 | ||||
-rw-r--r-- | src/textual.cc | 10 | ||||
-rw-r--r-- | src/xml.cc | 89 | ||||
-rw-r--r-- | src/xml.h | 38 |
7 files changed, 115 insertions, 69 deletions
diff --git a/src/gnucash.cc b/src/gnucash.cc index 60c72ed4..ed3a8457 100644 --- a/src/gnucash.cc +++ b/src/gnucash.cc @@ -41,10 +41,12 @@ typedef std::pair<const string, account_t *> accounts_pair; typedef std::map<account_t *, commodity_t *> account_comm_map; typedef std::pair<account_t *, commodity_t *> account_comm_pair; +#if 0 + static journal_t * curr_journal; static account_t * master_account; static account_t * curr_account; -static string curr_account_id; +static string curr_account_id; static entry_t * curr_entry; static commodity_t * entry_comm; static commodity_t * curr_comm; @@ -54,7 +56,7 @@ static XML_Parser current_parser; static accounts_map accounts_by_id; static account_comm_map account_comms; static unsigned int count; -static string have_error; +static string have_error; static std::istream * instreamp; static unsigned int offset; @@ -350,14 +352,28 @@ static void dataHandler(void *, const char *s, int len) } } +#endif + bool gnucash_parser_t::test(std::istream& in) const { - char buf[5]; - in.read(buf, 5); + char buf[80]; + char * p; + + in.read(buf, 11); + if (utf8::is_bom(buf)) + p = &buf[3]; + else + p = buf; + + if (std::strncmp(p, "<?xml", 5) != 0) { + in.clear(); + in.seekg(0, std::ios::beg); + return false; + } + in.clear(); in.seekg(0, std::ios::beg); - - return std::strncmp(buf, "<?xml", 5) == 0; + return true; } unsigned int gnucash_parser_t::parse(std::istream& in, @@ -366,6 +382,7 @@ unsigned int gnucash_parser_t::parse(std::istream& in, account_t * master, const path * original_file) { +#if 0 char buf[BUFSIZ]; #if 0 @@ -428,6 +445,9 @@ unsigned int gnucash_parser_t::parse(std::istream& in, curr_account_id.clear(); return count; +#else + return 0; +#endif } } // namespace ledger diff --git a/src/main.cc b/src/main.cc index d49cfbeb..2f51a5bf 100644 --- a/src/main.cc +++ b/src/main.cc @@ -39,10 +39,8 @@ #include "textual.h" #include "qif.h" -#if defined(HAVE_EXPAT) || defined(HAVE_XMLPARSE) #include "xml.h" #include "gnucash.h" -#endif #ifdef HAVE_LIBOFX #include "ofx.h" #endif @@ -469,10 +467,8 @@ int main(int argc, char * argv[], char * envp[]) #if 0 session->register_parser(new ledger::journal_t::binary_parser_t); #endif -#if defined(HAVE_EXPAT) || defined(HAVE_XMLPARSE) session->register_parser(new ledger::xml_parser_t); session->register_parser(new ledger::gnucash_parser_t); -#endif #ifdef HAVE_LIBOFX session->register_parser(new ledger::ofx_parser_t); #endif diff --git a/src/session.h b/src/session.h index a0637b2e..84e19531 100644 --- a/src/session.h +++ b/src/session.h @@ -188,9 +188,7 @@ public: This program is made available under the terms of the BSD Public License.\n\ See LICENSE file included with the distribution for details and disclaimer.\n"; std::cout << "\n(modules: gmp, pcre"; -#if defined(HAVE_EXPAT) || defined(HAVE_XMLPARSE) std::cout << ", xml"; -#endif #ifdef HAVE_LIBOFX std::cout << ", ofx"; #endif diff --git a/src/system.hh b/src/system.hh index 0dc4d33d..b44678d6 100644 --- a/src/system.hh +++ b/src/system.hh @@ -122,13 +122,8 @@ typedef std::ostream::pos_type ostream_pos_type; #include <gmp.h> -extern "C" { -#if defined(HAVE_EXPAT) -#include <expat.h> // expat XML parser -#elif defined(HAVE_XMLPARSE) -#include <xmlparse.h> // expat XML parser -#endif -} +#include "irrXML.h" // XML parser +#include "CXMLReaderImpl.h" #if defined(HAVE_LIBOFX) #include <libofx.h> diff --git a/src/textual.cc b/src/textual.cc index 4600f3f3..95c90708 100644 --- a/src/textual.cc +++ b/src/textual.cc @@ -605,13 +605,9 @@ bool textual_parser_t::test(std::istream& in) const else p = buf; - if (std::strncmp(p, "<?xml", 5) == 0) { -#if defined(HAVE_EXPAT) || defined(HAVE_XMLPARSE) - throw parse_error("Ledger file contains XML data, but format was not recognized"); -#else - throw parse_error("Ledger file contains XML data, but no XML support present"); -#endif - } + if (std::strncmp(p, "<?xml", 5) == 0) + throw_(parse_error, + "Ledger file contains XML data, but format was not recognized"); in.clear(); in.seekg(0, std::ios::beg); @@ -35,10 +35,8 @@ namespace ledger { -#if defined(HAVE_EXPAT) || defined(HAVE_XMLPARSE) - -static XML_Parser current_parser; -static unsigned int count; +static irr::io::IrrXMLReader * current_parser; +static unsigned int count; static journal_t * curr_journal; static entry_t * curr_entry; @@ -51,7 +49,7 @@ static string data; static bool ignore; static string have_error; -static void startElement(void *userData, const char *name, const char **attrs) +static void startElement(const char *name) { if (ignore) return; @@ -68,15 +66,15 @@ static void startElement(void *userData, const char *name, const char **attrs) curr_entry->xacts.back()->set_state(curr_state); } else if (std::strcmp(name, "commodity") == 0) { - if (string(attrs[0]) == "flags") - comm_flags = attrs[1]; + if (const char * p = current_parser->getAttributeValue("flags")) + comm_flags = p; } else if (std::strcmp(name, "total") == 0) { ignore = true; } } -static void endElement(void *userData, const char *name) +static void endElement(const char *name) { if (ignore) { if (std::strcmp(name, "total") == 0) @@ -181,27 +179,32 @@ static void endElement(void *userData, const char *name) } } -static void dataHandler(void *userData, const char *s, int len) -{ - if (! ignore) - data = string(s, len); -} - bool xml_parser_t::test(std::istream& in) const { - char buf[80]; + char buf[80]; + char * p; - in.getline(buf, 79); - if (std::strncmp(buf, "<?xml", 5) != 0) { + DEBUG("xml.parse", "Testing whether the file is XML..."); + + in.read(buf, 10); + if (utf8::is_bom(buf)) + p = &buf[3]; + else + p = buf; + + if (std::strncmp(p, "<?xml", 5) != 0) { in.clear(); in.seekg(0, std::ios::beg); + DEBUG("xml.parse", "Does not begin with <?xml"); return false; } + in.getline(buf, 79); // skip rest of <?xml line in.getline(buf, 79); if (! std::strstr(buf, "<ledger")) { in.clear(); in.seekg(0, std::ios::beg); + DEBUG("xml.parse", "Next line does not begin with <ledger"); return false; } @@ -211,11 +214,13 @@ bool xml_parser_t::test(std::istream& in) const } unsigned int xml_parser_t::parse(std::istream& in, - session_t& session, - journal_t& journal, + session_t& session, + journal_t& journal, account_t * master, const path * original_file) { + TRACE_START(xml_parsing_total, 1, "Total time spent parsing XML:"); + char buf[BUFSIZ]; count = 0; @@ -224,47 +229,55 @@ unsigned int xml_parser_t::parse(std::istream& in, curr_comm = NULL; ignore = false; - XML_Parser parser = XML_ParserCreate(NULL); + irr::io::IrrXMLReader * parser = + new irr::io::CXMLReaderImpl<char, irr::io::IXMLBase>(new CStreamReadCallBack(in)); current_parser = parser; - XML_SetElementHandler(parser, startElement, endElement); - XML_SetCharacterDataHandler(parser, dataHandler); + while (parser->read()) { + switch (parser->getNodeType()) { + case irr::io::EXN_TEXT: + DEBUG("xml.parse", "Read text: " << parser->getNodeData()); + if (! ignore) { + DEBUG("xml.parse", " but ignoring it"); + data = parser->getNodeData(); + } + break; + + case irr::io::EXN_ELEMENT: + DEBUG("xml.parse", "Read element: " << parser->getNodeName()); + startElement(parser->getNodeName()); + break; + case irr::io::EXN_ELEMENT_END: + DEBUG("xml.parse", "End element: " << parser->getNodeName()); + endElement(parser->getNodeName()); + break; - while (! in.eof()) { - in.getline(buf, BUFSIZ - 1); - std::strcat(buf, "\n"); - bool result; - try { - result = XML_Parse(parser, buf, std::strlen(buf), in.eof()); - } - catch (const std::exception& err) { - //unsigned long line = XML_GetCurrentLineNumber(parser) - offset++; - XML_ParserFree(parser); - throw parse_error(err.what()); + default: // ignore: COMMENT, CDATA, UNKNOWN + break; } if (! have_error.empty()) { - //unsigned long line = XML_GetCurrentLineNumber(parser) - offset++; parse_error err(have_error); std::cerr << "Error: " << err.what() << std::endl; have_error = ""; } +#if 0 if (! result) { - //unsigned long line = XML_GetCurrentLineNumber(parser) - offset++; const char * err = XML_ErrorString(XML_GetErrorCode(parser)); XML_ParserFree(parser); throw parse_error(err); } +#endif } - XML_ParserFree(parser); + delete parser; + + TRACE_FINISH(xml_parsing_total, 1); return count; } -#endif // defined(HAVE_EXPAT) || defined(HAVE_XMLPARSE) - void xml_write_amount(std::ostream& out, const amount_t& amount, const int depth = 0) { @@ -38,7 +38,37 @@ namespace ledger { -#if defined(HAVE_EXPAT) || defined(HAVE_XMLPARSE) +class CStreamReadCallBack : public irr::io::IFileReadCallBack +{ + std::istream& in; + std::size_t size; + +public: + //! construct from filename + CStreamReadCallBack(std::istream& _in) : in(_in), size(0) { + TRACE_CTOR(CStreamReadCallBack, "std::istream&"); + } + virtual ~CStreamReadCallBack() { + TRACE_DTOR(CStreamReadCallBack); + } + + virtual int read(void * buffer, int sizeToRead) + { + in.read(static_cast<char *>(buffer), sizeToRead); + return in.gcount(); + } + + virtual int getSize() + { + if (size == 0) { + std::ifstream::pos_type pos = in.tellg(); + in.seekg(0, std::ios_base::end); + size = in.tellg() - pos; + in.seekg(pos, std::ios_base::beg); + } + return size; + } +}; class xml_parser_t : public journal_t::parser_t { @@ -46,14 +76,12 @@ class xml_parser_t : public journal_t::parser_t virtual bool test(std::istream& in) const; virtual unsigned int parse(std::istream& in, - session_t& session, - journal_t& journal, + session_t& session, + journal_t& journal, account_t * master = NULL, const path * original_file = NULL); }; -#endif - class format_xml_entries : public format_entries { bool show_totals; |