summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author John Wiegley <johnw@newartisans.com> 2009-01-24 04:49:58 -0400
committer John Wiegley <johnw@newartisans.com> 2009-01-24 04:49:58 -0400
commit a69649fb7f5ca7e20713ec260c5f989ae82d446f (patch)
tree 41929e6fc4d4d20ffcf0bbbfa681dbe3e3624e21 /src
parent dff450ab3dbcb0819029c0bd5aee8dd78703a864 (diff)
download fork-ledger-a69649fb7f5ca7e20713ec260c5f989ae82d446f.tar.gz
fork-ledger-a69649fb7f5ca7e20713ec260c5f989ae82d446f.tar.bz2
fork-ledger-a69649fb7f5ca7e20713ec260c5f989ae82d446f.zip
Switched over to using irrxml for parsing XML, rather than expat.
Diffstat (limited to 'src')
-rw-r--r-- src/gnucash.cc 32
-rw-r--r-- src/main.cc 4
-rw-r--r-- src/session.h 2
-rw-r--r-- src/system.hh 9
-rw-r--r-- src/textual.cc 10
-rw-r--r-- src/xml.cc 89
-rw-r--r-- src/xml.h 38
7 files changed, 115 insertions, 69 deletions
diff --git a/src/gnucash.cc b/src/gnucash.cc
index 60c72ed4..ed3a8457 100644
--- a/src/gnucash.cc
+++ b/src/gnucash.cc
@@ -41,10 +41,12 @@ typedef std::pair<const string, account_t *> accounts_pair;
typedef std::map<account_t *, commodity_t *> account_comm_map;
typedef std::pair<account_t *, commodity_t *> account_comm_pair;
+#if 0
+
static journal_t * curr_journal;
static account_t * master_account;
static account_t * curr_account;
-static string curr_account_id;
+static string curr_account_id;
static entry_t * curr_entry;
static commodity_t * entry_comm;
static commodity_t * curr_comm;
@@ -54,7 +56,7 @@ static XML_Parser current_parser;
static accounts_map accounts_by_id;
static account_comm_map account_comms;
static unsigned int count;
-static string have_error;
+static string have_error;
static std::istream * instreamp;
static unsigned int offset;
@@ -350,14 +352,28 @@ static void dataHandler(void *, const char *s, int len)
}
}
+#endif
+
bool gnucash_parser_t::test(std::istream& in) const
{
- char buf[5];
- in.read(buf, 5);
+ char buf[80];
+ char * p;
+
+ in.read(buf, 11);
+ if (utf8::is_bom(buf))
+ p = &buf[3];
+ else
+ p = buf;
+
+ if (std::strncmp(p, "<?xml", 5) != 0) {
+ in.clear();
+ in.seekg(0, std::ios::beg);
+ return false;
+ }
+
in.clear();
in.seekg(0, std::ios::beg);
-
- return std::strncmp(buf, "<?xml", 5) == 0;
+ return true;
}
unsigned int gnucash_parser_t::parse(std::istream& in,
@@ -366,6 +382,7 @@ unsigned int gnucash_parser_t::parse(std::istream& in,
account_t * master,
const path * original_file)
{
+#if 0
char buf[BUFSIZ];
#if 0
@@ -428,6 +445,9 @@ unsigned int gnucash_parser_t::parse(std::istream& in,
curr_account_id.clear();
return count;
+#else
+ return 0;
+#endif
}
} // namespace ledger
diff --git a/src/main.cc b/src/main.cc
index d49cfbeb..2f51a5bf 100644
--- a/src/main.cc
+++ b/src/main.cc
@@ -39,10 +39,8 @@
#include "textual.h"
#include "qif.h"
-#if defined(HAVE_EXPAT) || defined(HAVE_XMLPARSE)
#include "xml.h"
#include "gnucash.h"
-#endif
#ifdef HAVE_LIBOFX
#include "ofx.h"
#endif
@@ -469,10 +467,8 @@ int main(int argc, char * argv[], char * envp[])
#if 0
session->register_parser(new ledger::journal_t::binary_parser_t);
#endif
-#if defined(HAVE_EXPAT) || defined(HAVE_XMLPARSE)
session->register_parser(new ledger::xml_parser_t);
session->register_parser(new ledger::gnucash_parser_t);
-#endif
#ifdef HAVE_LIBOFX
session->register_parser(new ledger::ofx_parser_t);
#endif
diff --git a/src/session.h b/src/session.h
index a0637b2e..84e19531 100644
--- a/src/session.h
+++ b/src/session.h
@@ -188,9 +188,7 @@ public:
This program is made available under the terms of the BSD Public License.\n\
See LICENSE file included with the distribution for details and disclaimer.\n";
std::cout << "\n(modules: gmp, pcre";
-#if defined(HAVE_EXPAT) || defined(HAVE_XMLPARSE)
std::cout << ", xml";
-#endif
#ifdef HAVE_LIBOFX
std::cout << ", ofx";
#endif
diff --git a/src/system.hh b/src/system.hh
index 0dc4d33d..b44678d6 100644
--- a/src/system.hh
+++ b/src/system.hh
@@ -122,13 +122,8 @@ typedef std::ostream::pos_type ostream_pos_type;
#include <gmp.h>
-extern "C" {
-#if defined(HAVE_EXPAT)
-#include <expat.h> // expat XML parser
-#elif defined(HAVE_XMLPARSE)
-#include <xmlparse.h> // expat XML parser
-#endif
-}
+#include "irrXML.h" // XML parser
+#include "CXMLReaderImpl.h"
#if defined(HAVE_LIBOFX)
#include <libofx.h>
diff --git a/src/textual.cc b/src/textual.cc
index 4600f3f3..95c90708 100644
--- a/src/textual.cc
+++ b/src/textual.cc
@@ -605,13 +605,9 @@ bool textual_parser_t::test(std::istream& in) const
else
p = buf;
- if (std::strncmp(p, "<?xml", 5) == 0) {
-#if defined(HAVE_EXPAT) || defined(HAVE_XMLPARSE)
- throw parse_error("Ledger file contains XML data, but format was not recognized");
-#else
- throw parse_error("Ledger file contains XML data, but no XML support present");
-#endif
- }
+ if (std::strncmp(p, "<?xml", 5) == 0)
+ throw_(parse_error,
+ "Ledger file contains XML data, but format was not recognized");
in.clear();
in.seekg(0, std::ios::beg);
diff --git a/src/xml.cc b/src/xml.cc
index ccaa65df..0d04c158 100644
--- a/src/xml.cc
+++ b/src/xml.cc
@@ -35,10 +35,8 @@
namespace ledger {
-#if defined(HAVE_EXPAT) || defined(HAVE_XMLPARSE)
-
-static XML_Parser current_parser;
-static unsigned int count;
+static irr::io::IrrXMLReader * current_parser;
+static unsigned int count;
static journal_t * curr_journal;
static entry_t * curr_entry;
@@ -51,7 +49,7 @@ static string data;
static bool ignore;
static string have_error;
-static void startElement(void *userData, const char *name, const char **attrs)
+static void startElement(const char *name)
{
if (ignore)
return;
@@ -68,15 +66,15 @@ static void startElement(void *userData, const char *name, const char **attrs)
curr_entry->xacts.back()->set_state(curr_state);
}
else if (std::strcmp(name, "commodity") == 0) {
- if (string(attrs[0]) == "flags")
- comm_flags = attrs[1];
+ if (const char * p = current_parser->getAttributeValue("flags"))
+ comm_flags = p;
}
else if (std::strcmp(name, "total") == 0) {
ignore = true;
}
}
-static void endElement(void *userData, const char *name)
+static void endElement(const char *name)
{
if (ignore) {
if (std::strcmp(name, "total") == 0)
@@ -181,27 +179,32 @@ static void endElement(void *userData, const char *name)
}
}
-static void dataHandler(void *userData, const char *s, int len)
-{
- if (! ignore)
- data = string(s, len);
-}
-
bool xml_parser_t::test(std::istream& in) const
{
- char buf[80];
+ char buf[80];
+ char * p;
- in.getline(buf, 79);
- if (std::strncmp(buf, "<?xml", 5) != 0) {
+ DEBUG("xml.parse", "Testing whether the file is XML...");
+
+ in.read(buf, 10);
+ if (utf8::is_bom(buf))
+ p = &buf[3];
+ else
+ p = buf;
+
+ if (std::strncmp(p, "<?xml", 5) != 0) {
in.clear();
in.seekg(0, std::ios::beg);
+ DEBUG("xml.parse", "Does not begin with <?xml");
return false;
}
+ in.getline(buf, 79); // skip rest of <?xml line
in.getline(buf, 79);
if (! std::strstr(buf, "<ledger")) {
in.clear();
in.seekg(0, std::ios::beg);
+ DEBUG("xml.parse", "Next line does not begin with <ledger");
return false;
}
@@ -211,11 +214,13 @@ bool xml_parser_t::test(std::istream& in) const
}
unsigned int xml_parser_t::parse(std::istream& in,
- session_t& session,
- journal_t& journal,
+ session_t& session,
+ journal_t& journal,
account_t * master,
const path * original_file)
{
+ TRACE_START(xml_parsing_total, 1, "Total time spent parsing XML:");
+
char buf[BUFSIZ];
count = 0;
@@ -224,47 +229,55 @@ unsigned int xml_parser_t::parse(std::istream& in,
curr_comm = NULL;
ignore = false;
- XML_Parser parser = XML_ParserCreate(NULL);
+ irr::io::IrrXMLReader * parser =
+ new irr::io::CXMLReaderImpl<char, irr::io::IXMLBase>(new CStreamReadCallBack(in));
current_parser = parser;
- XML_SetElementHandler(parser, startElement, endElement);
- XML_SetCharacterDataHandler(parser, dataHandler);
+ while (parser->read()) {
+ switch (parser->getNodeType()) {
+ case irr::io::EXN_TEXT:
+ DEBUG("xml.parse", "Read text: " << parser->getNodeData());
+ if (! ignore) {
+ DEBUG("xml.parse", " but ignoring it");
+ data = parser->getNodeData();
+ }
+ break;
+
+ case irr::io::EXN_ELEMENT:
+ DEBUG("xml.parse", "Read element: " << parser->getNodeName());
+ startElement(parser->getNodeName());
+ break;
+ case irr::io::EXN_ELEMENT_END:
+ DEBUG("xml.parse", "End element: " << parser->getNodeName());
+ endElement(parser->getNodeName());
+ break;
- while (! in.eof()) {
- in.getline(buf, BUFSIZ - 1);
- std::strcat(buf, "\n");
- bool result;
- try {
- result = XML_Parse(parser, buf, std::strlen(buf), in.eof());
- }
- catch (const std::exception& err) {
- //unsigned long line = XML_GetCurrentLineNumber(parser) - offset++;
- XML_ParserFree(parser);
- throw parse_error(err.what());
+ default: // ignore: COMMENT, CDATA, UNKNOWN
+ break;
}
if (! have_error.empty()) {
- //unsigned long line = XML_GetCurrentLineNumber(parser) - offset++;
parse_error err(have_error);
std::cerr << "Error: " << err.what() << std::endl;
have_error = "";
}
+#if 0
if (! result) {
- //unsigned long line = XML_GetCurrentLineNumber(parser) - offset++;
const char * err = XML_ErrorString(XML_GetErrorCode(parser));
XML_ParserFree(parser);
throw parse_error(err);
}
+#endif
}
- XML_ParserFree(parser);
+ delete parser;
+
+ TRACE_FINISH(xml_parsing_total, 1);
return count;
}
-#endif // defined(HAVE_EXPAT) || defined(HAVE_XMLPARSE)
-
void xml_write_amount(std::ostream& out, const amount_t& amount,
const int depth = 0)
{
diff --git a/src/xml.h b/src/xml.h
index 6630146f..8e1c5af7 100644
--- a/src/xml.h
+++ b/src/xml.h
@@ -38,7 +38,37 @@
namespace ledger {
-#if defined(HAVE_EXPAT) || defined(HAVE_XMLPARSE)
+class CStreamReadCallBack : public irr::io::IFileReadCallBack
+{
+ std::istream& in;
+ std::size_t size;
+
+public:
+ //! construct from an already-open input stream (held by reference)
+ CStreamReadCallBack(std::istream& _in) : in(_in), size(0) {
+ TRACE_CTOR(CStreamReadCallBack, "std::istream&");
+ }
+ virtual ~CStreamReadCallBack() {
+ TRACE_DTOR(CStreamReadCallBack);
+ }
+
+ virtual int read(void * buffer, int sizeToRead)
+ {
+ in.read(static_cast<char *>(buffer), sizeToRead);
+ return in.gcount();
+ }
+
+ virtual int getSize()
+ {
+ if (size == 0) {
+ std::ifstream::pos_type pos = in.tellg();
+ in.seekg(0, std::ios_base::end);
+ size = in.tellg() - pos;
+ in.seekg(pos, std::ios_base::beg);
+ }
+ return size;
+ }
+};
class xml_parser_t : public journal_t::parser_t
{
@@ -46,14 +76,12 @@ class xml_parser_t : public journal_t::parser_t
virtual bool test(std::istream& in) const;
virtual unsigned int parse(std::istream& in,
- session_t& session,
- journal_t& journal,
+ session_t& session,
+ journal_t& journal,
account_t * master = NULL,
const path * original_file = NULL);
};
-#endif
-
class format_xml_entries : public format_entries
{
bool show_totals;