summary refs log tree commit diff
diff options
context:
space:
mode:
author John Wiegley <johnw@newartisans.com> 2009-01-24 04:49:58 -0400
committer John Wiegley <johnw@newartisans.com> 2009-01-24 04:49:58 -0400
commit a69649fb7f5ca7e20713ec260c5f989ae82d446f (patch)
tree 41929e6fc4d4d20ffcf0bbbfa681dbe3e3624e21
parent dff450ab3dbcb0819029c0bd5aee8dd78703a864 (diff)
download ledger-a69649fb7f5ca7e20713ec260c5f989ae82d446f.tar.gz
ledger-a69649fb7f5ca7e20713ec260c5f989ae82d446f.tar.bz2
ledger-a69649fb7f5ca7e20713ec260c5f989ae82d446f.zip
Switched over to using irrxml for parsing XML, rather than expat.
-rw-r--r-- .gitmodules 3
-rw-r--r-- Makefile.am 22
-rw-r--r-- README 3
-rwxr-xr-x acprep 3
-rw-r--r-- configure.ac 55
-rw-r--r-- doc/LICENSE-irrxml 25
-rw-r--r-- doc/README 6
-rw-r--r-- doc/ledger.texi 3
m--------- lib/irrxml 0
-rw-r--r-- src/gnucash.cc 32
-rw-r--r-- src/main.cc 4
-rw-r--r-- src/session.h 2
-rw-r--r-- src/system.hh 9
-rw-r--r-- src/textual.cc 10
-rw-r--r-- src/xml.cc 89
-rw-r--r-- src/xml.h 38
16 files changed, 151 insertions, 153 deletions
diff --git a/.gitmodules b/.gitmodules
index 33b949c0..e68ec8fd 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -13,3 +13,6 @@
[submodule "lib/utfcpp"]
path = lib/utfcpp
url = git://github.com/jwiegley/utfcpp.git
+[submodule "lib/irrxml"]
+ path = lib/irrxml
+ url = git://github.com/jwiegley/irrxml.git
diff --git a/Makefile.am b/Makefile.am
index b87affd3..0c00d947 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -6,7 +6,8 @@ EXTRA_DIST = autogen.sh contrib
lib_LTLIBRARIES = libamounts.la libledger.la
libamounts_la_CPPFLAGS = -I$(srcdir)/src -I$(srcdir)/lib \
- -I$(srcdir)/lib/utfcpp/source
+ -I$(srcdir)/lib/utfcpp/source \
+ -I$(srcdir)/lib/irrxml/src
if HAVE_GDTOA
libamounts_la_CPPFLAGS += -I$(top_builddir)/lib/gdtoa -I$(srcdir)/lib/gdtoa
endif
@@ -31,15 +32,10 @@ libamounts_la_SOURCES = \
src/format.cc \
src/option.cc \
\
- lib/sha1.cpp
+ lib/sha1.cpp \
+ \
+ lib/irrxml/src/irrXML.cpp
-if HAVE_EXPAT
-libamounts_la_CPPFLAGS += -DHAVE_EXPAT=1
-else
-if HAVE_XMLPARSE
-libamounts_la_CPPFLAGS += -DHAVE_XMLPARSE=1
-endif
-endif
if HAVE_LIBOFX
libamounts_la_CPPFLAGS += -DHAVE_LIBOFX=1
endif
@@ -64,6 +60,7 @@ libledger_la_SOURCES = \
src/qif.cc \
src/xml.cc \
src/csv.cc \
+ src/gnucash.cc \
\
src/session.cc \
src/report.cc \
@@ -75,13 +72,6 @@ libledger_la_SOURCES = \
src/reconcile.cc \
src/quotes.cc
-if HAVE_EXPAT
-libledger_la_SOURCES += src/gnucash.cc
-else
-if HAVE_XMLPARSE
-libledger_la_SOURCES += src/gnucash.cc
-endif
-endif
if HAVE_LIBOFX
libledger_la_SOURCES += src/ofx.cc
endif
diff --git a/README b/README
index 74483d5e..d1f99a8c 100644
--- a/README
+++ b/README
@@ -42,7 +42,6 @@ Now, if you wish to proceed in this venture, you'll need a few dependencies:
- CppUnit 1.12.1 -- if you're building DEVEL
- Optionally, Python 2.4 or higher
- - Optionally, libexpat or libxml2
- Optionally, libofx
* MacPorts
@@ -50,7 +49,7 @@ Now, if you wish to proceed in this venture, you'll need a few dependencies:
If you build stuff using MacPorts, as I do, here is what you would run:
sudo port install boost +complete+python25
- sudo port install gmp pcre cppunit expat libofx
+ sudo port install gmp pcre cppunit libofx
* Ubuntu
diff --git a/acprep b/acprep
index 387ab065..f9a3aab9 100755
--- a/acprep
+++ b/acprep
@@ -204,9 +204,6 @@ while [ -n "$1" ]; do
#LDFLAGS="-Wl,-read_only_relocs,suppress"
#LIBS=""
- #if [ -f /opt/local/lib/libexpat.a ]; then
- # LIBS="$LIBS /opt/local/lib/libexpat.a"
- #fi
#if [ -f /opt/local/lib/libgmp.a ]; then
# LIBS="$LIBS /opt/local/lib/libgmp.a"
#fi
diff --git a/configure.ac b/configure.ac
index 7dd911c2..d7eddf99 100644
--- a/configure.ac
+++ b/configure.ac
@@ -126,61 +126,6 @@ else
AC_MSG_FAILURE("Could not find gmp library (set CPPFLAGS and LDFLAGS?)")
fi
-# check for expat or xmlparse
-AC_CACHE_CHECK(
- [if libexpat is available],
- [libexpat_avail_cv_],
- [libexpat_save_libs=$LIBS
- LIBS="-lexpat $LIBS"
- AC_LANG_PUSH(C++)
- AC_TRY_LINK(
- [#include <stdio.h>
- extern "C" {
- #include <expat.h> // expat XML parser
- }],
- [XML_Parser parser = XML_ParserCreate(NULL);
- return parser != NULL;],
- [libexpat_avail_cv_=true],
- [libexpat_avail_cv_=false])
- AC_LANG_POP
- LIBS=$libexpat_save_libs])
-
-if [test x$libexpat_avail_cv_ = xtrue ]; then
- AM_CONDITIONAL(HAVE_EXPAT, true)
- LIBS="-lexpat $LIBS"
-else
- AM_CONDITIONAL(HAVE_EXPAT, false)
-fi
-
-if [test x$libexpat_avail_cv_ = xfalse ]; then
- AC_CACHE_CHECK(
- [if libxmlparse is available],
- [libxmlparse_avail_cv_],
- [libxmlparse_save_libs=$LIBS
- LIBS="-lxmlparse -lxmltok $LIBS"
- AC_LANG_PUSH(C++)
- AC_TRY_LINK(
- [#include <stdio.h>
- extern "C" {
- #include <xmlparse.h> // expat XML parser
- }],
- [XML_Parser parser = XML_ParserCreate(NULL);
- return parser != NULL;],
- [libxmlparse_avail_cv_=true],
- [libxmlparse_avail_cv_=false])
- AC_LANG_POP
- LIBS=$libxmlparse_save_libs])
-
- if [test x$libxmlparse_avail_cv_ = xtrue ]; then
- AM_CONDITIONAL(HAVE_XMLPARSE, true)
- LIBS="-lxmlparse -lxmltok $LIBS"
- else
- AM_CONDITIONAL(HAVE_XMLPARSE, false)
- fi
-else
- AM_CONDITIONAL(HAVE_XMLPARSE, false)
-fi
-
# check for boost_regex
AC_CACHE_CHECK(
[if boost_regex is available],
diff --git a/doc/LICENSE-irrxml b/doc/LICENSE-irrxml
new file mode 100644
index 00000000..cb04a6f8
--- /dev/null
+++ b/doc/LICENSE-irrxml
@@ -0,0 +1,25 @@
+The license of irrXML is based on the zlib/libpng license.
+Even though this license does not require you to mention that you are
+using the Irrlicht Engine in your product, an acknowledgement
+would be highly appreciated.
+
+The irrXML License
+===========================
+
+Copyright (C) 2002-2005 Nikolaus Gebhardt
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not
+ claim that you wrote the original software. If you use this software
+ in a product, an acknowledgment in the product documentation would be
+ appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be
+ misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
diff --git a/doc/README b/doc/README
index 8a2406d9..edd2f3be 100644
--- a/doc/README
+++ b/doc/README
@@ -38,12 +38,6 @@ will not work), and at least these two libraries installed:
(On some GNU/Linux systems, the packages you need to install are
called "gmp-dev" and "pcre-dev").
-If you wish to read Gnucash data files, you will also need two XML
-libraries, which may or may not be available in a single package (it
-depends on your distribution):
-
- libexpat, or libxmlparse and libxmltok
-
Once you have determined where the headers and libraries for the above
packages are installed, run the script "configure", passing those
paths. If you installed everything under /usr/local, you can probably
diff --git a/doc/ledger.texi b/doc/ledger.texi
index abcb8283..7fe5de54 100644
--- a/doc/ledger.texi
+++ b/doc/ledger.texi
@@ -3942,7 +3942,6 @@ example:
That is the extent of the XML data format used by Ledger. It will
output such data if the @command{xml} command is used, and can read
-the same data as long as the @file{expat} library was available
-when Ledger was built.
+the same data.
@bye
diff --git a/lib/irrxml b/lib/irrxml
new file mode 160000
+Subproject e0f5ec13193e413ddcfcf70bcb8886c0c6a8a60
diff --git a/src/gnucash.cc b/src/gnucash.cc
index 60c72ed4..ed3a8457 100644
--- a/src/gnucash.cc
+++ b/src/gnucash.cc
@@ -41,10 +41,12 @@ typedef std::pair<const string, account_t *> accounts_pair;
typedef std::map<account_t *, commodity_t *> account_comm_map;
typedef std::pair<account_t *, commodity_t *> account_comm_pair;
+#if 0
+
static journal_t * curr_journal;
static account_t * master_account;
static account_t * curr_account;
-static string curr_account_id;
+static string curr_account_id;
static entry_t * curr_entry;
static commodity_t * entry_comm;
static commodity_t * curr_comm;
@@ -54,7 +56,7 @@ static XML_Parser current_parser;
static accounts_map accounts_by_id;
static account_comm_map account_comms;
static unsigned int count;
-static string have_error;
+static string have_error;
static std::istream * instreamp;
static unsigned int offset;
@@ -350,14 +352,28 @@ static void dataHandler(void *, const char *s, int len)
}
}
+#endif
+
bool gnucash_parser_t::test(std::istream& in) const
{
- char buf[5];
- in.read(buf, 5);
+ char buf[80];
+ char * p;
+
+ in.read(buf, 11);
+ if (utf8::is_bom(buf))
+ p = &buf[3];
+ else
+ p = buf;
+
+ if (std::strncmp(p, "<?xml", 5) != 0) {
+ in.clear();
+ in.seekg(0, std::ios::beg);
+ return false;
+ }
+
in.clear();
in.seekg(0, std::ios::beg);
-
- return std::strncmp(buf, "<?xml", 5) == 0;
+ return true;
}
unsigned int gnucash_parser_t::parse(std::istream& in,
@@ -366,6 +382,7 @@ unsigned int gnucash_parser_t::parse(std::istream& in,
account_t * master,
const path * original_file)
{
+#if 0
char buf[BUFSIZ];
#if 0
@@ -428,6 +445,9 @@ unsigned int gnucash_parser_t::parse(std::istream& in,
curr_account_id.clear();
return count;
+#else
+ return 0;
+#endif
}
} // namespace ledger
diff --git a/src/main.cc b/src/main.cc
index d49cfbeb..2f51a5bf 100644
--- a/src/main.cc
+++ b/src/main.cc
@@ -39,10 +39,8 @@
#include "textual.h"
#include "qif.h"
-#if defined(HAVE_EXPAT) || defined(HAVE_XMLPARSE)
#include "xml.h"
#include "gnucash.h"
-#endif
#ifdef HAVE_LIBOFX
#include "ofx.h"
#endif
@@ -469,10 +467,8 @@ int main(int argc, char * argv[], char * envp[])
#if 0
session->register_parser(new ledger::journal_t::binary_parser_t);
#endif
-#if defined(HAVE_EXPAT) || defined(HAVE_XMLPARSE)
session->register_parser(new ledger::xml_parser_t);
session->register_parser(new ledger::gnucash_parser_t);
-#endif
#ifdef HAVE_LIBOFX
session->register_parser(new ledger::ofx_parser_t);
#endif
diff --git a/src/session.h b/src/session.h
index a0637b2e..84e19531 100644
--- a/src/session.h
+++ b/src/session.h
@@ -188,9 +188,7 @@ public:
This program is made available under the terms of the BSD Public License.\n\
See LICENSE file included with the distribution for details and disclaimer.\n";
std::cout << "\n(modules: gmp, pcre";
-#if defined(HAVE_EXPAT) || defined(HAVE_XMLPARSE)
std::cout << ", xml";
-#endif
#ifdef HAVE_LIBOFX
std::cout << ", ofx";
#endif
diff --git a/src/system.hh b/src/system.hh
index 0dc4d33d..b44678d6 100644
--- a/src/system.hh
+++ b/src/system.hh
@@ -122,13 +122,8 @@ typedef std::ostream::pos_type ostream_pos_type;
#include <gmp.h>
-extern "C" {
-#if defined(HAVE_EXPAT)
-#include <expat.h> // expat XML parser
-#elif defined(HAVE_XMLPARSE)
-#include <xmlparse.h> // expat XML parser
-#endif
-}
+#include "irrXML.h" // XML parser
+#include "CXMLReaderImpl.h"
#if defined(HAVE_LIBOFX)
#include <libofx.h>
diff --git a/src/textual.cc b/src/textual.cc
index 4600f3f3..95c90708 100644
--- a/src/textual.cc
+++ b/src/textual.cc
@@ -605,13 +605,9 @@ bool textual_parser_t::test(std::istream& in) const
else
p = buf;
- if (std::strncmp(p, "<?xml", 5) == 0) {
-#if defined(HAVE_EXPAT) || defined(HAVE_XMLPARSE)
- throw parse_error("Ledger file contains XML data, but format was not recognized");
-#else
- throw parse_error("Ledger file contains XML data, but no XML support present");
-#endif
- }
+ if (std::strncmp(p, "<?xml", 5) == 0)
+ throw_(parse_error,
+ "Ledger file contains XML data, but format was not recognized");
in.clear();
in.seekg(0, std::ios::beg);
diff --git a/src/xml.cc b/src/xml.cc
index ccaa65df..0d04c158 100644
--- a/src/xml.cc
+++ b/src/xml.cc
@@ -35,10 +35,8 @@
namespace ledger {
-#if defined(HAVE_EXPAT) || defined(HAVE_XMLPARSE)
-
-static XML_Parser current_parser;
-static unsigned int count;
+static irr::io::IrrXMLReader * current_parser;
+static unsigned int count;
static journal_t * curr_journal;
static entry_t * curr_entry;
@@ -51,7 +49,7 @@ static string data;
static bool ignore;
static string have_error;
-static void startElement(void *userData, const char *name, const char **attrs)
+static void startElement(const char *name)
{
if (ignore)
return;
@@ -68,15 +66,15 @@ static void startElement(void *userData, const char *name, const char **attrs)
curr_entry->xacts.back()->set_state(curr_state);
}
else if (std::strcmp(name, "commodity") == 0) {
- if (string(attrs[0]) == "flags")
- comm_flags = attrs[1];
+ if (const char * p = current_parser->getAttributeValue("flags"))
+ comm_flags = p;
}
else if (std::strcmp(name, "total") == 0) {
ignore = true;
}
}
-static void endElement(void *userData, const char *name)
+static void endElement(const char *name)
{
if (ignore) {
if (std::strcmp(name, "total") == 0)
@@ -181,27 +179,32 @@ static void endElement(void *userData, const char *name)
}
}
-static void dataHandler(void *userData, const char *s, int len)
-{
- if (! ignore)
- data = string(s, len);
-}
-
bool xml_parser_t::test(std::istream& in) const
{
- char buf[80];
+ char buf[80];
+ char * p;
- in.getline(buf, 79);
- if (std::strncmp(buf, "<?xml", 5) != 0) {
+ DEBUG("xml.parse", "Testing whether the file is XML...");
+
+ in.read(buf, 10);
+ if (utf8::is_bom(buf))
+ p = &buf[3];
+ else
+ p = buf;
+
+ if (std::strncmp(p, "<?xml", 5) != 0) {
in.clear();
in.seekg(0, std::ios::beg);
+ DEBUG("xml.parse", "Does not begin with <?xml");
return false;
}
+ in.getline(buf, 79); // skip rest of <?xml line
in.getline(buf, 79);
if (! std::strstr(buf, "<ledger")) {
in.clear();
in.seekg(0, std::ios::beg);
+ DEBUG("xml.parse", "Next line does not begin with <ledger");
return false;
}
@@ -211,11 +214,13 @@ bool xml_parser_t::test(std::istream& in) const
}
unsigned int xml_parser_t::parse(std::istream& in,
- session_t& session,
- journal_t& journal,
+ session_t& session,
+ journal_t& journal,
account_t * master,
const path * original_file)
{
+ TRACE_START(xml_parsing_total, 1, "Total time spent parsing XML:");
+
char buf[BUFSIZ];
count = 0;
@@ -224,47 +229,55 @@ unsigned int xml_parser_t::parse(std::istream& in,
curr_comm = NULL;
ignore = false;
- XML_Parser parser = XML_ParserCreate(NULL);
+ irr::io::IrrXMLReader * parser =
+ new irr::io::CXMLReaderImpl<char, irr::io::IXMLBase>(new CStreamReadCallBack(in));
current_parser = parser;
- XML_SetElementHandler(parser, startElement, endElement);
- XML_SetCharacterDataHandler(parser, dataHandler);
+ while (parser->read()) {
+ switch (parser->getNodeType()) {
+ case irr::io::EXN_TEXT:
+ DEBUG("xml.parse", "Read text: " << parser->getNodeData());
+ if (! ignore) {
+ DEBUG("xml.parse", " but ignoring it");
+ data = parser->getNodeData();
+ }
+ break;
+
+ case irr::io::EXN_ELEMENT:
+ DEBUG("xml.parse", "Read element: " << parser->getNodeName());
+ startElement(parser->getNodeName());
+ break;
+ case irr::io::EXN_ELEMENT_END:
+ DEBUG("xml.parse", "End element: " << parser->getNodeName());
+ endElement(parser->getNodeName());
+ break;
- while (! in.eof()) {
- in.getline(buf, BUFSIZ - 1);
- std::strcat(buf, "\n");
- bool result;
- try {
- result = XML_Parse(parser, buf, std::strlen(buf), in.eof());
- }
- catch (const std::exception& err) {
- //unsigned long line = XML_GetCurrentLineNumber(parser) - offset++;
- XML_ParserFree(parser);
- throw parse_error(err.what());
+ default: // ignore: COMMENT, CDATA, UNKNOWN
+ break;
}
if (! have_error.empty()) {
- //unsigned long line = XML_GetCurrentLineNumber(parser) - offset++;
parse_error err(have_error);
std::cerr << "Error: " << err.what() << std::endl;
have_error = "";
}
+#if 0
if (! result) {
- //unsigned long line = XML_GetCurrentLineNumber(parser) - offset++;
const char * err = XML_ErrorString(XML_GetErrorCode(parser));
XML_ParserFree(parser);
throw parse_error(err);
}
+#endif
}
- XML_ParserFree(parser);
+ delete parser;
+
+ TRACE_FINISH(xml_parsing_total, 1);
return count;
}
-#endif // defined(HAVE_EXPAT) || defined(HAVE_XMLPARSE)
-
void xml_write_amount(std::ostream& out, const amount_t& amount,
const int depth = 0)
{
diff --git a/src/xml.h b/src/xml.h
index 6630146f..8e1c5af7 100644
--- a/src/xml.h
+++ b/src/xml.h
@@ -38,7 +38,37 @@
namespace ledger {
-#if defined(HAVE_EXPAT) || defined(HAVE_XMLPARSE)
+class CStreamReadCallBack : public irr::io::IFileReadCallBack
+{
+ std::istream& in;
+ std::size_t size;
+
+public:
+ //! construct from an input stream
+ CStreamReadCallBack(std::istream& _in) : in(_in), size(0) {
+ TRACE_CTOR(CStreamReadCallBack, "std::istream&");
+ }
+ virtual ~CStreamReadCallBack() {
+ TRACE_DTOR(CStreamReadCallBack);
+ }
+
+ virtual int read(void * buffer, int sizeToRead)
+ {
+ in.read(static_cast<char *>(buffer), sizeToRead);
+ return in.gcount();
+ }
+
+ virtual int getSize()
+ {
+ if (size == 0) {
+ std::ifstream::pos_type pos = in.tellg();
+ in.seekg(0, std::ios_base::end);
+ size = in.tellg() - pos;
+ in.seekg(pos, std::ios_base::beg);
+ }
+ return size;
+ }
+};
class xml_parser_t : public journal_t::parser_t
{
@@ -46,14 +76,12 @@ class xml_parser_t : public journal_t::parser_t
virtual bool test(std::istream& in) const;
virtual unsigned int parse(std::istream& in,
- session_t& session,
- journal_t& journal,
+ session_t& session,
+ journal_t& journal,
account_t * master = NULL,
const path * original_file = NULL);
};
-#endif
-
class format_xml_entries : public format_entries
{
bool show_totals;