diff options
author | John Wiegley <johnw@newartisans.com> | 2007-05-21 20:42:05 +0000 |
---|---|---|
committer | John Wiegley <johnw@newartisans.com> | 2008-04-13 03:39:06 -0400 |
commit | 7380da43ab403dacb41d2010093d11942bb7cec1 (patch) | |
tree | 1b9db99b018695254584fe9f8b9ca34a4aa073cb /src/traversal | |
parent | f12d41f233d460bd6d2eb8efb90bf6e36e994a30 (diff) | |
download | fork-ledger-7380da43ab403dacb41d2010093d11942bb7cec1.tar.gz fork-ledger-7380da43ab403dacb41d2010093d11942bb7cec1.tar.bz2 fork-ledger-7380da43ab403dacb41d2010093d11942bb7cec1.zip |
Many changes.
Diffstat (limited to 'src/traversal')
-rw-r--r-- | src/traversal/abbrev.cc | 94 | ||||
-rw-r--r-- | src/traversal/abbrev.h | 23 | ||||
-rw-r--r-- | src/traversal/transform.cc | 357 | ||||
-rw-r--r-- | src/traversal/transform.h | 164 | ||||
-rw-r--r-- | src/traversal/xpath.cc | 1670 | ||||
-rw-r--r-- | src/traversal/xpath.h | 873 |
6 files changed, 3181 insertions, 0 deletions
diff --git a/src/traversal/abbrev.cc b/src/traversal/abbrev.cc new file mode 100644 index 00000000..089b8342 --- /dev/null +++ b/src/traversal/abbrev.cc @@ -0,0 +1,94 @@ +#include "abbrev.h" + +namespace ledger { + +string abbreviate(const string& str, + unsigned int width, + elision_style_t elision_style, + const bool is_account, + int abbrev_length) +{ + const unsigned int len = str.length(); + if (len <= width) + return str; + + assert(width < 4095); + + static char buf[4096]; + + switch (elision_style) { + case TRUNCATE_LEADING: + // This method truncates at the beginning. + std::strncpy(buf, str.c_str() + (len - width), width); + buf[0] = '.'; + buf[1] = '.'; + break; + + case TRUNCATE_MIDDLE: + // This method truncates in the middle. + std::strncpy(buf, str.c_str(), width / 2); + std::strncpy(buf + width / 2, + str.c_str() + (len - (width / 2 + width % 2)), + width / 2 + width % 2); + buf[width / 2 - 1] = '.'; + buf[width / 2] = '.'; + break; + + case ABBREVIATE: + if (is_account) { + std::list<string> parts; + string::size_type beg = 0; + for (string::size_type pos = str.find(':'); + pos != string::npos; + beg = pos + 1, pos = str.find(':', beg)) + parts.push_back(string(str, beg, pos - beg)); + parts.push_back(string(str, beg)); + + string result; + unsigned int newlen = len; + for (std::list<string>::iterator i = parts.begin(); + i != parts.end(); + i++) { + // Don't contract the last element + std::list<string>::iterator x = i; + if (++x == parts.end()) { + result += *i; + break; + } + + if (newlen > width) { + result += string(*i, 0, abbrev_length); + result += ":"; + newlen -= (*i).length() - abbrev_length; + } else { + result += *i; + result += ":"; + } + } + + if (newlen > width) { + // Even abbreviated it's too big to show the last account, so + // abbreviate all but the last and truncate at the beginning. 
+ std::strncpy(buf, result.c_str() + (result.length() - width), width); + buf[0] = '.'; + buf[1] = '.'; + } else { + std::strcpy(buf, result.c_str()); + } + break; + } + // fall through... + + case TRUNCATE_TRAILING: + // This method truncates at the end (the default). + std::strncpy(buf, str.c_str(), width - 2); + buf[width - 2] = '.'; + buf[width - 1] = '.'; + break; + } + buf[width] = '\0'; + + return buf; +} + +} // namespace ledger diff --git a/src/traversal/abbrev.h b/src/traversal/abbrev.h new file mode 100644 index 00000000..ad880e45 --- /dev/null +++ b/src/traversal/abbrev.h @@ -0,0 +1,23 @@ +#ifndef _ABBREV_H +#define _ABBREV_H + +#include "utils.h" + +namespace ledger { + +enum elision_style_t { + TRUNCATE_TRAILING, + TRUNCATE_MIDDLE, + TRUNCATE_LEADING, + ABBREVIATE +}; + +string abbreviate(const string& str, + unsigned int width, + elision_style_t elision_style = TRUNCATE_TRAILING, + const bool is_account = false, + int abbrev_length = 2); + +} // namespace ledger + +#endif // _ABBREV_H diff --git a/src/traversal/transform.cc b/src/traversal/transform.cc new file mode 100644 index 00000000..3331c2f3 --- /dev/null +++ b/src/traversal/transform.cc @@ -0,0 +1,357 @@ +/* + * Copyright (c) 2003-2007, John Wiegley. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * - Neither the name of New Artisans LLC nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "transform.h" + +namespace ledger { + +#if 0 +void populate_account(account_t& acct, xml::document_t& document) +{ + if (! acct.parent) + return; + + account_repitem_t * acct_item; + if (acct.data == NULL) { + acct.data = acct_item = + static_cast<account_repitem_t *>(repitem_t::wrap(&acct)); + if (acct.parent) { + if (acct.parent->data == NULL) + populate_account(*acct.parent, acct_item); + else + static_cast<account_repitem_t *>(acct.parent->data)-> + add_child(acct_item); + } + } else { + acct_item = static_cast<account_repitem_t *>(acct.data); + } + + if (item->kind == repitem_t::ACCOUNT) + acct_item->add_child(item); + else + acct_item->add_content(item); +} + +class populate_accounts : public repitem_t::select_callback_t { + virtual void operator()(xml::document_t& document) { + if (item->kind == repitem_t::TRANSACTION) { + item->extract(); + populate_account(*static_cast<xact_repitem_t *>(item)->account(), item); + } + } +}; + +class clear_account_data : public repitem_t::select_callback_t { + virtual void operator()(xml::document_t& document) { + if (item->kind == repitem_t::ACCOUNT) + static_cast<account_repitem_t *>(item)->account->data = NULL; + } 
+}; + +void accounts_transform::execute(xml::document_t& document) +{ + populate_accounts cb1; + items->select_all(cb1); + + for (repitem_t * j = items->children; j; j = j->next) { + assert(j->kind == repitem_t::JOURNAL); + + j->clear(); + + for (accounts_map::iterator i = j->journal->master->accounts.begin(); + i != j->journal->master->accounts.end(); + i++) { + assert((*i).second->data); + j->add_child(static_cast<account_repitem_t *>((*i).second->data)); + (*i).second->data = NULL; + } + } + + clear_account_data cb2; + items->select_all(cb2); +} + +void compact_transform::execute(xml::document_t& document) +{ + for (repitem_t * i = items; i; i = i->next) { + if (i->kind == repitem_t::ACCOUNT) { + while (! i->contents && + i->children && ! i->children->next) { + account_repitem_t * p = static_cast<account_repitem_t *>(i); + i = p->children; + p->children = NULL; + p->last_child = NULL; + + i->set_parent(p->parent); + p->set_parent(NULL); + i->prev = p->prev; + if (p->prev) + p->prev->next = i; + p->prev = NULL; + i->next = p->next; + if (p->next) + p->next->prev = i; + p->next = NULL; + + if (i->parent->children == p) + i->parent->children = i; + if (i->parent->last_child == p) + i->parent->last_child = i; + + account_repitem_t * acct = static_cast<account_repitem_t *>(i); + acct->parents_elided = p->parents_elided + 1; + + checked_delete(p); + } + } + + if (i->children) + execute(i->children); + } +} + +void clean_transform::execute(xml::document_t& document) +{ + repitem_t * i = items; + while (i) { + if (i->kind == repitem_t::ACCOUNT) { + value_t temp; + i->add_total(temp); + if (! temp) { + repitem_t * next = i->next; + checked_delete(i); + i = next; + continue; + } + } +#if 0 + else if (i->kind == repitem_t::ENTRY && ! i->contents) { + assert(! 
i->children); + repitem_t * next = i->next; + checked_delete(i); + i = next; + continue; + } +#endif + + if (i->children) + execute(i->children); + + i = i->next; + } +} + +void entries_transform::execute(xml::document_t& document) +{ +} + +void optimize_transform::execute(xml::document_t& document) +{ + for (repitem_t * i = items; i; i = i->next) { + if (i->kind == repitem_t::ENTRY) { + if (i->contents && + i->contents->next && + ! i->contents->next->next) { // exactly two transactions + xact_repitem_t * first = + static_cast<xact_repitem_t *>(i->contents); + xact_repitem_t * second = + static_cast<xact_repitem_t *>(i->contents->next); + if (first->xact->amount == - second->xact->amount) + ; + } + } + + if (i->children) + execute(i->children); + } +} + +void split_transform::execute(xml::document_t& document) +{ + for (repitem_t * i = items; i; i = i->next) { + if (i->contents && i->contents->next) { + repitem_t * j; + + switch (i->kind) { + case repitem_t::TRANSACTION: + assert(false); + j = new xact_repitem_t(static_cast<xact_repitem_t *>(i)->xact); + break; + case repitem_t::ENTRY: + j = new entry_repitem_t(static_cast<entry_repitem_t *>(i)->entry); + break; + case repitem_t::ACCOUNT: + j = new account_repitem_t(static_cast<account_repitem_t *>(i)->account); + break; + default: + j = new repitem_t(i->kind); + break; + } + + j->set_parent(i->parent); + j->prev = i; + j->next = i->next; + i->next = j; + + j->contents = i->contents->next; + j->contents->prev = NULL; + j->contents->set_parent(j); + i->contents->next = NULL; + + j->last_content = i->last_content; + if (j->contents == i->last_content) + i->last_content = i->contents; + } + + if (i->children) + execute(i->children); + } +} + +void merge_transform::execute(xml::document_t& document) +{ + for (repitem_t * i = items; i; i = i->next) { + if (i->next) { + assert(i->kind == i->next->kind); + bool merge = false; + switch (i->kind) { + case repitem_t::TRANSACTION: + assert(false); + break; + case 
repitem_t::ENTRY: + if (static_cast<entry_repitem_t *>(i)->entry == + static_cast<entry_repitem_t *>(i->next)->entry) + merge = true; + break; + case repitem_t::ACCOUNT: +#if 0 + if (static_cast<account_repitem_t *>(i)->account == + static_cast<account_repitem_t *>(i->next)->account) + merge = true; +#endif + break; + default: + break; + } + + if (merge) { + repitem_t * j = i->next; + + i->next = i->next->next; + if (i->next) + i->next->prev = i; + + for (repitem_t * k = j->contents; k; k = k->next) + k->set_parent(i); + + i->last_content->next = j->contents; + i->last_content = j->last_content; + + j->contents = NULL; + assert(! j->children); + checked_delete(j); + } + } + + if (i->children) + execute(i->children); + } +} + +namespace { +#define REPITEM_FLAGGED 0x1 + + class mark_selected : public repitem_t::select_callback_t { + virtual void operator()(xml::document_t& document) { + item->flags |= REPITEM_FLAGGED; + } + }; + + class mark_selected_and_ancestors : public repitem_t::select_callback_t { + virtual void operator()(xml::document_t& document) { + while (item->parent) { + item->flags |= REPITEM_FLAGGED; + item = item->parent; + } + } + }; + + class delete_unmarked : public repitem_t::select_callback_t { + virtual void operator()(xml::document_t& document) { + if (item->parent && ! (item->flags & REPITEM_FLAGGED)) + checked_delete(item); + } + }; + + class delete_marked : public repitem_t::select_callback_t { + virtual void operator()(xml::document_t& document) { + if (item->flags & REPITEM_FLAGGED) + checked_delete(item); + } + }; + + class clear_flags : public repitem_t::select_callback_t { + virtual void operator()(xml::document_t& document) { + item->flags = 0; + } + }; +} + +void select_transform::execute(xml::document_t& document) +{ + if (! 
path) { + items->clear(); + return; + } + mark_selected_and_ancestors cb1; + items->select(path, cb1); + + delete_unmarked cb2; + items->select_all(cb2); + clear_flags cb3; + items->select_all(cb3); +} + +void remove_transform::execute(xml::document_t& document) +{ + if (! path) + return; + mark_selected cb1; + items->select(path, cb1); + + delete_marked cb2; + items->select_all(cb2); + clear_flags cb3; + items->select_all(cb3); +} +#endif + +} // namespace ledger diff --git a/src/traversal/transform.h b/src/traversal/transform.h new file mode 100644 index 00000000..158b9b6a --- /dev/null +++ b/src/traversal/transform.h @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2003-2007, John Wiegley. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * - Neither the name of New Artisans LLC nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _TRANSFORM_H +#define _TRANSFORM_H + +#include "xpath.h" + +namespace ledger { + +class transform_t { + public: + virtual ~transform_t() {} + virtual value_t operator()(xml::xpath_t::scope_t& args) = 0; +}; + +class check_transform : public transform_t { + // --check checks the validity of the item list. + public: + virtual value_t operator()(xml::xpath_t::call_scope_t& args); +}; + +class accounts_transform : public transform_t { + // --accounts transforms the report tree into an account-wise view. + public: + virtual value_t operator()(xml::xpath_t::call_scope_t& args); +}; + +class compact_transform : public transform_t { + // --compact compacts an account tree to remove accounts with only + // one child account. + public: + virtual value_t operator()(xml::xpath_t::call_scope_t& args); +}; + +class clean_transform : public transform_t { + // --clean clears out entries and accounts that have no contents. + public: + virtual value_t operator()(xml::xpath_t::call_scope_t& args); +}; + +class entries_transform : public transform_t { + // --entries transforms the report tree into an entries-wise view. + public: + virtual value_t operator()(xml::xpath_t::call_scope_t& args); +}; + +class optimize_transform : public transform_t { + // --optimize optimizes entries for display by the print command. 
+ // What this means is that if an entry has two transactions of the + // same commodity (one the negative of the other), the amount of the + // second transaction will be nulled out. + public: + virtual value_t operator()(xml::xpath_t::call_scope_t& args); +}; + +class split_transform : public transform_t { + // --split breaks an entry with two or more transactions into what + // seems like two entries each with one transaction -- even though + // it is the same entry being reported in both cases. This is + // useful before sorting, for example, in order to sort by + // transaction instead of by entry. + public: + virtual value_t operator()(xml::xpath_t::call_scope_t& args); +}; + +class merge_transform : public transform_t { + // --merge is the opposite of --split: any adjacent transactions + // which share the same entry will be merged into a group of + // transactions under one reported entry. + public: + virtual value_t operator()(xml::xpath_t::call_scope_t& args); +}; + +class combine_transform : public transform_t { + // --combine EXPR combines all transactions matching EXPR so that + // they appear within the same virtual entry (whose date will span + // the earliest to the latest of those entries, and whose payee name + // will show the terminating date or a label that is characteristic + // of the set). + public: + virtual value_t operator()(xml::xpath_t::call_scope_t& args); +}; + +class group_transform : public transform_t { + // --group groups all transactions that affect the same account + // within an entry, so that they appear as a single transaction. + public: + virtual value_t operator()(xml::xpath_t::call_scope_t& args); +}; + +class collapse_transform : public transform_t { + // --collapse makes all transactions within an entry appear as a + // single transaction, even if they affect different accounts. The + // fictitious account "<total>" is used to represent the final sum, + // if multiple accounts are involved. 
+ public: + virtual value_t operator()(xml::xpath_t::call_scope_t& args); +}; + +class subtotal_transform : public transform_t { + // --subtotal will combine the transactions from all entries into + // one giant entry. When used in conjunction with --group, the + // effect is very similar to a regular balance report. + public: + virtual value_t operator()(xml::xpath_t::call_scope_t& args); +}; + +#if 0 +class select_transform : public transform_t +{ + protected: + xml::xpath_t xpath; + + public: + select_transform(const string& selection_path) { + xpath.parse(selection_path); + } + virtual ~select_transform() {} + + virtual value_t operator()(xml::xpath_t::call_scope_t& args); +}; + +class remove_transform : public select_transform +{ + public: + remove_transform(const string& selection_path) + : select_transform(selection_path) {} + + virtual value_t operator()(xml::xpath_t::call_scope_t& args); +}; +#endif + +} // namespace ledger + +#endif // _TRANSFORM_H diff --git a/src/traversal/xpath.cc b/src/traversal/xpath.cc new file mode 100644 index 00000000..ed4c0544 --- /dev/null +++ b/src/traversal/xpath.cc @@ -0,0 +1,1670 @@ +/* + * Copyright (c) 2003-2007, John Wiegley. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * - Neither the name of New Artisans LLC nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "xpath.h" +#include "parser.h" + +namespace ledger { +namespace xml { + +#ifndef THREADSAFE +xpath_t::token_t * xpath_t::lookahead = NULL; +#endif + +void xpath_t::initialize() +{ + lookahead = new xpath_t::token_t; +} + +void xpath_t::shutdown() +{ + checked_delete(lookahead); + lookahead = NULL; +} + +void xpath_t::token_t::parse_ident(std::istream& in) +{ + if (in.eof()) { + kind = TOK_EOF; + return; + } + assert(in.good()); + + char c = peek_next_nonws(in); + + if (in.eof()) { + kind = TOK_EOF; + return; + } + assert(in.good()); + + kind = IDENT; + length = 0; + + char buf[256]; + READ_INTO_(in, buf, 255, c, length, + std::isalnum(c) || c == '_' || c == '.' 
|| c == '-'); + + switch (buf[0]) { + case 'a': + if (std::strcmp(buf, "and") == 0) + kind = KW_AND; + break; + case 'd': + if (std::strcmp(buf, "div") == 0) + kind = KW_DIV; + break; + case 'e': + if (std::strcmp(buf, "eq") == 0) + kind = EQUAL; + break; + case 'f': + if (std::strcmp(buf, "false") == 0) { + kind = VALUE; + value = false; + } + break; + case 'g': + if (std::strcmp(buf, "gt") == 0) + kind = GREATER; + else if (std::strcmp(buf, "ge") == 0) + kind = GREATEREQ; + break; + case 'i': + if (std::strcmp(buf, "is") == 0) + kind = EQUAL; + break; + case 'l': + if (std::strcmp(buf, "lt") == 0) + kind = LESS; + else if (std::strcmp(buf, "le") == 0) + kind = LESSEQ; + break; + case 'm': + if (std::strcmp(buf, "mod") == 0) + kind = KW_MOD; + break; + case 'n': + if (std::strcmp(buf, "ne") == 0) + kind = NEQUAL; + break; + case 'o': + if (std::strcmp(buf, "or") == 0) + kind = KW_OR; + break; + case 't': + if (std::strcmp(buf, "true") == 0) { + kind = VALUE; + value = true; + } + break; + case 'u': + if (std::strcmp(buf, "union") == 0) + kind = KW_UNION; + break; + } + + if (kind == IDENT) + value.set_string(buf); +} + +void xpath_t::token_t::next(std::istream& in, flags_t flags) +{ + if (in.eof()) { + kind = TOK_EOF; + return; + } + assert(in.good()); + + char c = peek_next_nonws(in); + + if (in.eof()) { + kind = TOK_EOF; + return; + } + assert(in.good()); + + symbol[0] = c; + symbol[1] = '\0'; + + length = 1; + + if (! 
(flags & XPATH_PARSE_RELAXED) && + (std::isalpha(c) || c == '_')) { + parse_ident(in); + return; + } + + switch (c) { + case '@': + in.get(c); + kind = AT_SYM; + break; + case '$': + in.get(c); + kind = DOLLAR; + break; + + case '(': + in.get(c); + kind = LPAREN; + break; + case ')': + in.get(c); + kind = RPAREN; + break; + + case '[': { + in.get(c); + if (flags & XPATH_PARSE_ALLOW_DATE) { + char buf[256]; + READ_INTO_(in, buf, 255, c, length, c != ']'); + if (c != ']') + unexpected(c, ']'); + in.get(c); + length++; + interval_t timespan(buf); + kind = VALUE; + value = timespan.next(); + } else { + kind = LBRACKET; + } + break; + } + + case ']': { + in.get(c); + kind = RBRACKET; + break; + } + + case '\'': + case '"': { + char delim; + in.get(delim); + char buf[4096]; + READ_INTO_(in, buf, 4095, c, length, c != delim); + if (c != delim) + unexpected(c, delim); + in.get(c); + length++; + kind = VALUE; + value.set_string(buf); + break; + } + + case '{': { + in.get(c); + amount_t temp; + temp.parse(in, AMOUNT_PARSE_NO_MIGRATE); + in.get(c); + if (c != '}') + unexpected(c, '}'); + length++; + kind = VALUE; + value = temp; + break; + } + + case '!': + in.get(c); + c = in.peek(); + if (c == '=') { + in.get(c); + symbol[1] = c; + symbol[2] = '\0'; + kind = NEQUAL; + length = 2; + break; + } + kind = EXCLAM; + break; + + case '-': + in.get(c); + kind = MINUS; + break; + case '+': + in.get(c); + kind = PLUS; + break; + + case '*': + in.get(c); + kind = STAR; + break; + + case '/': + in.get(c); + kind = SLASH; + break; + + case '=': + in.get(c); + kind = EQUAL; + break; + + case '<': + in.get(c); + if (in.peek() == '=') { + in.get(c); + symbol[1] = c; + symbol[2] = '\0'; + kind = LESSEQ; + length = 2; + break; + } + kind = LESS; + break; + + case '>': + in.get(c); + if (in.peek() == '=') { + in.get(c); + symbol[1] = c; + symbol[2] = '\0'; + kind = GREATEREQ; + length = 2; + break; + } + kind = GREATER; + break; + + case '|': + in.get(c); + kind = PIPE; + break; + case ',': + 
in.get(c); + kind = COMMA; + break; + + case '.': + in.get(c); + c = in.peek(); + if (c == '.') { + in.get(c); + length++; + kind = DOTDOT; + break; + } + else if (! std::isdigit(c)) { + kind = DOT; + break; + } + in.unget(); // put the first '.' back + // fall through... + + default: + if (! (flags & XPATH_PARSE_RELAXED)) { + kind = UNKNOWN; + } else { + amount_t temp; + unsigned long pos = 0; + + // When in relaxed parsing mode, we want to migrate commodity + // flags so that any precision specified by the user updates the + // current maximum displayed precision. + try { + pos = (long)in.tellg(); + + unsigned char parse_flags = 0; + if (flags & XPATH_PARSE_NO_MIGRATE) + parse_flags |= AMOUNT_PARSE_NO_MIGRATE; + if (flags & XPATH_PARSE_NO_REDUCE) + parse_flags |= AMOUNT_PARSE_NO_REDUCE; + + temp.parse(in, parse_flags); + + kind = VALUE; + value = temp; + } + catch (amount_error& err) { + // If the amount had no commodity, it must be an unambiguous + // variable reference + + // jww (2007-04-19): There must be a more efficient way to do this! + if (std::strcmp(err.what(), "No quantity specified for amount") == 0) { + in.clear(); + in.seekg(pos, std::ios::beg); + + c = in.peek(); + assert(! 
(std::isdigit(c) || c == '.')); + parse_ident(in); + } else { + throw; + } + } + } + break; + } +} + +void xpath_t::token_t::rewind(std::istream& in) +{ + for (unsigned int i = 0; i < length; i++) + in.unget(); +} + + +void xpath_t::token_t::unexpected() +{ + switch (kind) { + case TOK_EOF: + throw_(parse_error, "Unexpected end of expression"); + case IDENT: + throw_(parse_error, "Unexpected symbol '" << value << "'"); + case VALUE: + throw_(parse_error, "Unexpected value '" << value << "'"); + default: + throw_(parse_error, "Unexpected operator '" << symbol << "'"); + } +} + +void xpath_t::token_t::unexpected(char c, char wanted) +{ + if ((unsigned char) c == 0xff) { + if (wanted) + throw_(parse_error, "Missing '" << wanted << "'"); + else + throw_(parse_error, "Unexpected end"); + } else { + if (wanted) + throw_(parse_error, "Invalid char '" << c << + "' (wanted '" << wanted << "')"); + else + throw_(parse_error, "Invalid char '" << c << "'"); + } +} + + +void xpath_t::scope_t::define(const string& name, const value_t& val) { + define(name, op_t::wrap_value(val)); +} + +void xpath_t::symbol_scope_t::define(const string& name, ptr_op_t def) +{ + DEBUG("ledger.xpath.syms", "Defining '" << name << "' = " << def); + + std::pair<symbol_map::iterator, bool> result + = symbols.insert(symbol_map::value_type(name, def)); + if (! result.second) { + symbol_map::iterator i = symbols.find(name); + assert(i != symbols.end()); + symbols.erase(i); + + std::pair<symbol_map::iterator, bool> result2 + = symbols.insert(symbol_map::value_type(name, def)); + if (! 
result2.second) + throw_(compile_error, + "Redefinition of '" << name << "' in same scope"); + } +} + +namespace { + value_t xpath_fn_last(xpath_t::call_scope_t& scope) + { + xpath_t::context_scope_t& context(CONTEXT_SCOPE(scope)); + return context.size(); + } + + value_t xpath_fn_position(xpath_t::call_scope_t& scope) + { + xpath_t::context_scope_t& context(CONTEXT_SCOPE(scope)); + return context.index() + 1; + } + + value_t xpath_fn_text(xpath_t::call_scope_t& scope) + { + xpath_t::context_scope_t& context(CONTEXT_SCOPE(scope)); + return value_t(context.xml_node().to_value().to_string(), true); + } + + value_t xpath_fn_type(xpath_t::call_scope_t& scope) + { + if (scope.size() == 0) { + xpath_t::context_scope_t& context(CONTEXT_SCOPE(scope)); + return string_value(context.value().label()); + } + else if (scope.size() == 1) { + return string_value(scope[0].label()); + } + else { + assert(false); + return string_value("INVALID"); + } + } +} + +xpath_t::ptr_op_t +xpath_t::symbol_scope_t::lookup(const string& name) +{ + switch (name[0]) { + case 'l': + if (name == "last") + return WRAP_FUNCTOR(bind(xpath_fn_last, _1)); + break; + + case 'p': + if (name == "position") + return WRAP_FUNCTOR(bind(xpath_fn_position, _1)); + break; + + case 't': + if (name == "text") + return WRAP_FUNCTOR(bind(xpath_fn_text, _1)); + else if (name == "type") + return WRAP_FUNCTOR(bind(xpath_fn_type, _1)); + break; + } + + symbol_map::const_iterator i = symbols.find(name); + if (i != symbols.end()) + return (*i).second; + + return child_scope_t::lookup(name); +} + + +xpath_t::ptr_op_t +xpath_t::parse_value_term(std::istream& in, flags_t tflags) const +{ + ptr_op_t node; + + token_t& tok = next_token(in, tflags); + + switch (tok.kind) { + case token_t::VALUE: + node = new op_t(op_t::VALUE); + node->set_value(tok.value); + break; + + case token_t::IDENT: { +#if 0 +#ifdef USE_BOOST_PYTHON + if (tok.value->as_string() == "lambda") // special + try { + char c, buf[4096]; + + std::strcpy(buf, 
"lambda "); + READ_INTO(in, &buf[7], 4000, c, true); + + ptr_op_t eval = new op_t(op_t::O_EVAL); + ptr_op_t lambda = new op_t(op_t::FUNCTION); + lambda->functor = new python_functor_t(python_eval(buf)); + eval->set_left(lambda); + ptr_op_t sym = new op_t(op_t::SYMBOL); + sym->name = new string("__ptr"); + eval->set_right(sym); + + node = eval; + + goto done; + } + catch(const boost::python::error_already_set&) { + throw_(parse_error, "Error parsing lambda expression"); + } +#endif /* USE_BOOST_PYTHON */ +#endif + + string ident = tok.value.as_string(); + + // An identifier followed by ( represents a function call + tok = next_token(in, tflags); + if (tok.kind == token_t::LPAREN) { + node = new op_t(op_t::FUNC_NAME); + node->set_string(ident); + + ptr_op_t call_node(new op_t(op_t::O_CALL)); + call_node->set_left(node); + call_node->set_right(parse_value_expr(in, tflags | XPATH_PARSE_PARTIAL)); + + tok = next_token(in, tflags); + if (tok.kind != token_t::RPAREN) + tok.unexpected(0xff, ')'); + + node = call_node; + } else { + if (std::isdigit(ident[0])) { + node = new op_t(op_t::ARG_INDEX); + node->set_long(lexical_cast<unsigned int>(ident.c_str())); + } + else if (optional<node_t::nameid_t> id = + document_t::lookup_builtin_id(ident)) { + node = new op_t(op_t::NODE_ID); + node->set_name(*id); + } + else { + node = new op_t(op_t::NODE_NAME); + node->set_string(ident); + } + push_token(tok); + } + break; + } + + case token_t::AT_SYM: { + tok = next_token(in, tflags); + if (tok.kind != token_t::IDENT) + throw_(parse_error, "@ symbol must be followed by attribute name"); + + string ident = tok.value.as_string(); + if (optional<node_t::nameid_t> id = document_t::lookup_builtin_id(ident)) { + node = new op_t(op_t::ATTR_ID); + node->set_name(*id); + } + else { + node = new op_t(op_t::ATTR_NAME); + node->set_string(ident); + } + break; + } + + case token_t::DOLLAR: + tok = next_token(in, tflags); + if (tok.kind != token_t::IDENT) + throw parse_error("$ symbol must be 
followed by variable name"); + + node = new op_t(op_t::VAR_NAME); + node->set_string(tok.value.as_string()); + break; + + case token_t::DOT: + node = new op_t(op_t::NODE_ID); + node->set_name(document_t::CURRENT); + break; + case token_t::DOTDOT: + node = new op_t(op_t::NODE_ID); + node->set_name(document_t::PARENT); + break; + case token_t::SLASH: + node = new op_t(op_t::NODE_ID); + node->set_name(document_t::ROOT); + push_token(); + break; + case token_t::STAR: + node = new op_t(op_t::NODE_ID); + node->set_name(document_t::ALL); + break; + + case token_t::LPAREN: + node = new op_t(op_t::O_COMMA); + node->set_left(parse_value_expr(in, tflags | XPATH_PARSE_PARTIAL)); + if (! node->left()) + throw_(parse_error, tok.symbol << " operator not followed by argument"); + + tok = next_token(in, tflags); + if (tok.kind != token_t::RPAREN) + tok.unexpected(0xff, ')'); + break; + + default: + push_token(tok); + break; + } + +#if 0 +#ifdef USE_BOOST_PYTHON + done: +#endif +#endif + return node; +} + +xpath_t::ptr_op_t +xpath_t::parse_predicate_expr(std::istream& in, flags_t tflags) const +{ + ptr_op_t node(parse_value_term(in, tflags)); + + if (node) { + token_t& tok = next_token(in, tflags); + while (tok.kind == token_t::LBRACKET) { + ptr_op_t prev(node); + node = new op_t(op_t::O_PRED); + node->set_left(prev); + node->set_right(parse_value_expr(in, tflags | XPATH_PARSE_PARTIAL)); + if (! 
node->right()) + throw_(parse_error, "[ operator not followed by valid expression"); + + tok = next_token(in, tflags); + if (tok.kind != token_t::RBRACKET) + tok.unexpected(0xff, ']'); + + tok = next_token(in, tflags); + } + + push_token(tok); + } + + return node; +} + +xpath_t::ptr_op_t +xpath_t::parse_path_expr(std::istream& in, flags_t tflags) const +{ + ptr_op_t node(parse_predicate_expr(in, tflags)); + + if (node) { + token_t& tok = next_token(in, tflags); + if (tok.kind == token_t::SLASH) { + ptr_op_t prev(node); + + tok = next_token(in, tflags); + node = new op_t(tok.kind == token_t::SLASH ? + op_t::O_RFIND : op_t::O_FIND); + if (tok.kind != token_t::SLASH) + push_token(tok); + + node->set_left(prev); + node->set_right(parse_path_expr(in, tflags)); + if (! node->right()) + throw_(parse_error, "/ operator not followed by a valid term"); + } else { + push_token(tok); + } + } + + return node; +} + +xpath_t::ptr_op_t +xpath_t::parse_unary_expr(std::istream& in, flags_t tflags) const +{ + ptr_op_t node; + + token_t& tok = next_token(in, tflags); + + switch (tok.kind) { + case token_t::EXCLAM: { + ptr_op_t texpr(parse_path_expr(in, tflags)); + if (! texpr) + throw_(parse_error, + tok.symbol << " operator not followed by argument"); + + // A very quick optimization + if (texpr->kind == op_t::VALUE) { + texpr->as_value().in_place_negate(); + node = texpr; + } else { + node = new op_t(op_t::O_NOT); + node->set_left(texpr); + } + break; + } + + case token_t::MINUS: { + ptr_op_t texpr(parse_path_expr(in, tflags)); + if (! 
texpr) + throw_(parse_error, + tok.symbol << " operator not followed by argument"); + + // A very quick optimization + if (texpr->kind == op_t::VALUE) { + texpr->as_value().in_place_negate(); + node = texpr; + } else { + node = new op_t(op_t::O_NEG); + node->set_left(texpr); + } + break; + } + + default: + push_token(tok); + node = parse_path_expr(in, tflags); + break; + } + + return node; +} + +xpath_t::ptr_op_t +xpath_t::parse_union_expr(std::istream& in, flags_t tflags) const +{ + ptr_op_t node(parse_unary_expr(in, tflags)); + + if (node) { + token_t& tok = next_token(in, tflags); + if (tok.kind == token_t::PIPE || tok.kind == token_t::KW_UNION) { + ptr_op_t prev(node); + node = new op_t(op_t::O_UNION); + node->set_left(prev); + node->set_right(parse_union_expr(in, tflags)); + if (! node->right()) + throw_(parse_error, + tok.symbol << " operator not followed by argument"); + } else { + push_token(tok); + } + } + + return node; +} + +xpath_t::ptr_op_t +xpath_t::parse_mul_expr(std::istream& in, flags_t tflags) const +{ + ptr_op_t node(parse_union_expr(in, tflags)); + + if (node) { + token_t& tok = next_token(in, tflags); + if (tok.kind == token_t::STAR || tok.kind == token_t::KW_DIV) { + ptr_op_t prev(node); + node = new op_t(tok.kind == token_t::STAR ? + op_t::O_MUL : op_t::O_DIV); + node->set_left(prev); + node->set_right(parse_mul_expr(in, tflags)); + if (! node->right()) + throw_(parse_error, + tok.symbol << " operator not followed by argument"); + + tok = next_token(in, tflags); + } + push_token(tok); + } + + return node; +} + +xpath_t::ptr_op_t +xpath_t::parse_add_expr(std::istream& in, flags_t tflags) const +{ + ptr_op_t node(parse_mul_expr(in, tflags)); + + if (node) { + token_t& tok = next_token(in, tflags); + if (tok.kind == token_t::PLUS || + tok.kind == token_t::MINUS) { + ptr_op_t prev(node); + node = new op_t(tok.kind == token_t::PLUS ? + op_t::O_ADD : op_t::O_SUB); + node->set_left(prev); + node->set_right(parse_add_expr(in, tflags)); + if (! 
node->right()) + throw_(parse_error, + tok.symbol << " operator not followed by argument"); + + tok = next_token(in, tflags); + } + push_token(tok); + } + + return node; +} + +xpath_t::ptr_op_t +xpath_t::parse_logic_expr(std::istream& in, flags_t tflags) const +{ + ptr_op_t node(parse_add_expr(in, tflags)); + + if (node) { + op_t::kind_t kind = op_t::LAST; + flags_t _flags = tflags; + token_t& tok = next_token(in, tflags); + switch (tok.kind) { + case token_t::EQUAL: + kind = op_t::O_EQ; + break; + case token_t::NEQUAL: + kind = op_t::O_NEQ; + break; + case token_t::LESS: + kind = op_t::O_LT; + break; + case token_t::LESSEQ: + kind = op_t::O_LTE; + break; + case token_t::GREATER: + kind = op_t::O_GT; + break; + case token_t::GREATEREQ: + kind = op_t::O_GTE; + break; + default: + push_token(tok); + break; + } + + if (kind != op_t::LAST) { + ptr_op_t prev(node); + node = new op_t(kind); + node->set_left(prev); + node->set_right(parse_add_expr(in, _flags)); + + if (! node->right()) { + if (tok.kind == token_t::PLUS) + throw_(parse_error, + tok.symbol << " operator not followed by argument"); + else + throw_(parse_error, + tok.symbol << " operator not followed by argument"); + } + } + } + + return node; +} + +xpath_t::ptr_op_t +xpath_t::parse_and_expr(std::istream& in, flags_t tflags) const +{ + ptr_op_t node(parse_logic_expr(in, tflags)); + + if (node) { + token_t& tok = next_token(in, tflags); + if (tok.kind == token_t::KW_AND) { + ptr_op_t prev(node); + node = new op_t(op_t::O_AND); + node->set_left(prev); + node->set_right(parse_and_expr(in, tflags)); + if (! 
node->right()) + throw_(parse_error, + tok.symbol << " operator not followed by argument"); + } else { + push_token(tok); + } + } + return node; +} + +xpath_t::ptr_op_t +xpath_t::parse_or_expr(std::istream& in, flags_t tflags) const +{ + ptr_op_t node(parse_and_expr(in, tflags)); + + if (node) { + token_t& tok = next_token(in, tflags); + if (tok.kind == token_t::KW_OR) { + ptr_op_t prev(node); + node = new op_t(op_t::O_OR); + node->set_left(prev); + node->set_right(parse_or_expr(in, tflags)); + if (! node->right()) + throw_(parse_error, + tok.symbol << " operator not followed by argument"); + } else { + push_token(tok); + } + } + return node; +} + +xpath_t::ptr_op_t +xpath_t::parse_value_expr(std::istream& in, flags_t tflags) const +{ + ptr_op_t node(parse_or_expr(in, tflags)); + + if (node) { + token_t& tok = next_token(in, tflags); + if (tok.kind == token_t::COMMA) { + ptr_op_t prev(node); + node = new op_t(op_t::O_COMMA); + node->set_left(prev); + node->set_right(parse_value_expr(in, tflags)); + if (! node->right()) + throw_(parse_error, + tok.symbol << " operator not followed by argument"); + tok = next_token(in, tflags); + } + + if (tok.kind != token_t::TOK_EOF) { + if (tflags & XPATH_PARSE_PARTIAL) + push_token(tok); + else + tok.unexpected(); + } + } + else if (! 
(tflags & XPATH_PARSE_PARTIAL)) { + throw_(parse_error, "Failed to parse value expression"); + } + + return node; +} + +xpath_t::ptr_op_t +xpath_t::parse_expr(std::istream& in, flags_t tflags) const +{ + ptr_op_t node(parse_value_expr(in, tflags)); + + if (use_lookahead) { + use_lookahead = false; +#ifdef THREADSAFE + lookahead.rewind(in); +#else + lookahead->rewind(in); +#endif + } +#ifdef THREADSAFE + lookahead.clear(); +#else + lookahead->clear(); +#endif + + return node; +} + + +xpath_t::ptr_op_t xpath_t::op_t::compile(scope_t& scope) +{ + switch (kind) { + case VAR_NAME: + case FUNC_NAME: + if (ptr_op_t def = scope.lookup(as_string())) { +#if 1 + return def; +#else + // Aren't definitions compiled when they go in? Would + // recompiling here really add any benefit? + return def->compile(scope); +#endif + } + return this; + + default: + break; + } + + if (kind < TERMINALS) + return this; + + ptr_op_t lhs(left()->compile(scope)); + ptr_op_t rhs(right() ? right()->compile(scope) : ptr_op_t()); + + if (lhs == left() && (! rhs || rhs == right())) + return this; + + ptr_op_t intermediate(copy(lhs, rhs)); + + if (lhs->is_value() && (! 
rhs || rhs->is_value())) + return wrap_value(intermediate->calc(scope)); + + return intermediate; +} + + +value_t xpath_t::op_t::current_value(scope_t& scope) +{ + xpath_t::context_scope_t& context(CONTEXT_SCOPE(scope)); + return context.value(); +} + +node_t& xpath_t::op_t::current_xml_node(scope_t& scope) +{ + xpath_t::context_scope_t& context(CONTEXT_SCOPE(scope)); + return context.xml_node(); +} + +namespace { + value_t select_nodes(xpath_t::scope_t& scope, const value_t& nodes, + xpath_t::ptr_op_t selection_path, bool recurse); + + value_t select_recursively(xpath_t::scope_t& scope, node_t& xml_node, + xpath_t::ptr_op_t selection_path) + { + value_t result; + + if (xml_node.is_parent_node()) { + parent_node_t& parent_node(xml_node.as_parent_node()); + foreach (node_t * child, parent_node) + result.push_back(select_nodes(scope, child, selection_path, true)); + } + return result; + } + + value_t select_nodes(xpath_t::scope_t& scope, const value_t& nodes, + xpath_t::ptr_op_t selection_path, bool recurse) + { + if (nodes.is_null()) + return NULL_VALUE; + + value_t result; + + if (! 
nodes.is_sequence()) { + xpath_t::context_scope_t node_scope(scope, nodes, 0, 1); + result.push_back(selection_path->calc(node_scope)); + + if (recurse && nodes.is_xml_node()) + result.push_back(select_recursively(scope, *nodes.as_xml_node(), + selection_path)); + } else { + std::size_t index = 0; + std::size_t size = nodes.as_sequence().size(); + + foreach (const value_t& node, nodes.as_sequence()) { + xpath_t::context_scope_t node_scope(scope, node, index, size); + result.push_back(selection_path->calc(node_scope)); + + if (recurse && nodes.is_xml_node()) + result.push_back(select_recursively(scope, *node.as_xml_node(), + selection_path)); + + index++; + } + } + return result; + } +} + +value_t xpath_t::op_t::calc(scope_t& scope) +{ + bool find_all_nodes = false; + + switch (kind) { + case VALUE: + return as_value(); + + case VAR_NAME: + case FUNC_NAME: + if (ptr_op_t reference = compile(scope)) { + return reference->calc(scope); + } else { + throw_(calc_error, "No " << (kind == VAR_NAME ? "variable" : "function") + << " named '" << as_string() << "'"); + } + break; + + case FUNCTION: + // This should never be evaluated directly; it only appears as the + // left node of an O_CALL operator. + assert(false); + break; + + case O_CALL: { + call_scope_t call_args(scope); + + if (right()) + call_args.set_args(right()->calc(scope)); + + ptr_op_t func = left(); + string name; + + if (func->kind == FUNC_NAME) { + name = func->as_string(); + func = func->compile(scope); + } + + if (func->kind != FUNCTION) + throw_(calc_error, + name.empty() ? 
string("Attempt to call non-function") : + (string("Attempt to call unknown function '") + name + "'")); + + return func->as_function()(call_args); + } + + case ARG_INDEX: { + call_scope_t& args(CALL_SCOPE(scope)); + + if (as_long() >= 0 && as_long() < args.size()) + return args[as_long()]; + else + throw_(calc_error, "Reference to non-existing argument"); + break; + } + + case O_FIND: + case O_RFIND: + return select_nodes(scope, left()->calc(scope), right(), kind == O_RFIND); + + case O_PRED: { + value_t values = left()->calc(scope); + + if (! values.is_null()) { + op_predicate pred(right()); + + if (! values.is_sequence()) { + context_scope_t value_scope(scope, values, 0, 1); + if (pred(value_scope)) + return values; + return NULL_VALUE; + } else { + std::size_t index = 0; + std::size_t size = values.as_sequence().size(); + + value_t result; + + foreach (const value_t& value, values.as_sequence()) { + context_scope_t value_scope(scope, value, index, size); + if (pred(value_scope)) + result.push_back(value); + index++; + } + return result; + } + } + break; + } + + case NODE_ID: + switch (as_name()) { + case document_t::CURRENT: + return current_value(scope); + + case document_t::PARENT: + if (optional<parent_node_t&> parent = current_xml_node(scope).parent()) + return &*parent; + else + throw_(std::logic_error, "Attempt to access parent of root node"); + break; + + case document_t::ROOT: + return &current_xml_node(scope).document(); + + case document_t::ALL: + find_all_nodes = true; + break; + + default: + break; // pass down to the NODE_NAME case + } + // fall through... + + case NODE_NAME: { + node_t& current_node(current_xml_node(scope)); + + if (current_node.is_parent_node()) { + const bool have_name_id = kind == NODE_ID; + + parent_node_t& parent(current_node.as_parent_node()); + + value_t result; + foreach (node_t * child, parent) { + if (find_all_nodes || + ( have_name_id && as_name() == child->name_id()) || + (! 
have_name_id && as_string() == child->name())) + result.push_back(child); + } + return result; + } + break; + } + + case ATTR_ID: + case ATTR_NAME: + if (optional<value_t&> value = + kind == ATTR_ID ? current_xml_node(scope).get_attr(as_name()) : + current_xml_node(scope).get_attr(as_string())) + return *value; + + break; + + case O_NEQ: + return left()->calc(scope) != right()->calc(scope); + case O_EQ: + return left()->calc(scope) == right()->calc(scope); + case O_LT: + return left()->calc(scope) < right()->calc(scope); + case O_LTE: + return left()->calc(scope) <= right()->calc(scope); + case O_GT: + return left()->calc(scope) > right()->calc(scope); + case O_GTE: + return left()->calc(scope) >= right()->calc(scope); + + case O_ADD: + return left()->calc(scope) + right()->calc(scope); + case O_SUB: + return left()->calc(scope) - right()->calc(scope); + case O_MUL: + return left()->calc(scope) * right()->calc(scope); + case O_DIV: + return left()->calc(scope) / right()->calc(scope); + + case O_NEG: + assert(! right()); + return left()->calc(scope).negate(); + + case O_NOT: + assert(! right()); + return ! 
left()->calc(scope); + + case O_AND: + return left()->calc(scope) && right()->calc(scope); + case O_OR: + return left()->calc(scope) || right()->calc(scope); + + case O_COMMA: + case O_UNION: { + value_t result(left()->calc(scope)); + + ptr_op_t next = right(); + while (next) { + ptr_op_t value_op; + if (next->kind == O_COMMA || next->kind == O_UNION) { + value_op = next->left(); + next = next->right(); + } else { + value_op = next; + next = NULL; + } + + result.push_back(value_op->calc(scope)); + } + return result; + } + + case LAST: + default: + assert(false); + break; + } + + return NULL_VALUE; +} + + +bool xpath_t::op_t::print(std::ostream& out, print_context_t& context) const +{ + bool found = false; + + if (context.start_pos && this == context.op_to_find) { + *context.start_pos = (long)out.tellp() - 1; + found = true; + } + + string symbol; + + switch (kind) { + case VALUE: { + const value_t& value(as_value()); + switch (value.type()) { + case value_t::VOID: + out << "<VOID>"; + break; + case value_t::BOOLEAN: + if (value) + out << "1"; + else + out << "0"; + break; + case value_t::INTEGER: + out << value; + break; + case value_t::AMOUNT: + if (! context.relaxed) + out << '{'; + out << value; + if (! context.relaxed) + out << '}'; + break; + case value_t::BALANCE: + case value_t::BALANCE_PAIR: + assert(false); + break; + case value_t::DATETIME: + out << '[' << value << ']'; + break; + case value_t::STRING: + out << '"' << value << '"'; + break; + + case value_t::XML_NODE: + out << '<' << value << '>'; + break; + case value_t::POINTER: + out << '&' << value; + break; + case value_t::SEQUENCE: + out << '~' << value << '~'; + break; + } + break; + } + + case ATTR_ID: + out << '@'; + // fall through... 
+ case NODE_ID: { + context_scope_t& node_scope(CONTEXT_SCOPE(context.scope)); + if (optional<const char *> name = + node_scope.xml_node().document().lookup_name(as_name())) + out << *name; + else + out << '#' << as_name(); + break; + } + + case NODE_NAME: + case FUNC_NAME: + out << as_string(); + break; + + case ATTR_NAME: + out << '@' << as_string(); + break; + + case VAR_NAME: + out << '$' << as_string(); + break; + + case FUNCTION: + out << "<FUNCTION>"; + break; + + case ARG_INDEX: + out << '@' << as_long(); + break; + + case O_NOT: + out << "!"; + if (left() && left()->print(out, context)) + found = true; + break; + case O_NEG: + out << "-"; + if (left() && left()->print(out, context)) + found = true; + break; + + case O_UNION: + if (left() && left()->print(out, context)) + found = true; + out << " | "; + if (right() && right()->print(out, context)) + found = true; + break; + + case O_ADD: + out << "("; + if (left() && left()->print(out, context)) + found = true; + out << " + "; + if (right() && right()->print(out, context)) + found = true; + out << ")"; + break; + case O_SUB: + out << "("; + if (left() && left()->print(out, context)) + found = true; + out << " - "; + if (right() && right()->print(out, context)) + found = true; + out << ")"; + break; + case O_MUL: + out << "("; + if (left() && left()->print(out, context)) + found = true; + out << " * "; + if (right() && right()->print(out, context)) + found = true; + out << ")"; + break; + case O_DIV: + out << "("; + if (left() && left()->print(out, context)) + found = true; + out << " / "; + if (right() && right()->print(out, context)) + found = true; + out << ")"; + break; + + case O_NEQ: + out << "("; + if (left() && left()->print(out, context)) + found = true; + out << " != "; + if (right() && right()->print(out, context)) + found = true; + out << ")"; + break; + case O_EQ: + out << "("; + if (left() && left()->print(out, context)) + found = true; + out << " == "; + if (right() && right()->print(out, 
context)) + found = true; + out << ")"; + break; + case O_LT: + out << "("; + if (left() && left()->print(out, context)) + found = true; + out << " < "; + if (right() && right()->print(out, context)) + found = true; + out << ")"; + break; + case O_LTE: + out << "("; + if (left() && left()->print(out, context)) + found = true; + out << " <= "; + if (right() && right()->print(out, context)) + found = true; + out << ")"; + break; + case O_GT: + out << "("; + if (left() && left()->print(out, context)) + found = true; + out << " > "; + if (right() && right()->print(out, context)) + found = true; + out << ")"; + break; + case O_GTE: + out << "("; + if (left() && left()->print(out, context)) + found = true; + out << " >= "; + if (right() && right()->print(out, context)) + found = true; + out << ")"; + break; + + case O_AND: + out << "("; + if (left() && left()->print(out, context)) + found = true; + out << " & "; + if (right() && right()->print(out, context)) + found = true; + out << ")"; + break; + case O_OR: + out << "("; + if (left() && left()->print(out, context)) + found = true; + out << " | "; + if (right() && right()->print(out, context)) + found = true; + out << ")"; + break; + + case O_COMMA: + if (left() && left()->print(out, context)) + found = true; + out << ", "; + if (right() && right()->print(out, context)) + found = true; + break; + + case O_CALL: + if (left() && left()->print(out, context)) + found = true; + out << "("; + if (right() && right()->print(out, context)) + found = true; + out << ")"; + break; + + case O_FIND: + if (left() && left()->print(out, context)) + found = true; + out << "/"; + if (right() && right()->print(out, context)) + found = true; + break; + case O_RFIND: + if (left() && left()->print(out, context)) + found = true; + out << "//"; + if (right() && right()->print(out, context)) + found = true; + break; + case O_PRED: + if (left() && left()->print(out, context)) + found = true; + out << "["; + if (right() && right()->print(out, 
context)) + found = true; + out << "]"; + break; + + case LAST: + default: + assert(false); + break; + } + + if (! symbol.empty()) { + if (amount_t::current_pool->find(symbol)) + out << '@'; + out << symbol; + } + + if (context.end_pos && this == context.op_to_find) + *context.end_pos = (long)out.tellp() - 1; + + return found; +} + +void xpath_t::op_t::dump(std::ostream& out, const int depth) const +{ + out.setf(std::ios::left); + out.width(10); + out << this << " "; + + for (int i = 0; i < depth; i++) + out << " "; + + switch (kind) { + case VALUE: + out << "VALUE - " << as_value(); + break; + + case NODE_NAME: + out << "NODE_NAME - " << as_string(); + break; + case NODE_ID: + out << "NODE_ID - " << as_name(); + break; + + case ATTR_NAME: + out << "ATTR_NAME - " << as_string(); + break; + case ATTR_ID: + out << "ATTR_ID - " << as_name(); + break; + + case FUNC_NAME: + out << "FUNC_NAME - " << as_string(); + break; + + case VAR_NAME: + out << "VAR_NAME - " << as_string(); + break; + + case ARG_INDEX: + out << "ARG_INDEX - " << as_long(); + break; + + case FUNCTION: + out << "FUNCTION"; + break; + + case O_CALL: out << "O_CALL"; break; + + case O_NOT: out << "O_NOT"; break; + case O_NEG: out << "O_NEG"; break; + + case O_UNION: out << "O_UNION"; break; + + case O_ADD: out << "O_ADD"; break; + case O_SUB: out << "O_SUB"; break; + case O_MUL: out << "O_MUL"; break; + case O_DIV: out << "O_DIV"; break; + + case O_NEQ: out << "O_NEQ"; break; + case O_EQ: out << "O_EQ"; break; + case O_LT: out << "O_LT"; break; + case O_LTE: out << "O_LTE"; break; + case O_GT: out << "O_GT"; break; + case O_GTE: out << "O_GTE"; break; + + case O_AND: out << "O_AND"; break; + case O_OR: out << "O_OR"; break; + + case O_COMMA: out << "O_COMMA"; break; + + case O_FIND: out << "O_FIND"; break; + case O_RFIND: out << "O_RFIND"; break; + case O_PRED: out << "O_PRED"; break; + + case LAST: + default: + assert(false); + break; + } + + out << " (" << refc << ')' << std::endl; + + if (kind > 
TERMINALS) { + if (left()) { + left()->dump(out, depth + 1); + if (right()) + right()->dump(out, depth + 1); + } else { + assert(! right()); + } + } +} + +} // namespace xml + + +value_t xml_command(xml::xpath_t::call_scope_t& args) +{ + assert(args.size() == 0); + + value_t ostream = args.resolve("ostream"); + std::ostream& outs(ostream.as_ref_lval<std::ostream>()); + + xml::xpath_t::context_scope_t& node_context(CONTEXT_SCOPE(args)); + node_context.xml_node().print(outs); + + return true; +} + +} // namespace ledger diff --git a/src/traversal/xpath.h b/src/traversal/xpath.h new file mode 100644 index 00000000..c9f299fc --- /dev/null +++ b/src/traversal/xpath.h @@ -0,0 +1,873 @@ +/* + * Copyright (c) 2003-2007, John Wiegley. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * - Neither the name of New Artisans LLC nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _XPATH_H +#define _XPATH_H + +#include "document.h" + +namespace ledger { +namespace xml { + +class xpath_t +{ +public: + struct op_t; + typedef intrusive_ptr<op_t> ptr_op_t; + + static void initialize(); + static void shutdown(); + + DECLARE_EXCEPTION(parse_error); + DECLARE_EXCEPTION(compile_error); + DECLARE_EXCEPTION(calc_error); + +public: + class call_scope_t; + + typedef function<value_t (call_scope_t&)> function_t; + +#define MAKE_FUNCTOR(x) \ + xml::xpath_t::op_t::wrap_functor(bind(&x, this, _1)) +#define WRAP_FUNCTOR(x) \ + xml::xpath_t::op_t::wrap_functor(x) + +public: + class scope_t : public noncopyable + { + public: + enum type_t { + CHILD_SCOPE, + SYMBOL_SCOPE, + CALL_SCOPE, + CONTEXT_SCOPE + } type_; + + explicit scope_t(type_t _type) : type_(_type) { + TRACE_CTOR(xpath_t::scope_t, "type_t"); + } + virtual ~scope_t() { + TRACE_DTOR(xpath_t::scope_t); + } + + const type_t type() const { + return type_; + } + + virtual void define(const string& name, ptr_op_t def) = 0; + void define(const string& name, const value_t& val); + virtual ptr_op_t lookup(const string& name) = 0; + value_t resolve(const string& name) { + return lookup(name)->calc(*this); + } + + virtual optional<scope_t&> find_scope(const type_t _type, + bool skip_this = false) = 0; + virtual optional<scope_t&> find_first_scope(const type_t _type1, + const type_t _type2, + bool skip_this = false) = 0; + + template <typename T> + T& 
find_scope(bool skip_this = false) { + assert(false); + } + template <typename T> + optional<T&> maybe_find_scope(bool skip_this = false) { + assert(false); + } + }; + + class child_scope_t : public scope_t + { + scope_t * parent; + + public: + explicit child_scope_t(type_t _type = CHILD_SCOPE) + : scope_t(_type), parent(NULL) { + TRACE_CTOR(xpath_t::child_scope_t, "type_t"); + } + explicit child_scope_t(scope_t& _parent, type_t _type = CHILD_SCOPE) + : scope_t(_type), parent(&_parent) { + TRACE_CTOR(xpath_t::child_scope_t, "scope_t&, type_t"); + } + virtual ~child_scope_t() { + TRACE_DTOR(xpath_t::child_scope_t); + } + public: + virtual void define(const string& name, ptr_op_t def) { + if (parent) + parent->define(name, def); + } + virtual ptr_op_t lookup(const string& name) { + if (parent) + return parent->lookup(name); + return ptr_op_t(); + } + + virtual optional<scope_t&> find_scope(type_t _type, + bool skip_this = false) { + for (scope_t * ptr = (skip_this ? parent : this); ptr; ) { + if (ptr->type() == _type) + return *ptr; + + ptr = polymorphic_downcast<child_scope_t *>(ptr)->parent; + } + return none; + } + + virtual optional<scope_t&> find_first_scope(const type_t _type1, + const type_t _type2, + bool skip_this = false) { + for (scope_t * ptr = (skip_this ? 
parent : this); ptr; ) { + if (ptr->type() == _type1 || ptr->type() == _type2) + return *ptr; + + ptr = polymorphic_downcast<child_scope_t *>(ptr)->parent; + } + return none; + } + }; + + class symbol_scope_t : public child_scope_t + { + typedef std::map<const string, ptr_op_t> symbol_map; + symbol_map symbols; + + public: + explicit symbol_scope_t() + : child_scope_t(SYMBOL_SCOPE) { + TRACE_CTOR(xpath_t::symbol_scope_t, ""); + } + explicit symbol_scope_t(scope_t& _parent) + : child_scope_t(_parent, SYMBOL_SCOPE) { + TRACE_CTOR(xpath_t::symbol_scope_t, "scope_t&"); + } + virtual ~symbol_scope_t() { + TRACE_DTOR(xpath_t::symbol_scope_t); + } + + virtual void define(const string& name, ptr_op_t def); + void define(const string& name, const value_t& val) { + scope_t::define(name, val); + } + virtual ptr_op_t lookup(const string& name); + }; + + class call_scope_t : public child_scope_t + { + value_t args; + + public: + explicit call_scope_t(scope_t& _parent) + : child_scope_t(_parent, CALL_SCOPE) { + TRACE_CTOR(xpath_t::call_scope_t, "scope_t&"); + } + virtual ~call_scope_t() { + TRACE_DTOR(xpath_t::call_scope_t); + } + + void set_args(const value_t& _args) { + args = _args; + } + + value_t& value() { + return args; + } + + value_t& operator[](const int index) { + return args[index]; + } + const value_t& operator[](const int index) const { + return args[index]; + } + + void push_back(const value_t& val) { + args.push_back(val); + } + void pop_back() { + args.pop_back(); + } + + const std::size_t size() const { + return args.size(); + } + }; + + class context_scope_t : public child_scope_t + { + public: + value_t current_element; + std::size_t element_index; + std::size_t sequence_size; + + explicit context_scope_t(scope_t& _parent, + const value_t& _element = NULL_VALUE, + const std::size_t _element_index = 0, + const std::size_t _sequence_size = 0) + : child_scope_t(_parent, CONTEXT_SCOPE), current_element(_element), + element_index(_element_index), 
sequence_size(_sequence_size) + { + TRACE_CTOR(xpath_t::context_scope_t, "scope_t&, const value_t&, ..."); + } + virtual ~context_scope_t() { + TRACE_DTOR(xpath_t::context_scope_t); + } + + const std::size_t index() const { + return element_index; + } + const std::size_t size() const { + return sequence_size; + } + + value_t& value() { + return current_element; + } + node_t& xml_node() { + assert(current_element.is_xml_node()); + return *current_element.as_xml_node(); + } + }; + +#define XPATH_PARSE_NORMAL 0x00 +#define XPATH_PARSE_PARTIAL 0x01 +#define XPATH_PARSE_RELAXED 0x02 +#define XPATH_PARSE_NO_MIGRATE 0x04 +#define XPATH_PARSE_NO_REDUCE 0x08 +#define XPATH_PARSE_ALLOW_DATE 0x10 + + typedef uint_least8_t flags_t; + +private: + struct token_t + { + enum kind_t { + VALUE, // any kind of literal value + + IDENT, // [A-Za-z_][-A-Za-z0-9_:]* + DOLLAR, // $ + AT_SYM, // @ + + DOT, // . + DOTDOT, // .. + SLASH, // / + + LPAREN, // ( + RPAREN, // ) + LBRACKET, // [ + RBRACKET, // ] + + EQUAL, // = + NEQUAL, // != + LESS, // < + LESSEQ, // <= + GREATER, // > + GREATEREQ, // >= + + MINUS, // - + PLUS, // + + STAR, // * + KW_DIV, + + EXCLAM, // ! 
+ KW_AND, + KW_OR, + KW_MOD, + + PIPE, // | + KW_UNION, + + COMMA, // , + + TOK_EOF, + UNKNOWN + } kind; + + char symbol[3]; + value_t value; + std::size_t length; + + explicit token_t() : kind(UNKNOWN), length(0) { + TRACE_CTOR(xpath_t::token_t, ""); + } + token_t(const token_t& other) { + assert(false); + TRACE_CTOR(xpath_t::token_t, "copy"); + *this = other; + } + ~token_t() { + TRACE_DTOR(xpath_t::token_t); + } + + token_t& operator=(const token_t& other) { + if (&other == this) + return *this; + assert(false); + return *this; + } + + void clear() { + kind = UNKNOWN; + length = 0; + value = NULL_VALUE; + + symbol[0] = '\0'; + symbol[1] = '\0'; + symbol[2] = '\0'; + } + + void parse_ident(std::istream& in); + void next(std::istream& in, flags_t flags); + void rewind(std::istream& in); + void unexpected(); + + static void unexpected(char c, char wanted = '\0'); + }; + +public: + class path_iterator_t + { + typedef node_t * pointer; + typedef node_t& reference; + + xpath_t& path_expr; + scope_t& scope; + + mutable value_t::sequence_t sequence; + mutable bool searched; + + public: + typedef value_t::sequence_t::iterator iterator; + typedef value_t::sequence_t::const_iterator const_iterator; + + path_iterator_t(xpath_t& _path_expr, scope_t& _scope) + : path_expr(_path_expr), scope(_scope), searched(false) {} + + iterator begin() { + if (! 
searched) { + sequence = path_expr.calc(scope).to_sequence(); + searched = true; + } + return sequence.begin(); + } + const_iterator begin() const { + return const_cast<path_iterator_t *>(this)->begin(); + } + + iterator end() { return sequence.end(); } + const_iterator end() const { return sequence.end(); } + }; + + struct op_t : public noncopyable + { + enum kind_t { + VALUE, + + FUNC_NAME, + VAR_NAME, + ARG_INDEX, + + NODE_ID, + NODE_NAME, + ATTR_ID, + ATTR_NAME, + + CONSTANTS, // constants end here + + FUNCTION, + + TERMINALS, // terminals end here + + O_CALL, + O_ARG, + + O_FIND, + O_RFIND, + O_PRED, + + O_NEQ, + O_EQ, + O_LT, + O_LTE, + O_GT, + O_GTE, + + O_ADD, + O_SUB, + O_MUL, + O_DIV, + O_NEG, + + O_NOT, + O_AND, + O_OR, + + O_UNION, + + O_COMMA, + + LAST // operators end here + }; + + kind_t kind; + mutable short refc; + ptr_op_t left_; + + variant<unsigned int, // used by ARG_INDEX and O_ARG + value_t, // used by constant VALUE + string, // used by constants SYMBOL, *_NAME + function_t, // used by terminal FUNCTION + node_t::nameid_t, // used by NODE_ID and ATTR_ID + ptr_op_t> // used by all binary operators + data; + + explicit op_t(const kind_t _kind) : kind(_kind), refc(0){ + TRACE_CTOR(xpath_t::op_t, "const kind_t"); + } + ~op_t() { + TRACE_DTOR(xpath_t::op_t); + + DEBUG("ledger.xpath.memory", "Destroying " << this); + assert(refc == 0); + } + + bool is_long() const { + return data.type() == typeid(unsigned int); + } + unsigned int& as_long() { + assert(kind == ARG_INDEX || kind == O_ARG); + return boost::get<unsigned int>(data); + } + const unsigned int& as_long() const { + return const_cast<op_t *>(this)->as_long(); + } + void set_long(unsigned int val) { + data = val; + } + + bool is_value() const { + return kind == VALUE; + } + value_t& as_value() { + assert(kind == VALUE); + return boost::get<value_t>(data); + } + const value_t& as_value() const { + return const_cast<op_t *>(this)->as_value(); + } + void set_value(const value_t& val) { + data = 
val; + } + + bool is_string() const { + return data.type() == typeid(string); + } + string& as_string() { + assert(kind == NODE_NAME || kind == ATTR_NAME || kind == FUNC_NAME); + return boost::get<string>(data); + } + const string& as_string() const { + return const_cast<op_t *>(this)->as_string(); + } + void set_string(const string& val) { + data = val; + } + + bool is_function() const { + return kind == FUNCTION; + } + function_t& as_function() { + assert(kind == FUNCTION); + return boost::get<function_t>(data); + } + const function_t& as_function() const { + return const_cast<op_t *>(this)->as_function(); + } + void set_function(const function_t& val) { + data = val; + } + + bool is_name() const { + return data.type() == typeid(node_t::nameid_t); + } + node_t::nameid_t& as_name() { + assert(kind == NODE_ID || kind == ATTR_ID); + return boost::get<node_t::nameid_t>(data); + } + const node_t::nameid_t& as_name() const { + return const_cast<op_t *>(this)->as_name(); + } + void set_name(const node_t::nameid_t& val) { + data = val; + } + + ptr_op_t& as_op() { + assert(kind > TERMINALS); + return boost::get<ptr_op_t>(data); + } + const ptr_op_t& as_op() const { + return const_cast<op_t *>(this)->as_op(); + } + + void acquire() const { + DEBUG("ledger.xpath.memory", + "Acquiring " << this << ", refc now " << refc + 1); + assert(refc >= 0); + refc++; + } + void release() const { + DEBUG("ledger.xpath.memory", + "Releasing " << this << ", refc now " << refc - 1); + assert(refc > 0); + if (--refc == 0) + checked_delete(this); + } + + ptr_op_t& left() { + return left_; + } + const ptr_op_t& left() const { + assert(kind > TERMINALS); + return left_; + } + void set_left(const ptr_op_t& expr) { + assert(kind > TERMINALS); + left_ = expr; + } + + ptr_op_t& right() { + assert(kind > TERMINALS); + return as_op(); + } + const ptr_op_t& right() const { + assert(kind > TERMINALS); + return as_op(); + } + void set_right(const ptr_op_t& expr) { + assert(kind > TERMINALS); + data = 
expr; + } + + static ptr_op_t new_node(kind_t _kind, ptr_op_t _left = NULL, + ptr_op_t _right = NULL); + ptr_op_t copy(ptr_op_t _left = NULL, ptr_op_t _right = NULL) const { + return new_node(kind, _left, _right); + } + + static ptr_op_t wrap_value(const value_t& val); + static ptr_op_t wrap_functor(const function_t& fobj); + + ptr_op_t compile(scope_t& scope); + value_t current_value(scope_t& scope); + node_t& current_xml_node(scope_t& scope); + value_t calc(scope_t& scope); + + struct print_context_t + { + scope_t& scope; + const bool relaxed; + const ptr_op_t& op_to_find; + unsigned long * start_pos; + unsigned long * end_pos; + + print_context_t(scope_t& _scope, + const bool _relaxed = false, + const ptr_op_t& _op_to_find = ptr_op_t(), + unsigned long * _start_pos = NULL, + unsigned long * _end_pos = NULL) + : scope(_scope), relaxed(_relaxed), op_to_find(_op_to_find), + start_pos(_start_pos), end_pos(_end_pos) {} + }; + + bool print(std::ostream& out, print_context_t& context) const; + void dump(std::ostream& out, const int depth) const; + + friend inline void intrusive_ptr_add_ref(xpath_t::op_t * op) { + op->acquire(); + } + friend inline void intrusive_ptr_release(xpath_t::op_t * op) { + op->release(); + } + }; + + class op_predicate { + ptr_op_t op; + public: + explicit op_predicate(ptr_op_t _op) : op(_op) {} + bool operator()(scope_t& scope) { + return op->calc(scope).to_boolean(); + } + }; + +public: + ptr_op_t ptr; + + xpath_t& operator=(ptr_op_t _expr) { + expr = ""; + ptr = _expr; + return *this; + } + +#ifdef THREADSAFE + mutable token_t lookahead; +#else + static token_t * lookahead; +#endif + mutable bool use_lookahead; + + token_t& next_token(std::istream& in, flags_t tflags) const { + if (use_lookahead) + use_lookahead = false; + else +#ifdef THREADSAFE + lookahead.next(in, tflags); +#else + lookahead->next(in, tflags); +#endif +#ifdef THREADSAFE + return lookahead; +#else + return *lookahead; +#endif + } + void push_token(const token_t& tok) const 
{ +#ifdef THREADSAFE + assert(&tok == &lookahead); +#else + assert(&tok == lookahead); +#endif + use_lookahead = true; + } + void push_token() const { + use_lookahead = true; + } + + ptr_op_t parse_value_term(std::istream& in, flags_t flags) const; + ptr_op_t parse_predicate_expr(std::istream& in, flags_t flags) const; + ptr_op_t parse_path_expr(std::istream& in, flags_t flags) const; + ptr_op_t parse_unary_expr(std::istream& in, flags_t flags) const; + ptr_op_t parse_union_expr(std::istream& in, flags_t flags) const; + ptr_op_t parse_mul_expr(std::istream& in, flags_t flags) const; + ptr_op_t parse_add_expr(std::istream& in, flags_t flags) const; + ptr_op_t parse_logic_expr(std::istream& in, flags_t flags) const; + ptr_op_t parse_and_expr(std::istream& in, flags_t flags) const; + ptr_op_t parse_or_expr(std::istream& in, flags_t flags) const; + ptr_op_t parse_querycolon_expr(std::istream& in, flags_t flags) const; + ptr_op_t parse_value_expr(std::istream& in, flags_t flags) const; + + ptr_op_t parse_expr(std::istream& in, + flags_t flags = XPATH_PARSE_RELAXED) const; + + ptr_op_t parse_expr(const string& str, + flags_t tflags = XPATH_PARSE_RELAXED) const + { + std::istringstream stream(str); +#if 0 + try { +#endif + return parse_expr(stream, tflags); +#if 0 + } + catch (error * err) { + err->context.push_back + (new line_context(str, (long)stream.tellg() - 1, + "While parsing value expression:")); + throw err; + } +#endif + } + + ptr_op_t parse_expr(const char * p, + flags_t tflags = XPATH_PARSE_RELAXED) const { + return parse_expr(string(p), tflags); + } + + bool print(std::ostream& out, op_t::print_context_t& context) const { + if (ptr) + ptr->print(out, context); + return true; + } + +public: + string expr; + flags_t flags; // flags used to parse `expr' + + explicit xpath_t() : ptr(NULL), use_lookahead(false), flags(0) { + TRACE_CTOR(xpath_t, ""); + } + explicit xpath_t(ptr_op_t _ptr) : ptr(_ptr), use_lookahead(false) { + TRACE_CTOR(xpath_t, "ptr_op_t"); + } + + 
explicit xpath_t(const string& _expr, flags_t _flags = XPATH_PARSE_RELAXED) + : ptr(NULL), use_lookahead(false), flags(0) { + TRACE_CTOR(xpath_t, "const string&, flags_t"); + if (! _expr.empty()) + parse(_expr, _flags); + } + explicit xpath_t(std::istream& in, flags_t _flags = XPATH_PARSE_RELAXED) + : ptr(NULL), use_lookahead(false), flags(0) { + TRACE_CTOR(xpath_t, "std::istream&, flags_t"); + parse(in, _flags); + } + xpath_t(const xpath_t& other) + : ptr(other.ptr), use_lookahead(false), + expr(other.expr), flags(other.flags) { + TRACE_CTOR(xpath_t, "copy"); + } + ~xpath_t() { + TRACE_DTOR(xpath_t); + } + +#if 0 + xpath_t& operator=(const string& _expr) { + parse(_expr); + return *this; + } +#endif + xpath_t& operator=(const xpath_t& _expr); + +#if 0 + operator ptr_op_t() throw() { + return ptr; + } + operator bool() const throw() { + return ptr != NULL; + } + operator string() const throw() { + return expr; + } +#endif + + void parse(const string& _expr, flags_t _flags = XPATH_PARSE_RELAXED) { + expr = _expr; + flags = _flags; + ptr = parse_expr(_expr, _flags); + } + void parse(std::istream& in, flags_t _flags = XPATH_PARSE_RELAXED) { + expr = ""; + flags = _flags; + ptr = parse_expr(in, _flags); + } + + void compile(scope_t& scope) { + if (ptr.get()) + ptr = ptr->compile(scope); + } + + value_t calc(scope_t& scope) const { + if (ptr.get()) + return ptr->calc(scope); + return NULL_VALUE; + } + + static value_t eval(const string& _expr, scope_t& scope) { + return xpath_t(_expr).calc(scope); + } + + path_iterator_t find_all(scope_t& scope) { + return path_iterator_t(*this, scope); + } + + void print(std::ostream& out, scope_t& scope) const { + op_t::print_context_t context(scope); + print(out, context); + } + + void dump(std::ostream& out) const { + if (ptr) + ptr->dump(out, 0); + } +}; + +inline xpath_t::ptr_op_t +xpath_t::op_t::new_node(kind_t _kind, ptr_op_t _left, ptr_op_t _right) { + ptr_op_t node(new op_t(_kind)); + node->set_left(_left); + 
node->set_right(_right); + return node; +} + +inline xpath_t::ptr_op_t +xpath_t::op_t::wrap_value(const value_t& val) { + xpath_t::ptr_op_t temp(new xpath_t::op_t(xpath_t::op_t::VALUE)); + temp->set_value(val); + return temp; +} + +inline xpath_t::ptr_op_t +xpath_t::op_t::wrap_functor(const function_t& fobj) { + xpath_t::ptr_op_t temp(new xpath_t::op_t(xpath_t::op_t::FUNCTION)); + temp->set_function(fobj); + return temp; +} + +template<> +inline xpath_t::symbol_scope_t& +xpath_t::scope_t::find_scope<xpath_t::symbol_scope_t>(bool skip_this) { + optional<scope_t&> scope = find_scope(SYMBOL_SCOPE, skip_this); + assert(scope); + return downcast<symbol_scope_t>(*scope); +} + +template<> +inline xpath_t::call_scope_t& +xpath_t::scope_t::find_scope<xpath_t::call_scope_t>(bool skip_this) { + optional<scope_t&> scope = find_scope(CALL_SCOPE, skip_this); + assert(scope); + return downcast<call_scope_t>(*scope); +} + +template<> +inline xpath_t::context_scope_t& +xpath_t::scope_t::find_scope<xpath_t::context_scope_t>(bool skip_this) { + optional<scope_t&> scope = find_scope(CONTEXT_SCOPE, skip_this); + assert(scope); + return downcast<context_scope_t>(*scope); +} + +#define FIND_SCOPE(scope_type, scope_ref) \ + downcast<xml::xpath_t::scope_t>(scope_ref).find_scope<scope_type>() + +#define CALL_SCOPE(scope_ref) \ + FIND_SCOPE(xml::xpath_t::call_scope_t, scope_ref) +#define SYMBOL_SCOPE(scope_ref) \ + FIND_SCOPE(xml::xpath_t::symbol_scope_t, scope_ref) +#define CONTEXT_SCOPE(scope_ref) \ + FIND_SCOPE(xml::xpath_t::context_scope_t, scope_ref) + +} // namespace xml + +value_t xml_command(xml::xpath_t::call_scope_t& args); + +} // namespace ledger + +#endif // _XPATH_H |