/* * Copyright (c) 2003-2007, John Wiegley. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * - Neither the name of New Artisans LLC nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _XPATH_H #define _XPATH_H #include "document.h" namespace ledger { namespace xml { class xpath_t { public: struct op_t; typedef intrusive_ptr ptr_op_t; static void initialize(); static void shutdown(); DECLARE_EXCEPTION(parse_error); DECLARE_EXCEPTION(compile_error); DECLARE_EXCEPTION(calc_error); public: class scope_t; typedef function function_t; #define MAKE_FUNCTOR(x) \ xml::xpath_t::wrap_functor(bind(&x, this, _1)) static ptr_op_t wrap_value(const value_t& val); static ptr_op_t wrap_functor(const function_t& fobj); public: class scope_t : public noncopyable { typedef std::map symbol_map; symbol_map symbols; public: scope_t * parent; value_t::sequence_t args; enum kind_t { NORMAL, STATIC, ARGUMENT } kind; scope_t(scope_t * _parent = NULL, kind_t _kind = NORMAL) : parent(_parent), kind(_kind) { TRACE_CTOR(xpath_t::scope_t, "scope *, kind_t"); } virtual ~scope_t() { TRACE_DTOR(xpath_t::scope_t); } public: virtual void define(const string& name, ptr_op_t def); virtual optional resolve(const string& name, scope_t * locals = NULL) { if (parent) return parent->resolve(name, locals); return none; } virtual ptr_op_t lookup(const string& name); void define(const string& name, const function_t& def); friend struct op_t; }; class function_scope_t : public scope_t { const node_t& node; std::size_t index; std::size_t size; public: function_scope_t(const value_t::sequence_t& _sequence, const node_t& _node, std::size_t _index, scope_t * _parent = NULL) : scope_t(_parent, STATIC), node(_node), index(_index), size(_sequence.size()) {} function_scope_t(const node_t& _node, std::size_t _index, std::size_t _size, scope_t * _parent = NULL) : scope_t(_parent, STATIC), node(_node), index(_index), size(_size) {} virtual optional resolve(const string& name, scope_t * locals = NULL); }; #define XPATH_PARSE_NORMAL 0x00 #define XPATH_PARSE_PARTIAL 0x01 #define XPATH_PARSE_RELAXED 0x02 #define XPATH_PARSE_NO_MIGRATE 0x04 #define XPATH_PARSE_NO_REDUCE 0x08 #define XPATH_PARSE_ALLOW_DATE 0x10 typedef uint_least8_t flags_t; private: struct token_t { enum kind_t { IDENT, // [A-Za-z_][-A-Za-z0-9_:]* VALUE, // any kind of literal value AT_SYM, // @ DOLLAR, // $ DOT, // . DOTDOT, // .. LPAREN, // ( RPAREN, // ) LBRACKET, // ( RBRACKET, // ) EXCLAM, // ! NEQUAL, // != MINUS, // - PLUS, // + STAR, // * POWER, // ** SLASH, // / EQUAL, // = ASSIGN, // := LESS, // < LESSEQ, // <= GREATER, // > GREATEREQ, // >= AMPER, // & PIPE, // | QUESTION, // ? COLON, // : COMMA, // , KW_AND, KW_OR, KW_DIV, KW_MOD, KW_UNION, TOK_EOF, UNKNOWN } kind; char symbol[3]; value_t value; unsigned int length; token_t() : kind(UNKNOWN), length(0) { TRACE_CTOR(xpath_t::token_t, ""); } token_t(const token_t& other) { assert(false); TRACE_CTOR(xpath_t::token_t, "copy"); *this = other; } ~token_t() { TRACE_DTOR(xpath_t::token_t); } token_t& operator=(const token_t& other) { if (&other == this) return *this; assert(false); return *this; } void clear() { kind = UNKNOWN; length = 0; value = 0L; symbol[0] = '\0'; symbol[1] = '\0'; symbol[2] = '\0'; } void parse_ident(std::istream& in); void next(std::istream& in, flags_t flags); void rewind(std::istream& in); void unexpected(); static void unexpected(char c, char wanted = '\0'); }; public: class path_t { typedef function visitor_t; typedef function predicate_t; struct element_t { variant ident; bool recurse; predicate_t predicate; }; std::list elements; typedef std::list::const_iterator element_iterator; struct value_node_appender_t { value_t::sequence_t& sequence; value_node_appender_t(value_t::sequence_t& _sequence) : sequence(_sequence) {} void operator()(node_t& node) { sequence.push_back(&node); } }; public: path_t(const xpath_t& path_expr); value_t find_all(node_t& start, scope_t * scope) { value_t result = value_t::sequence_t(); visit(start, scope, value_node_appender_t(result.as_sequence_lval())); return result; } void visit(node_t& start, scope_t * scope, const function& func) { if (elements.begin() != elements.end()) walk_elements(start, elements.begin(), scope, func); } private: void walk_elements(node_t& start, const element_iterator& element, scope_t * scope, const visitor_t& func); void check_element(node_t& start, const element_iterator& element, scope_t * scope, std::size_t index, std::size_t size, const visitor_t& func); }; class path_iterator_t { path_t path; node_t& start; scope_t * scope; mutable std::vector sequence; mutable bool searched; struct node_appender_t { std::vector& sequence; node_appender_t(std::vector& _sequence) : sequence(_sequence) {} void operator()(node_t& node) { sequence.push_back(&node); } }; public: typedef std::vector::iterator iterator; typedef std::vector::const_iterator const_iterator; path_iterator_t(const xpath_t& path_expr, node_t& _start, scope_t * _scope) : path(path_expr), start(_start), scope(_scope), searched(false) { } iterator begin() { if (! searched) { path.visit(start, scope, node_appender_t(sequence)); searched = true; } return sequence.begin(); } const_iterator begin() const { return const_cast(this)->begin(); } iterator end() { return sequence.end(); } const_iterator end() const { return sequence.end(); } }; struct op_t : public noncopyable { enum kind_t { VOID, VALUE, NODE_ID, NODE_NAME, ATTR_ID, ATTR_NAME, FUNC_NAME, VAR_NAME, ARG_INDEX, CONSTANTS, // constants end here FUNCTION, TERMINALS, // terminals end here O_NOT, O_NEG, O_UNION, O_ADD, O_SUB, O_MUL, O_DIV, O_NEQ, O_EQ, O_LT, O_LTE, O_GT, O_GTE, O_AND, O_OR, O_QUES, O_COLON, O_COMMA, O_DEFINE, O_EVAL, O_ARG, O_FIND, O_RFIND, O_PRED, LAST // operators end here }; kind_t kind; mutable short refc; ptr_op_t left_; variant // used by all binary operators data; op_t(const kind_t _kind) : kind(_kind), refc(0){ TRACE_CTOR(xpath_t::op_t, "const kind_t"); } ~op_t() { TRACE_DTOR(xpath_t::op_t); DEBUG("ledger.xpath.memory", "Destroying " << this); assert(refc == 0); } op_t& operator=(const op_t&); bool is_long() const { return data.type() == typeid(unsigned int); } unsigned int& as_long() { assert(kind == ARG_INDEX || kind == O_ARG); return boost::get(data); } const unsigned int& as_long() const { return const_cast(this)->as_long(); } void set_long(unsigned int val) { data = val; } bool is_value() const { return kind == VALUE; } value_t& as_value() { assert(kind == VALUE); return boost::get(data); } const value_t& as_value() const { return const_cast(this)->as_value(); } void set_value(const value_t& val) { data = val; } bool is_string() const { return data.type() == typeid(string); } string& as_string() { assert(kind == NODE_NAME || kind == ATTR_NAME || kind == FUNC_NAME); return boost::get(data); } const string& as_string() const { return const_cast(this)->as_string(); } void set_string(const string& val) { data = val; } bool is_function() const { return kind == FUNCTION; } function_t& as_function() { assert(kind == FUNCTION); return boost::get(data); } const function_t& as_function() const { return const_cast(this)->as_function(); } void set_function(const function_t& val) { data = val; } bool is_name() const { return data.type() == typeid(node_t::nameid_t); } node_t::nameid_t& as_name() { assert(kind == NODE_ID || kind == ATTR_ID); return boost::get(data); } const node_t::nameid_t& as_name() const { return const_cast(this)->as_name(); } void set_name(const node_t::nameid_t& val) { data = val; } #if 0 bool is_path() const { return kind == PATH; } path_t& as_path() { assert(kind == PATH); return boost::get(data); } const path_t& as_path() const { return const_cast(this)->as_path(); } void set_path(const path_t& val) { data = val; } #endif ptr_op_t& as_op() { assert(kind > TERMINALS); return boost::get(data); } const ptr_op_t& as_op() const { return const_cast(this)->as_op(); } void acquire() const { DEBUG("ledger.xpath.memory", "Acquiring " << this << ", refc now " << refc + 1); assert(refc >= 0); refc++; } void release() const { DEBUG("ledger.xpath.memory", "Releasing " << this << ", refc now " << refc - 1); assert(refc > 0); if (--refc == 0) checked_delete(this); } ptr_op_t& left() { return left_; } const ptr_op_t& left() const { assert(kind > TERMINALS); return left_; } void set_left(const ptr_op_t& expr) { assert(kind > TERMINALS); left_ = expr; } ptr_op_t& right() { assert(kind > TERMINALS); return as_op(); } const ptr_op_t& right() const { assert(kind > TERMINALS); return as_op(); } void set_right(const ptr_op_t& expr) { assert(kind > TERMINALS); data = expr; } static ptr_op_t new_node(kind_t kind, ptr_op_t left = NULL, ptr_op_t right = NULL); ptr_op_t copy(ptr_op_t left = NULL, ptr_op_t right = NULL) const; ptr_op_t compile(const node_t& context, scope_t * scope, bool resolve = false); void find_values(const node_t& context, scope_t * scope, value_t::sequence_t& result_seq, bool recursive); bool test_value(const node_t& context, scope_t * scope, int index = 0); void append_value(value_t::sequence_t& result_seq, value_t& value); static ptr_op_t defer_sequence(value_t::sequence_t& result_seq); bool print(std::ostream& out, document_t& document, const bool relaxed = true, const ptr_op_t& op_to_find = NULL, unsigned long * start_pos = NULL, unsigned long * end_pos = NULL) const; void dump(std::ostream& out, const int depth) const; }; class op_predicate { ptr_op_t op; public: op_predicate(ptr_op_t _op) : op(_op) {} bool operator()(node_t& node, scope_t * scope) { xpath_t result(op->compile(node, scope, true)); return result.ptr->as_value().to_boolean(); } }; public: ptr_op_t ptr; xpath_t& operator=(ptr_op_t _expr) { expr = ""; ptr = _expr; return *this; } #ifdef THREADSAFE mutable token_t lookahead; #else static token_t * lookahead; #endif mutable bool use_lookahead; token_t& next_token(std::istream& in, flags_t tflags) const { if (use_lookahead) use_lookahead = false; else #ifdef THREADSAFE lookahead.next(in, tflags); #else lookahead->next(in, tflags); #endif #ifdef THREADSAFE return lookahead; #else return *lookahead; #endif } void push_token(const token_t& tok) const { #ifdef THREADSAFE assert(&tok == &lookahead); #else assert(&tok == lookahead); #endif use_lookahead = true; } void push_token() const { use_lookahead = true; } ptr_op_t parse_value_term(std::istream& in, flags_t flags) const; ptr_op_t parse_predicate_expr(std::istream& in, flags_t flags) const; ptr_op_t parse_path_expr(std::istream& in, flags_t flags) const; ptr_op_t parse_unary_expr(std::istream& in, flags_t flags) const; ptr_op_t parse_union_expr(std::istream& in, flags_t flags) const; ptr_op_t parse_mul_expr(std::istream& in, flags_t flags) const; ptr_op_t parse_add_expr(std::istream& in, flags_t flags) const; ptr_op_t parse_logic_expr(std::istream& in, flags_t flags) const; ptr_op_t parse_and_expr(std::istream& in, flags_t flags) const; ptr_op_t parse_or_expr(std::istream& in, flags_t flags) const; ptr_op_t parse_querycolon_expr(std::istream& in, flags_t flags) const; ptr_op_t parse_value_expr(std::istream& in, flags_t flags) const; ptr_op_t parse_expr(std::istream& in, flags_t flags = XPATH_PARSE_RELAXED) const; ptr_op_t parse_expr(const string& str, flags_t tflags = XPATH_PARSE_RELAXED) const { std::istringstream stream(str); #if 0 try { #endif return parse_expr(stream, tflags); #if 0 } catch (error * err) { err->context.push_back (new line_context(str, (long)stream.tellg() - 1, "While parsing value expression:")); throw err; } #endif } ptr_op_t parse_expr(const char * p, flags_t tflags = XPATH_PARSE_RELAXED) const { return parse_expr(string(p), tflags); } bool print(std::ostream& out, document_t& document, const bool relaxed, const ptr_op_t op_to_find, unsigned long * start_pos, unsigned long * end_pos) const { if (ptr) ptr->print(out, document, relaxed, op_to_find, start_pos, end_pos); return true; } public: string expr; flags_t flags; // flags used to parse `expr' xpath_t() : ptr(NULL), use_lookahead(false), flags(0) { TRACE_CTOR(xpath_t, ""); } xpath_t(ptr_op_t _ptr) : ptr(_ptr), use_lookahead(false) { TRACE_CTOR(xpath_t, "ptr_op_t"); } xpath_t(const string& _expr, flags_t _flags = XPATH_PARSE_RELAXED) : ptr(NULL), use_lookahead(false), flags(0) { TRACE_CTOR(xpath_t, "const string&, flags_t"); if (! _expr.empty()) parse(_expr, _flags); } xpath_t(std::istream& in, flags_t _flags = XPATH_PARSE_RELAXED) : ptr(NULL), use_lookahead(false), flags(0) { TRACE_CTOR(xpath_t, "std::istream&, flags_t"); parse(in, _flags); } xpath_t(const xpath_t& other) : ptr(other.ptr), use_lookahead(false), expr(other.expr), flags(other.flags) { TRACE_CTOR(xpath_t, "copy"); } virtual ~xpath_t() { TRACE_DTOR(xpath_t); } xpath_t& operator=(const string& _expr) { parse(_expr); return *this; } xpath_t& operator=(const xpath_t& _expr); xpath_t& operator=(xpath_t& _xpath) { ptr = _xpath.ptr; expr = _xpath.expr; flags = _xpath.flags; use_lookahead = false; return *this; } operator ptr_op_t() throw() { return ptr; } operator bool() const throw() { return ptr != NULL; } operator string() const throw() { return expr; } void parse(const string& _expr, flags_t _flags = XPATH_PARSE_RELAXED) { expr = _expr; flags = _flags; ptr = parse_expr(_expr, _flags); } void parse(std::istream& in, flags_t _flags = XPATH_PARSE_RELAXED) { expr = ""; flags = _flags; ptr = parse_expr(in, _flags); } void compile(const node_t& context, scope_t * scope = NULL) { if (ptr.get()) ptr = ptr->compile(context, scope); } virtual value_t calc(const node_t& context, scope_t * scope = NULL) const; static value_t eval(const string& _expr, const node_t& context, scope_t * scope = NULL) { return xpath_t(_expr).calc(context, scope); } path_iterator_t find_all(node_t& start, scope_t * scope) { return path_iterator_t(*this, start, scope); } void visit(node_t& start, scope_t * scope, const function& func) { path_t(*this).visit(start, scope, func); } void print(std::ostream& out, xml::document_t& document) const { print(out, document, true, NULL, NULL, NULL); } void dump(std::ostream& out) const { if (ptr) ptr->dump(out, 0); } friend class scope_t; }; inline void intrusive_ptr_add_ref(xpath_t::op_t * op) { op->acquire(); } inline void intrusive_ptr_release(xpath_t::op_t * op) { op->release(); } } // namespace xml template inline T * get_ptr(xml::xpath_t::scope_t * locals, unsigned int idx) { assert(locals->args.size() > idx); T * ptr = static_cast(locals->args[idx].as_pointer()); assert(ptr); return ptr; } template inline T * get_node_ptr(xml::xpath_t::scope_t * locals, unsigned int idx) { assert(locals->args.size() > idx); T * ptr = polymorphic_downcast(locals->args[idx].as_xml_node()); assert(ptr); return ptr; } class xml_command { public: value_t operator()(xml::xpath_t::scope_t * locals) { std::ostream * out = get_ptr(locals, 0); xml::document_t * doc = get_node_ptr(locals, 1); doc->print(*out); return true; } }; } // namespace ledger #endif // _XPATH_H