/* * Copyright (c) 2003-2007, John Wiegley. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * - Neither the name of New Artisans LLC nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _XPATH_H #define _XPATH_H #include "document.h" namespace ledger { namespace xml { class xpath_t { public: struct op_t; typedef intrusive_ptr ptr_op_t; static void initialize(); static void shutdown(); DECLARE_EXCEPTION(parse_error); DECLARE_EXCEPTION(compile_error); DECLARE_EXCEPTION(calc_error); public: class scope_t; typedef function function_t; #define MAKE_FUNCTOR(x) \ xml::xpath_t::wrap_functor(bind(&x, this, _1, _2)) static ptr_op_t wrap_value(const value_t& val); static ptr_op_t wrap_sequence(const value_t::sequence_t& val); static ptr_op_t wrap_functor(const function_t& fobj); public: class scope_t : public noncopyable { typedef std::map symbol_map; symbol_map symbols; public: scope_t * parent; value_t args; enum kind_t { NORMAL, STATIC, ARGUMENT } kind; scope_t(scope_t * _parent = NULL, kind_t _kind = NORMAL) : parent(_parent), kind(_kind) { TRACE_CTOR(xpath_t::scope_t, "scope *, kind_t"); } virtual ~scope_t() { TRACE_DTOR(xpath_t::scope_t); } public: virtual void define(const string& name, ptr_op_t def); virtual bool resolve(const string& name, value_t& result, scope_t * locals = NULL) { if (parent) return parent->resolve(name, result, locals); return false; } virtual ptr_op_t lookup(const string& name); void define(const string& name, const function_t& def); friend struct op_t; }; class function_scope_t : public scope_t { value_t::sequence_t sequence; value_t value; int index; public: function_scope_t(const value_t::sequence_t& _sequence, value_t * _value, int _index, scope_t * _parent = NULL) : scope_t(_parent, STATIC), sequence(_sequence), value(_value), index(_index) {} function_scope_t(const value_t& _value, int _index, scope_t * _parent = NULL) : scope_t(_parent, STATIC), value(_value), index(_index) {} virtual bool resolve(const string& name, value_t& result, scope_t * locals = NULL); }; #define XPATH_PARSE_NORMAL 0x00 #define XPATH_PARSE_PARTIAL 0x01 #define XPATH_PARSE_RELAXED 0x02 #define XPATH_PARSE_NO_MIGRATE 0x04 #define XPATH_PARSE_NO_REDUCE 0x08 #define XPATH_PARSE_ALLOW_DATE 0x10 typedef uint_least8_t flags_t; private: struct token_t { enum kind_t { IDENT, // [A-Za-z_][-A-Za-z0-9_:]* VALUE, // any kind of literal value AT_SYM, // @ DOLLAR, // $ DOT, // . DOTDOT, // .. LPAREN, // ( RPAREN, // ) LBRACKET, // ( RBRACKET, // ) EXCLAM, // ! NEQUAL, // != MINUS, // - PLUS, // + STAR, // * POWER, // ** SLASH, // / EQUAL, // = ASSIGN, // := LESS, // < LESSEQ, // <= GREATER, // > GREATEREQ, // >= AMPER, // & PIPE, // | QUESTION, // ? COLON, // : COMMA, // , KW_AND, KW_OR, KW_DIV, KW_MOD, KW_UNION, TOK_EOF, UNKNOWN } kind; char symbol[3]; value_t value; unsigned int length; token_t() : kind(UNKNOWN), length(0) { TRACE_CTOR(xpath_t::token_t, ""); } token_t(const token_t& other) { assert(false); TRACE_CTOR(xpath_t::token_t, "copy"); *this = other; } ~token_t() { TRACE_DTOR(xpath_t::token_t); } token_t& operator=(const token_t& other) { if (&other == this) return *this; assert(false); return *this; } void clear() { kind = UNKNOWN; length = 0; value = 0L; symbol[0] = '\0'; symbol[1] = '\0'; symbol[2] = '\0'; } void parse_ident(std::istream& in); void next(std::istream& in, flags_t flags); void rewind(std::istream& in); void unexpected(); static void unexpected(char c, char wanted = '\0'); }; public: struct op_t : public noncopyable { enum kind_t { VOID, VALUE, NODE_ID, NODE_NAME, ATTR_ID, ATTR_NAME, FUNC_NAME, VAR_NAME, ARG_INDEX, CONSTANTS, // constants end here FUNCTION, TERMINALS, // terminals end here O_NOT, O_NEG, O_UNION, O_ADD, O_SUB, O_MUL, O_DIV, O_NEQ, O_EQ, O_LT, O_LTE, O_GT, O_GTE, O_AND, O_OR, O_QUES, O_COLON, O_COMMA, O_DEFINE, O_EVAL, O_ARG, O_FIND, O_RFIND, O_PRED, LAST // operators end here }; kind_t kind; mutable short refc; ptr_op_t left_; variant, // used by constant VALUE string, // used by constant SYMBOL function_t, // used by terminal FUNCTION node_t::nameid_t, // used by NODE_NAME and ATTR_NAME ptr_op_t> // used by all binary operators data; op_t(const kind_t _kind) : kind(_kind), refc(0){ TRACE_CTOR(xpath_t::op_t, "const kind_t"); } ~op_t() { TRACE_DTOR(xpath_t::op_t); DEBUG("ledger.xpath.memory", "Destroying " << this); assert(refc == 0); } op_t& operator=(const op_t&); bool is_value() const { return kind == VALUE; } unsigned int& as_long() { assert(kind == ARG_INDEX || kind == O_ARG); return boost::get(data); } const unsigned int& as_long() const { return const_cast(this)->as_long(); } void set_long(unsigned int val) { data = val; } value_t& as_value() { assert(kind == VALUE); value_t * val = boost::get >(data).get(); assert(val); return *val; } const value_t& as_value() const { return const_cast(this)->as_value(); } void set_value(value_t * val) { // jww (2007-05-14): Ugh, fix this data = shared_ptr(val); } string& as_string() { assert(kind == NODE_NAME || kind == ATTR_NAME || kind == FUNC_NAME); return boost::get(data); } const string& as_string() const { return const_cast(this)->as_string(); } void set_string(const string& val) { data = val; } function_t& as_function() { assert(kind == FUNCTION); return boost::get(data); } const function_t& as_function() const { return const_cast(this)->as_function(); } void set_function(const function_t& val) { data = val; } node_t::nameid_t& as_name() { assert(kind == NODE_ID || kind == ATTR_ID); return boost::get(data); } const node_t::nameid_t& as_name() const { return const_cast(this)->as_name(); } void set_name(const node_t::nameid_t& val) { data = val; } ptr_op_t& as_op() { assert(kind > TERMINALS); return boost::get(data); } const ptr_op_t& as_op() const { return const_cast(this)->as_op(); } void release() const { DEBUG("ledger.xpath.memory", "Releasing " << this << ", refc now " << refc - 1); assert(refc > 0); if (--refc == 0) checked_delete(this); } void acquire() { DEBUG("ledger.xpath.memory", "Acquiring " << this << ", refc now " << refc + 1); assert(refc >= 0); refc++; } ptr_op_t& left() { return left_; } const ptr_op_t& left() const { assert(kind > TERMINALS); return left_; } void set_left(const ptr_op_t& expr) { assert(kind > TERMINALS); left_ = expr; } ptr_op_t& right() { assert(kind > TERMINALS); return as_op(); } const ptr_op_t& right() const { assert(kind > TERMINALS); return as_op(); } void set_right(const ptr_op_t& expr) { assert(kind > TERMINALS); data = expr; } static ptr_op_t new_node(kind_t kind, ptr_op_t left = NULL, ptr_op_t right = NULL); ptr_op_t copy(ptr_op_t left = NULL, ptr_op_t right = NULL) const; ptr_op_t compile(value_t& context, scope_t * scope, bool resolve = false); void find_values(value_t& context, scope_t * scope, value_t::sequence_t& result_seq, bool recursive); bool test_value(value_t& context, scope_t * scope, int index = 0); void append_value(value_t& value, value_t::sequence_t& result_seq); static ptr_op_t defer_sequence(value_t::sequence_t& result_seq); bool print(std::ostream& out, const bool relaxed = true, const ptr_op_t& op_to_find = NULL, unsigned long * start_pos = NULL, unsigned long * end_pos = NULL) const; void dump(std::ostream& out, const int depth) const; }; public: ptr_op_t ptr; xpath_t& operator=(ptr_op_t _expr) { expr = ""; ptr = _expr; return *this; } #ifdef THREADSAFE mutable token_t lookahead; #else static token_t * lookahead; #endif mutable bool use_lookahead; token_t& next_token(std::istream& in, flags_t tflags) const { if (use_lookahead) use_lookahead = false; else #ifdef THREADSAFE lookahead.next(in, tflags); #else lookahead->next(in, tflags); #endif #ifdef THREADSAFE return lookahead; #else return *lookahead; #endif } void push_token(const token_t& tok) const { #ifdef THREADSAFE assert(&tok == &lookahead); #else assert(&tok == lookahead); #endif use_lookahead = true; } void push_token() const { use_lookahead = true; } ptr_op_t parse_value_term(std::istream& in, flags_t flags) const; ptr_op_t parse_predicate_expr(std::istream& in, flags_t flags) const; ptr_op_t parse_path_expr(std::istream& in, flags_t flags) const; ptr_op_t parse_unary_expr(std::istream& in, flags_t flags) const; ptr_op_t parse_union_expr(std::istream& in, flags_t flags) const; ptr_op_t parse_mul_expr(std::istream& in, flags_t flags) const; ptr_op_t parse_add_expr(std::istream& in, flags_t flags) const; ptr_op_t parse_logic_expr(std::istream& in, flags_t flags) const; ptr_op_t parse_and_expr(std::istream& in, flags_t flags) const; ptr_op_t parse_or_expr(std::istream& in, flags_t flags) const; ptr_op_t parse_querycolon_expr(std::istream& in, flags_t flags) const; ptr_op_t parse_value_expr(std::istream& in, flags_t flags) const; ptr_op_t parse_expr(std::istream& in, flags_t flags = XPATH_PARSE_RELAXED) const; ptr_op_t parse_expr(const string& str, flags_t tflags = XPATH_PARSE_RELAXED) const { std::istringstream stream(str); #if 0 try { #endif return parse_expr(stream, tflags); #if 0 } catch (error * err) { err->context.push_back (new line_context(str, (long)stream.tellg() - 1, "While parsing value expression:")); throw err; } #endif } ptr_op_t parse_expr(const char * p, flags_t tflags = XPATH_PARSE_RELAXED) const { return parse_expr(string(p), tflags); } bool print(std::ostream& out, const bool relaxed, const ptr_op_t op_to_find, unsigned long * start_pos, unsigned long * end_pos) const { if (ptr) ptr->print(out, relaxed, op_to_find, start_pos, end_pos); return true; } public: string expr; flags_t flags; // flags used to parse `expr' xpath_t() : ptr(NULL), use_lookahead(false), flags(0) { TRACE_CTOR(xpath_t, ""); } xpath_t(ptr_op_t _ptr) : ptr(_ptr), use_lookahead(false) { TRACE_CTOR(xpath_t, "ptr_op_t"); } xpath_t(const string& _expr, flags_t _flags = XPATH_PARSE_RELAXED) : ptr(NULL), use_lookahead(false), flags(0) { TRACE_CTOR(xpath_t, "const string&, flags_t"); if (! _expr.empty()) parse(_expr, _flags); } xpath_t(std::istream& in, flags_t _flags = XPATH_PARSE_RELAXED) : ptr(NULL), use_lookahead(false), flags(0) { TRACE_CTOR(xpath_t, "std::istream&, flags_t"); parse(in, _flags); } xpath_t(const xpath_t& other) : ptr(other.ptr), use_lookahead(false), expr(other.expr), flags(other.flags) { TRACE_CTOR(xpath_t, "copy"); } virtual ~xpath_t() { TRACE_DTOR(xpath_t); } xpath_t& operator=(const string& _expr) { parse(_expr); return *this; } xpath_t& operator=(const xpath_t& _expr); xpath_t& operator=(xpath_t& _xpath) { ptr = _xpath.ptr; expr = _xpath.expr; flags = _xpath.flags; use_lookahead = false; return *this; } operator ptr_op_t() throw() { return ptr; } operator bool() const throw() { return ptr != NULL; } operator string() const throw() { return expr; } void parse(const string& _expr, flags_t _flags = XPATH_PARSE_RELAXED) { expr = _expr; flags = _flags; ptr = parse_expr(_expr, _flags); } void parse(std::istream& in, flags_t _flags = XPATH_PARSE_RELAXED) { expr = ""; flags = _flags; ptr = parse_expr(in, _flags); } void compile(node_t& top_node, scope_t * scope = NULL) { if (ptr.get()) { value_t noderef(&top_node); ptr = ptr->compile(noderef, scope); } } virtual void calc(value_t& result, node_t& node, scope_t * scope = NULL) const; virtual value_t calc(node_t& tcontext, scope_t * scope = NULL) const { if (! ptr) return 0L; value_t temp; calc(temp, tcontext, scope); return temp; } static void eval(value_t& result, const string& _expr, node_t& top, scope_t * scope = NULL) { xpath_t temp(_expr); temp.calc(result, top, scope); } static value_t eval(const string& _expr, node_t& top, scope_t * scope = NULL) { xpath_t temp(_expr); return temp.calc(top, scope); } void print(std::ostream& out) const { print(out, true, NULL, NULL, NULL); } void dump(std::ostream& out) const { if (ptr) ptr->dump(out, 0); } friend class scope_t; }; inline std::ostream& operator<<(std::ostream& out, const xpath_t::op_t& op) { op.print(out); return out; } inline void intrusive_ptr_add_ref(xpath_t::op_t * op) { op->acquire(); } inline void intrusive_ptr_release(xpath_t::op_t * op) { op->release(); } } // namespace xml template inline T * get_ptr(xml::xpath_t::scope_t * locals, unsigned int idx) { assert(locals->args.size() > idx); T * ptr = static_cast(locals->args[idx].as_pointer()); assert(ptr); return ptr; } template inline T * get_node_ptr(xml::xpath_t::scope_t * locals, unsigned int idx) { assert(locals->args.size() > idx); T * ptr = polymorphic_downcast(locals->args[idx].as_xml_node()); assert(ptr); return ptr; } class xml_command { public: void operator()(value_t&, xml::xpath_t::scope_t * locals) { std::ostream * out = get_ptr(locals, 0); xml::document_t * doc = get_node_ptr(locals, 1); doc->print(*out); } }; } // namespace ledger #endif // _XPATH_H