From 42f43b7686038e4cbca16d8d2118b139544e6de3 Mon Sep 17 00:00:00 2001 From: John Wiegley Date: Sun, 13 Apr 2008 03:35:00 -0400 Subject: Check in all changes made so far toward 3.0. --- xpath.h | 773 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 773 insertions(+) create mode 100644 xpath.h (limited to 'xpath.h') diff --git a/xpath.h b/xpath.h new file mode 100644 index 00000000..5c364b6a --- /dev/null +++ b/xpath.h @@ -0,0 +1,773 @@ +#ifndef _XPATH_H +#define _XPATH_H + +#include "xml.h" +#include "error.h" +#if 0 +#include "mask.h" +#endif + +#include +#include + +namespace ledger { +namespace xml { + +class xpath_t +{ +public: + struct op_t; + + class parse_error : public error { + public: + parse_error(const std::string& reason, + error_context * ctxt = NULL) throw() + : error(reason, ctxt) {} + virtual ~parse_error() throw() {} + }; + + class compile_error : public error { + public: + compile_error(const std::string& reason, + error_context * ctxt = NULL) throw() + : error(reason, ctxt) {} + virtual ~compile_error() throw() {} + }; + + class calc_error : public error { + public: + calc_error(const std::string& reason, + error_context * ctxt = NULL) throw() + : error(reason, ctxt) {} + virtual ~calc_error() throw() {} + }; + + class context : public error_context { + public: + const xpath_t& xpath; + const op_t * err_node; + + context(const xpath_t& _xpath, + const op_t * _err_node, + const std::string& desc = "") throw(); + virtual ~context() throw(); + + virtual void describe(std::ostream& out) const throw(); + }; + +public: + class scope_t; + + class functor_t { + protected: + std::string fname; + public: + bool wants_args; + + functor_t(const std::string& _fname, bool _wants_args = false) + : fname(_fname), wants_args(_wants_args) {} + virtual ~functor_t() {} + + virtual void operator()(value_t& result, scope_t * locals) = 0; + virtual std::string name() const { return fname; } + }; + + template + class member_functor_t : public functor_t { + public: + T * ptr; + U T::*dptr; + + member_functor_t(const std::string& name, T * _ptr, U T::*_dptr) + : functor_t(name, false), ptr(_ptr), dptr(_dptr) {} + + virtual void operator()(value_t& result, scope_t * locals) { + assert(ptr); + assert(dptr); + result = ptr->*dptr; + } + }; + + template + class member_functor_t : public functor_t { + public: + T * ptr; + std::string T::*dptr; + + member_functor_t(const std::string& name, T * _ptr, std::string T::*_dptr) + : functor_t(name, false), ptr(_ptr), dptr(_dptr) {} + + virtual void operator()(value_t& result, scope_t * locals) { + assert(ptr); + assert(dptr); + result.set_string(ptr->*dptr); + } + }; + + template + class memfun_functor_t : public functor_t { + public: + T * ptr; + void (T::*mptr)(value_t& result); + + memfun_functor_t(const std::string& name, T * _ptr, + void (T::*_mptr)(value_t& result)) + : functor_t(name, false), ptr(_ptr), mptr(_mptr) {} + + virtual void operator()(value_t& result, scope_t * locals = NULL) { + assert(ptr); + assert(mptr); + (ptr->*mptr)(result); + } + }; + + template + class memfun_args_functor_t : public functor_t { + public: + T * ptr; + void (T::*mptr)(value_t& result, scope_t * locals); + + memfun_args_functor_t(const std::string& name, T * _ptr, + void (T::*_mptr)(value_t& result, scope_t * locals)) + : functor_t(name, true), ptr(_ptr), mptr(_mptr) {} + + virtual void operator()(value_t& result, scope_t * locals) { + assert(ptr); + assert(mptr); + (ptr->*mptr)(result, locals); + } + }; + + static op_t * wrap_value(const value_t& val); + static op_t * wrap_sequence(value_t::sequence_t * val); + static op_t * wrap_functor(functor_t * fobj); +#if 0 + static op_t * wrap_mask(const std::string& pattern); +#endif + + template + static op_t * + make_functor(const std::string& name = "", T * ptr, U T::*mptr) { + return wrap_functor(new member_functor_t(name, ptr, mptr)); + } + + template + static op_t * + make_functor(const std::string& fname = "", T * ptr, + void (T::*mptr)(value_t& result)) { + return wrap_functor(new memfun_functor_t(fname, ptr, mptr)); + } + + template + static op_t * + make_functor(const std::string& fname = "", T * ptr, + void (T::*mptr)(value_t& result, scope_t * locals)) { + return wrap_functor(new memfun_args_functor_t(fname, ptr, mptr)); + } + +#define MAKE_FUNCTOR(cls, name) \ + xml::xpath_t::make_functor(#name, this, &cls::name) + +public: + class scope_t + { + typedef std::map symbol_map; + typedef std::pair symbol_pair; + + symbol_map symbols; + + scope_t(const scope_t&); + scope_t& operator=(const scope_t&); + + public: + scope_t * parent; + value_t args; + + enum kind_t { NORMAL, STATIC, ARGUMENT } kind; + + scope_t(scope_t * _parent = NULL, kind_t _kind = NORMAL) + : parent(_parent), kind(_kind) { + TRACE_CTOR("xpath_t::scope_t(scope *, kind_t)"); + } + + virtual ~scope_t() { + TRACE_DTOR("xpath_t::scope_t"); + for (symbol_map::iterator i = symbols.begin(); + i != symbols.end(); + i++) + (*i).second->release(); + } + + public: + virtual void define(const std::string& name, op_t * def); + virtual bool resolve(const std::string& name, value_t& result, + scope_t * locals = NULL) { + if (parent) + return parent->resolve(name, result, locals); + return false; + } + virtual op_t * lookup(const std::string& name); + + void define(const std::string& name, functor_t * def); + + friend struct op_t; + }; + + class function_scope_t : public scope_t + { + value_t::sequence_t * sequence; + value_t * value; + int index; + + public: + function_scope_t(value_t::sequence_t * _sequence, value_t * _value, + int _index, scope_t * parent = NULL) + : scope_t(parent, STATIC), + sequence(_sequence), value(_value), index(_index) {} + + virtual bool resolve(const std::string& name, value_t& result, + scope_t * locals = NULL); + }; + +#define XPATH_PARSE_NORMAL 0x00 +#define XPATH_PARSE_PARTIAL 0x01 +#define XPATH_PARSE_RELAXED 0x02 +#define XPATH_PARSE_NO_MIGRATE 0x04 +#define XPATH_PARSE_NO_REDUCE 0x08 +#if 0 +#define XPATH_PARSE_REGEXP 0x10 +#endif +#define XPATH_PARSE_ALLOW_DATE 0x20 + +private: + struct token_t + { + enum kind_t { + IDENT, // [A-Za-z_][-A-Za-z0-9_:]* + VALUE, // any kind of literal value +#if 0 + REGEXP, // /regexp/ jww (2006-09-24): deprecate + // in favor of a "match" function +#endif + AT_SYM, // @ + DOLLAR, // $ + DOT, // . + DOTDOT, // .. + LPAREN, // ( + RPAREN, // ) + LBRACKET, // ( + RBRACKET, // ) + EXCLAM, // ! + NEQUAL, // != + MINUS, // - + PLUS, // + + STAR, // * + POWER, // ** + SLASH, // / + EQUAL, // = + ASSIGN, // := + LESS, // < + LESSEQ, // <= + GREATER, // > + GREATEREQ, // >= + AMPER, // & + PIPE, // | + QUESTION, // ? + COLON, // : + COMMA, // , +#if 0 + MATCH, // =~ + NMATCH, // !~ + PERCENT, // % +#endif + KW_AND, + KW_OR, + KW_DIV, + KW_MOD, + KW_UNION, + TOK_EOF, + UNKNOWN + } kind; + + char symbol[3]; + value_t value; + unsigned int length; + + token_t() : kind(UNKNOWN), length(0) { + TRACE_CTOR("xpath_t::token_t()"); + } + + token_t(const token_t& other) { + assert(0); + TRACE_CTOR("xpath_t::token_t(copy)"); + *this = other; + } + + ~token_t() { + TRACE_DTOR("xpath_t::token_t"); + } + + token_t& operator=(const token_t& other) { + if (&other == this) + return *this; + assert(0); + } + + void clear() { + kind = UNKNOWN; + length = 0; + + symbol[0] = '\0'; + symbol[1] = '\0'; + symbol[2] = '\0'; + } + + void parse_ident(std::istream& in); + void next(std::istream& in, unsigned short flags); + void rewind(std::istream& in); + void unexpected(); + + static void unexpected(char c, char wanted = '\0'); + }; + +public: + struct op_t + { + enum kind_t { + VOID, + VALUE, + + NODE_NAME, + NODE_ID, + FUNC_NAME, + ATTR_NAME, + VAR_NAME, + + ARG_INDEX, + + CONSTANTS, // constants end here + + FUNCTOR, +#if 0 + MASK, +#endif + + TERMINALS, // terminals end here + + O_NOT, + O_NEG, + + O_UNION, + + O_ADD, + O_SUB, + O_MUL, + O_DIV, + + O_NEQ, + O_EQ, + O_LT, + O_LTE, + O_GT, + O_GTE, + + O_AND, + O_OR, + + O_QUES, + O_COLON, + + O_COMMA, + +#if 0 + O_MATCH, + O_NMATCH, +#endif + + O_DEFINE, + O_EVAL, + O_ARG, + +#if 0 + O_PERC, +#endif + + O_FIND, + O_RFIND, + O_PRED, + + LAST // operators end here + }; + + kind_t kind; + mutable short refc; + op_t * left; + + union { + value_t * valuep; // used by constant VALUE + std::string * name; // used by constant SYMBOL + unsigned int arg_index; // used by ARG_INDEX and O_ARG + functor_t * functor; // used by terminal FUNCTOR + unsigned int name_id; // used by NODE_NAME and ATTR_NAME +#if 0 + mask_t * mask; // used by terminal MASK +#endif + op_t * right; // used by all operators + }; + + op_t(const kind_t _kind) + : kind(_kind), refc(0), left(NULL), right(NULL) { + TRACE_CTOR("xpath_t::op_t(const kind_t)"); + } + op_t(const op_t&); + ~op_t(); + + op_t& operator=(const op_t&); + + bool constant() const { + return kind == VALUE; + } + void get_value(value_t& result) const; + value_t value() const { + value_t temp; + get_value(temp); + return temp; + } + + functor_t * functor_obj() const { + if (kind == FUNCTOR) + return functor; + else + return NULL; + } + + void release() const { + DEBUG_PRINT("ledger.xpath.memory", + "Releasing " << this << ", refc now " << refc - 1); + assert(refc > 0); + if (--refc == 0) + delete this; + } + op_t * acquire() { + DEBUG_PRINT("ledger.xpath.memory", + "Acquiring " << this << ", refc now " << refc + 1); + assert(refc >= 0); + refc++; + return this; + } + const op_t * acquire() const { + DEBUG_PRINT("ledger.xpath.memory", + "Acquiring " << this << ", refc now " << refc + 1); + assert(refc >= 0); + refc++; + return this; + } + + void set_left(op_t * expr) { + assert(kind > TERMINALS); + if (left) + left->release(); + left = expr ? expr->acquire() : NULL; + } + + void set_right(op_t * expr) { + assert(kind > TERMINALS); + if (right) + right->release(); + right = expr ? expr->acquire() : NULL; + } + + static op_t * new_node(kind_t kind, op_t * left = NULL, + op_t * right = NULL); + + op_t * copy(op_t * left = NULL, + op_t * right = NULL) const; + op_t * compile(value_t * context, scope_t * scope, + bool resolve = false); + + void find_values(value_t * context, scope_t * scope, + value_t::sequence_t& result_seq, bool recursive); + bool test_value(value_t * context, scope_t * scope, int index = 0); + + void append_value(value_t& value, value_t::sequence_t& result_seq); + + static op_t * defer_sequence(value_t::sequence_t& result_seq); + + bool write(std::ostream& out, + const bool relaxed = true, + const op_t * op_to_find = NULL, + unsigned long * start_pos = NULL, + unsigned long * end_pos = NULL) const; + + void dump(std::ostream& out, const int depth) const; + }; + +public: + op_t * ptr; + + xpath_t& operator=(op_t * _expr) { + expr = ""; + reset(_expr); + return *this; + } + + op_t& operator*() throw() { + return *ptr; + } + const op_t& operator*() const throw() { + return *ptr; + } + op_t * operator->() throw() { + return ptr; + } + const op_t * operator->() const throw() { + return ptr; + } + + op_t * get() throw() { return ptr; } + const op_t * get() const throw() { return ptr; } + + op_t * release() throw() { + op_t * tmp = ptr; + ptr = 0; + return tmp; + } + + void reset(op_t * p = 0) throw() { + if (p != ptr) { + if (ptr) + ptr->release(); + ptr = p; + } + } + +#ifdef THREADSAFE + mutable token_t lookahead; +#else + static token_t lookahead; +#endif + mutable bool use_lookahead; + + token_t& next_token(std::istream& in, unsigned short flags) const { + if (use_lookahead) + use_lookahead = false; + else + lookahead.next(in, flags); + return lookahead; + } + void push_token(const token_t& tok) const { + assert(&tok == &lookahead); + use_lookahead = true; + } + void push_token() const { + use_lookahead = true; + } + + op_t * parse_value_term(std::istream& in, unsigned short flags) const; + op_t * parse_predicate_expr(std::istream& in, unsigned short flags) const; + op_t * parse_path_expr(std::istream& in, unsigned short flags) const; + op_t * parse_unary_expr(std::istream& in, unsigned short flags) const; + op_t * parse_union_expr(std::istream& in, unsigned short flags) const; + op_t * parse_mul_expr(std::istream& in, unsigned short flags) const; + op_t * parse_add_expr(std::istream& in, unsigned short flags) const; + op_t * parse_logic_expr(std::istream& in, unsigned short flags) const; + op_t * parse_and_expr(std::istream& in, unsigned short flags) const; + op_t * parse_or_expr(std::istream& in, unsigned short flags) const; + op_t * parse_querycolon_expr(std::istream& in, unsigned short flags) const; + op_t * parse_value_expr(std::istream& in, unsigned short flags) const; + + op_t * parse_expr(std::istream& in, + unsigned short flags = XPATH_PARSE_RELAXED) const; + + op_t * parse_expr(const std::string& str, + unsigned short flags = XPATH_PARSE_RELAXED) const + { + std::istringstream stream(str); + try { + return parse_expr(stream, flags); + } + catch (error * err) { + err->context.push_back + (new line_context(str, (long)stream.tellg() - 1, + "While parsing value expression:")); + throw err; + } + } + + op_t * parse_expr(const char * p, + unsigned short flags = XPATH_PARSE_RELAXED) const { + return parse_expr(std::string(p), flags); + } + + bool write(std::ostream& out, + const bool relaxed, + const op_t * op_to_find, + unsigned long * start_pos, + unsigned long * end_pos) const { + if (ptr) + ptr->write(out, relaxed, op_to_find, start_pos, end_pos); + } + +public: + std::string expr; + unsigned short flags; // flags used to parse `expr' + + xpath_t() : ptr(NULL), use_lookahead(false), flags(0) { + TRACE_CTOR("xpath_t"); + } + xpath_t(op_t * _ptr) : ptr(_ptr), use_lookahead(false) { + TRACE_CTOR("xpath_t(op_t *)"); + } + + xpath_t(const std::string& _expr, + unsigned short _flags = XPATH_PARSE_RELAXED) + : ptr(NULL), use_lookahead(false), flags(0) { + TRACE_CTOR("xpath_t(const std::string&, unsigned short)"); + if (! _expr.empty()) + parse(_expr, _flags); + } + xpath_t(std::istream& in, unsigned short _flags = XPATH_PARSE_RELAXED) + : ptr(NULL), use_lookahead(false), flags(0) { + TRACE_CTOR("xpath_t(std::istream&, unsigned short)"); + parse(in, _flags); + } + xpath_t(const xpath_t& other) + : ptr(other.ptr ? other.ptr->acquire() : NULL), + use_lookahead(false), expr(other.expr), flags(other.flags) { + TRACE_CTOR("xpath_t(copy)"); + } + virtual ~xpath_t() { + TRACE_DTOR("xpath_t"); + if (ptr) + ptr->release(); + } + + xpath_t& operator=(const std::string& _expr) { + parse(_expr); + return *this; + } + xpath_t& operator=(const xpath_t& _expr); + xpath_t& operator=(xpath_t& _xpath) { + ptr = _xpath.ptr->acquire(); + expr = _xpath.expr; + flags = _xpath.flags; + use_lookahead = false; + return *this; + } + + operator op_t *() throw() { + return ptr; + } + + operator bool() const throw() { + return ptr != NULL; + } + operator std::string() const throw() { + return expr; + } + + void parse(const std::string& _expr, unsigned short _flags = XPATH_PARSE_RELAXED) { + expr = _expr; + flags = _flags; + op_t * tmp = parse_expr(_expr, _flags); + assert(tmp); + reset(tmp ? tmp->acquire() : NULL); + } + void parse(std::istream& in, unsigned short _flags = XPATH_PARSE_RELAXED) { + expr = ""; + flags = _flags; + op_t * tmp = parse_expr(in, _flags); + assert(tmp); + reset(tmp ? tmp->acquire() : NULL); + } + + void compile(const std::string& _expr, scope_t * scope = NULL, + unsigned short _flags = XPATH_PARSE_RELAXED) { + parse(_expr, _flags); + // jww (2006-09-24): fix + compile((node_t *)NULL, scope); + } + void compile(std::istream& in, scope_t * scope = NULL, + unsigned short _flags = XPATH_PARSE_RELAXED) { + parse(in, _flags); + // jww (2006-09-24): fix + compile((node_t *)NULL, scope); + } + + void compile(document_t * document, scope_t * scope = NULL) { + if (! document) + document = new xml::document_t; + compile(document->top, scope); + } + void compile(node_t * top_node, scope_t * scope = NULL) { + if (ptr) { + value_t noderef(top_node); + op_t * compiled = ptr->compile(&noderef, scope); + if (compiled == ptr) + compiled->release(); + else + reset(compiled); + } + } + + virtual void calc(value_t& result, node_t * node, scope_t * scope = NULL) const; + + virtual value_t calc(document_t * document, scope_t * scope = NULL) const { + if (! ptr) + return 0L; + value_t temp; + calc(temp, document ? document->top : NULL, scope); + return temp; + } + virtual value_t calc(node_t * context, scope_t * scope = NULL) const { + if (! ptr) + return 0L; + value_t temp; + calc(temp, context, scope); + return temp; + } + + static value_t eval(const std::string& _expr, document_t * document, + scope_t * scope = NULL) { + xpath_t temp(_expr); + return temp.calc(document, scope); + } + + void write(std::ostream& out) const { + write(out, true, NULL, NULL, NULL); + } + void dump(std::ostream& out) const { + if (ptr) + ptr->dump(out, 0); + } + + friend class scope_t; +}; + +} // namespace xml + +template +inline T * get_ptr(xml::xpath_t::scope_t * locals, int idx) { + assert(locals->args.size() > idx); + T * ptr = static_cast(locals->args[idx].to_pointer()); + assert(ptr); + return ptr; +} + +class xml_command : public xml::xpath_t::functor_t +{ + public: + xml_command() : xml::xpath_t::functor_t("xml") {} + + virtual void operator()(value_t& result, xml::xpath_t::scope_t * locals) { + std::ostream * out = get_ptr(locals, 0); + xml::document_t * doc = get_ptr(locals, 1); + + doc->write(*out); + } + +}; + +} // namespace ledger + +#endif // _XPATH_H -- cgit v1.2.3