From 47df7dd60e9209db3be91a7b29a91911ee4a846b Mon Sep 17 00:00:00 2001
From: John Wiegley
Date: Wed, 28 Oct 2009 18:40:31 -0400
Subject: Rewrote the report query parser

It is now a full parser that parses report queries directly into value
expression trees. These then get rendered into text so that other options may
extend the expression.
---
Review notes (free-form text in this position is not part of the diff):

  What this patch does
    * Removes args_to_predicate_expr(), which built a predicate expression
      string from the argument sequence.
    * Adds query_lexer_t::next_token(), which splits the argument strings
      into tokens (parentheses, & | ! @ # % =, keywords such as and/or/not/
      account/payee/code/note/tag/expr, and plain TERM strings).
    * Adds query_lexer_t::token_t::unexpected() and ::expected(), which
      reset the token kind to UNKNOWN and throw parse_error.
    * Adds a recursive-descent query_parser_t: parse_query_term,
      parse_unary_expr (not), parse_and_expr, parse_or_expr and
      parse_query_expr (adjacent terms are joined with O_OR), plus parse()
      and args_to_predicate(), which start parsing in TOK_ACCOUNT context.

  Change relative to the text this copy was taken from
    * In next_token(), the keyword "note" now returns token_t::TOK_NOTE.
      The text had TOK_NOT there, so "note" was lexed as negation and no
      path in next_token() produced TOK_NOTE, although parse_query_term()
      handles TOK_NOTE.  The line count of the hunk is unchanged.

  Formatting
    * Line breaks and indentation of the patch were reconstructed; the
      result has 28 context, 147 removed and 302 added lines, matching the
      hunk header and diffstat below.  Tabs/alignment inside lines may
      differ from the original.
    * NOTE(review): the header name on the first context line was missing
      in the text this was taken from (bare "#include"); it is presumably
      <system.hh> -- confirm against the repository before applying.

  Observations left unchanged (NOTE(review): confirm and fix separately)
    * next_token(): consume_next is set by a leading backslash and is not
      cleared inside the identifier loop, so every later special character
      in the same token is also taken literally, not only the next one.
    * next_token(): after advancing to the next argument, *arg_i is read
      without checking that the new string is non-empty.
    * next_token(): arg_i/arg_end are taken from two separate
      (*begin).as_string() calls; this is only valid if as_string()
      returns a reference to stable storage -- not visible here.
    * next_token(): the local iterator 'beg' is never used.
    * parse_query_term(): a TERM in TOK_META or TOK_EXPR context reaches
      assert(0); those contexts are not implemented yet in this patch.

 src/predicate.cc | 449 +++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 302 insertions(+), 147 deletions(-)

(limited to 'src/predicate.cc')

diff --git a/src/predicate.cc b/src/predicate.cc
index ce71a180..4f712904 100644
--- a/src/predicate.cc
+++ b/src/predicate.cc
@@ -32,175 +32,330 @@
 #include <system.hh>
 
 #include "predicate.h"
+#include "op.h"
 
 namespace ledger {
 
-string args_to_predicate_expr(value_t::sequence_t::const_iterator& begin,
-                              value_t::sequence_t::const_iterator end)
+query_lexer_t::token_t query_lexer_t::next_token()
 {
-  std::ostringstream expr;
-
-  bool append_or = false;
-  bool only_parenthesis = false;
-
-  while (begin != end) {
-    string arg = (*begin).as_string();
-    string prefix;
+  if (token_cache.kind != token_t::UNKNOWN) {
+    token_t tok = token_cache;
+    token_cache = token_t();
+    return tok;
+  }
 
-    if (arg == "show") {
-      ++begin;
-      break;
+  if (arg_i == arg_end) {
+    if (begin == end || ++begin == end) {
+      return token_t(token_t::END_REACHED);
+    } else {
+      arg_i = (*begin).as_string().begin();
+      arg_end = (*begin).as_string().end();
     }
+  }
 
-    bool parse_argument = true;
-    bool only_closed_parenthesis = false;;
+ resume:
+  bool consume_next = false;
+  switch (*arg_i) {
+  case ' ':
+  case '\t':
+  case '\r':
+  case '\n':
+    if (++arg_i == arg_end)
+      return next_token();
+    goto resume;
 
-    if (arg == "not" || arg == "NOT") {
-      if (append_or)
-        prefix = " | ! ";
-      else
-        prefix = " ! ";
-      parse_argument = false;
-      append_or = false;
-    }
-    else if (arg == "and" || arg == "AND") {
-      prefix = " & ";
-      parse_argument = false;
-      append_or = false;
-    }
-    else if (arg == "or" || arg == "OR") {
-      prefix = " | ";
-      parse_argument = false;
-      append_or = false;
-    }
-    else if (append_or) {
-      if (! only_parenthesis)
-        prefix = " | ";
+  case '(': ++arg_i; return token_t(token_t::LPAREN);
+  case ')': ++arg_i; return token_t(token_t::RPAREN);
+  case '&': ++arg_i; return token_t(token_t::TOK_AND);
+  case '|': ++arg_i; return token_t(token_t::TOK_OR);
+  case '!': ++arg_i; return token_t(token_t::TOK_NOT);
+  case '@': ++arg_i; return token_t(token_t::TOK_PAYEE);
+  case '#': ++arg_i; return token_t(token_t::TOK_CODE);
+  case '%': ++arg_i; return token_t(token_t::TOK_META);
+  case '=':
+    // The '=' keyword at the beginning of a string causes the entire string
+    // to be taken as an expression.
+    if (arg_i == (*begin).as_string().begin())
+      consume_whitespace = true;
+    ++arg_i;
+    return token_t(token_t::TOK_EQ);
+
+  case '\\':
+    consume_next = true;
+    ++arg_i;
+    // fall through...
+  default: {
+    string ident;
+    string::const_iterator beg = arg_i;
+    for (; arg_i != arg_end; ++arg_i) {
+      switch (*arg_i) {
+      case ' ':
+      case '\t':
+      case '\n':
+      case '\r':
+        if (! consume_whitespace)
+          goto test_ident;
+        else
+          ident.push_back(*arg_i);
+        break;
+      case '(':
+      case ')':
+      case '&':
+      case '|':
+      case '!':
+      case '@':
+      case '#':
+      case '%':
+      case '=':
+        if (! consume_next)
+          goto test_ident;
+        // fall through...
+      default:
+        ident.push_back(*arg_i);
+        break;
+      }
     }
-    else {
-      append_or = true;
+    consume_whitespace = false;
+
+  test_ident:
+    if (ident == "and")
+      return token_t(token_t::TOK_AND);
+    else if (ident == "or")
+      return token_t(token_t::TOK_OR);
+    else if (ident == "not")
+      return token_t(token_t::TOK_NOT);
+    else if (ident == "account")
+      return token_t(token_t::TOK_ACCOUNT);
+    else if (ident == "desc")
+      return token_t(token_t::TOK_PAYEE);
+    else if (ident == "payee")
+      return token_t(token_t::TOK_PAYEE);
+    else if (ident == "code")
+      return token_t(token_t::TOK_CODE);
+    else if (ident == "note")
+      return token_t(token_t::TOK_NOTE);
+    else if (ident == "tag")
+      return token_t(token_t::TOK_META);
+    else if (ident == "meta")
+      return token_t(token_t::TOK_META);
+    else if (ident == "data")
+      return token_t(token_t::TOK_META);
+    else if (ident == "expr") {
+      // The expr keyword takes the whole of the next string as its
+      // argument.
+      consume_whitespace = true;
+      return token_t(token_t::TOK_EXPR);
     }
+    else
+      return token_t(token_t::TERM, ident);
+    break;
+  }
+  }
 
-    value_t::sequence_t::const_iterator next = begin;
-    if (++next != end) {
-      if (arg == "desc" || arg == "DESC" ||
-          arg == "payee" || arg == "PAYEE") {
-        arg = string("@") + (*++begin).as_string();
-      }
-      else if (arg == "code" || arg == "CODE") {
-        arg = string("#") + (*++begin).as_string();
-      }
-      else if (arg == "note" || arg == "NOTE") {
-        arg = string("&") + (*++begin).as_string();
-      }
-      else if (arg == "tag" || arg == "TAG" ||
-               arg == "meta" || arg == "META" ||
-               arg == "data" || arg == "DATA") {
-        arg = string("%") + (*++begin).as_string();
-      }
-      else if (arg == "expr" || arg == "EXPR") {
-        arg = string("=") + (*++begin).as_string();
+  return token_t(token_t::UNKNOWN);
+}
+
+void query_lexer_t::token_t::unexpected()
+{
+  kind_t prev_kind = kind;
+
+  kind = UNKNOWN;
+
+  switch (prev_kind) {
+  case END_REACHED:
+    throw_(parse_error, _("Unexpected end of expression"));
+  case TERM:
+    throw_(parse_error, _("Unexpected string '%1'") << *value);
+  default:
+    throw_(parse_error, _("Unexpected token '%1'") << symbol());
+  }
+}
+
+void query_lexer_t::token_t::expected(char wanted, char c)
+{
+  kind = UNKNOWN;
+
+  if (c == '\0' || c == -1) {
+    if (wanted == '\0' || wanted == -1)
+      throw_(parse_error, _("Unexpected end"));
+    else
+      throw_(parse_error, _("Missing '%1'") << wanted);
+  } else {
+    if (wanted == '\0' || wanted == -1)
+      throw_(parse_error, _("Invalid char '%1'") << c);
+    else
+      throw_(parse_error, _("Invalid char '%1' (wanted '%2')") << c << wanted);
+  }
+}
+
+expr_t::ptr_op_t
+query_parser_t::parse_query_term(query_lexer_t::token_t::kind_t tok_context)
+{
+  expr_t::ptr_op_t node;
+
+  query_lexer_t::token_t tok = lexer.next_token();
+  switch (tok.kind) {
+  case query_lexer_t::token_t::END_REACHED:
+    break;
+
+  case query_lexer_t::token_t::TOK_ACCOUNT:
+  case query_lexer_t::token_t::TOK_PAYEE:
+  case query_lexer_t::token_t::TOK_CODE:
+  case query_lexer_t::token_t::TOK_NOTE:
+  case query_lexer_t::token_t::TOK_META:
+  case query_lexer_t::token_t::TOK_EXPR:
+    node = parse_query_term(tok.kind);
+    if (! node)
+      throw_(parse_error,
+             _("%1 operator not followed by argument") << tok.symbol());
+    break;
+
+  case query_lexer_t::token_t::TERM:
+    assert(tok.value);
+    if (tok_context == query_lexer_t::token_t::TOK_META) {
+      assert(0);
+    } else {
+      node = new expr_t::op_t(expr_t::op_t::O_MATCH);
+
+      expr_t::ptr_op_t ident;
+      ident = new expr_t::op_t(expr_t::op_t::IDENT);
+      switch (tok_context) {
+      case query_lexer_t::token_t::TOK_ACCOUNT:
+        ident->set_ident("account"); break;
+      case query_lexer_t::token_t::TOK_PAYEE:
+        ident->set_ident("payee"); break;
+      case query_lexer_t::token_t::TOK_CODE:
+        ident->set_ident("code"); break;
+      case query_lexer_t::token_t::TOK_NOTE:
+        ident->set_ident("note"); break;
+      default:
+        assert(0); break;
       }
+
+      expr_t::ptr_op_t mask;
+      mask = new expr_t::op_t(expr_t::op_t::VALUE);
+      mask->set_value(mask_t(*tok.value));
+
+      node->set_left(ident);
+      node->set_right(mask);
     }
+    break;
 
-    if (parse_argument) {
-      bool in_prefix = true;
-      bool found_specifier = false;
-      bool no_final_slash = false;
-
-      only_parenthesis = true;
-
-      std::ostringstream buf;
-      string parens;
-
-      for (const char * c = arg.c_str(); *c != '\0'; c++) {
-        bool consumed = false;
-
-        if (*c != '(' && *c != ')')
-          only_parenthesis = false;
-
-        if (in_prefix) {
-          switch (*c) {
-          case ')':
-            if (only_parenthesis)
-              only_closed_parenthesis = true;
-            // fall through...
-          case '(':
-            parens += c;
-            consumed = true;
-            break;
-          case '@':
-            buf << "(payee =~ /";
-            found_specifier = true;
-            consumed = true;
-            break;
-          case '#':
-            buf << "(code =~ /";
-            found_specifier = true;
-            consumed = true;
-            break;
-          case '=':
-            buf << "(";
-            found_specifier = true;
-            no_final_slash = true;
-            consumed = true;
-            break;
-          case '&':
-            buf << "(note =~ /";
-            found_specifier = true;
-            consumed = true;
-            break;
-          case '%': {
-            bool found_metadata = false;
-            for (const char *q = c; *q != '\0'; q++)
-              if (*q == '=') {
-                buf << "has_tag(/"
-                    << string(c + 1, q - c - 1) << "/, /";
-                found_metadata = true;
-                c = q;
-                break;
-              }
-            if (! found_metadata) {
-              buf << "has_tag(/";
-            }
-            found_specifier = true;
-            consumed = true;
-            break;
-          }
-          default:
-            if (! found_specifier) {
-              buf << parens << "(account =~ /";
-              parens.clear();
-              found_specifier = true;
-            }
-            in_prefix = false;
-            break;
-          }
-        }
-
-        if (! consumed)
-          buf << *c;
-      }
+  case query_lexer_t::token_t::LPAREN:
+    node = parse_query_expr(tok_context);
+    tok = lexer.next_token();
+    if (tok.kind != query_lexer_t::token_t::RPAREN)
+      tok.expected(')');
+    break;
 
-      if (! prefix.empty() &&
-          ! (only_parenthesis && only_closed_parenthesis))
-        expr << prefix;
+  default:
+    lexer.push_token(tok);
+    break;
+  }
+
+  return node;
+}
 
-      expr << parens << buf.str();
+expr_t::ptr_op_t
+query_parser_t::parse_unary_expr(query_lexer_t::token_t::kind_t tok_context)
+{
+  expr_t::ptr_op_t node;
 
-      if (found_specifier) {
-        if (! no_final_slash)
-          expr << "/";
-        expr << ")";
+  query_lexer_t::token_t tok = lexer.next_token();
+  switch (tok.kind) {
+  case query_lexer_t::token_t::TOK_NOT: {
+    expr_t::ptr_op_t term(parse_query_term(tok_context));
+    if (! term)
+      throw_(parse_error,
+             _("%1 operator not followed by argument") << tok.symbol());
+
+    node = new expr_t::op_t(expr_t::op_t::O_NOT);
+    node->set_left(term);
+    break;
+  }
+
+  default:
+    lexer.push_token(tok);
+    node = parse_query_term(tok_context);
+    break;
+  }
+
+  return node;
+}
+
+expr_t::ptr_op_t
+query_parser_t::parse_and_expr(query_lexer_t::token_t::kind_t tok_context)
+{
+  if (expr_t::ptr_op_t node = parse_unary_expr(tok_context)) {
+    while (true) {
+      query_lexer_t::token_t tok = lexer.next_token();
+      if (tok.kind == query_lexer_t::token_t::TOK_AND) {
+        expr_t::ptr_op_t prev(node);
+        node = new expr_t::op_t(expr_t::op_t::O_AND);
+        node->set_left(prev);
+        node->set_right(parse_unary_expr(tok_context));
+        if (! node->right())
+          throw_(parse_error,
+                 _("%1 operator not followed by argument") << tok.symbol());
+      } else {
+        lexer.push_token(tok);
+        break;
+      }
+    }
+    return node;
+  }
+  return expr_t::ptr_op_t();
+}
+
+expr_t::ptr_op_t
+query_parser_t::parse_or_expr(query_lexer_t::token_t::kind_t tok_context)
+{
+  if (expr_t::ptr_op_t node = parse_and_expr(tok_context)) {
+    while (true) {
+      query_lexer_t::token_t tok = lexer.next_token();
+      if (tok.kind == query_lexer_t::token_t::TOK_OR) {
+        expr_t::ptr_op_t prev(node);
+        node = new expr_t::op_t(expr_t::op_t::O_OR);
+        node->set_left(prev);
+        node->set_right(parse_and_expr(tok_context));
+        if (! node->right())
+          throw_(parse_error,
+                 _("%1 operator not followed by argument") << tok.symbol());
+      } else {
+        lexer.push_token(tok);
+        break;
       }
-    } else {
-      expr << prefix;
     }
+    return node;
+  }
+  return expr_t::ptr_op_t();
+}
 
-    begin++;
+expr_t::ptr_op_t
+query_parser_t::parse_query_expr(query_lexer_t::token_t::kind_t tok_context)
+{
+  if (expr_t::ptr_op_t node = parse_or_expr(tok_context)) {
+    if (expr_t::ptr_op_t next = parse_query_expr(tok_context)) {
+      expr_t::ptr_op_t prev(node);
+      node = new expr_t::op_t(expr_t::op_t::O_OR);
+      node->set_left(prev);
+      node->set_right(next);
+    }
+    return node;
   }
+  return expr_t::ptr_op_t();
+}
 
-  return std::string("(") + expr.str() + ")";
+expr_t::ptr_op_t query_parser_t::parse()
+{
+  return parse_query_expr(query_lexer_t::token_t::TOK_ACCOUNT);
+}
+
+expr_t args_to_predicate(value_t::sequence_t::const_iterator& begin,
+                         value_t::sequence_t::const_iterator end)
+{
+  query_parser_t parser(begin, end);
+  return expr_t(parser.parse());
 }
 
 } // namespace ledger
-- 
cgit v1.2.3