summaryrefslogtreecommitdiff
path: root/src/predicate.cc
diff options
context:
space:
mode:
authorJohn Wiegley <johnw@newartisans.com>2009-10-28 18:40:31 -0400
committerJohn Wiegley <johnw@newartisans.com>2009-10-28 18:40:31 -0400
commit47df7dd60e9209db3be91a7b29a91911ee4a846b (patch)
treef67431dbeb4f8a9f2fa575a554e1fd570615f735 /src/predicate.cc
parent9408f3cbae5027734fe9b22ba3855e209d192eb1 (diff)
downloadfork-ledger-47df7dd60e9209db3be91a7b29a91911ee4a846b.tar.gz
fork-ledger-47df7dd60e9209db3be91a7b29a91911ee4a846b.tar.bz2
fork-ledger-47df7dd60e9209db3be91a7b29a91911ee4a846b.zip
Rewrote the report query parser
It is now a full parser that parses report queries directly into value expression trees. These then get rendered into text so that other options may extend the expression.
Diffstat (limited to 'src/predicate.cc')
-rw-r--r--src/predicate.cc449
1 files changed, 302 insertions, 147 deletions
diff --git a/src/predicate.cc b/src/predicate.cc
index ce71a180..4f712904 100644
--- a/src/predicate.cc
+++ b/src/predicate.cc
@@ -32,175 +32,330 @@
#include <system.hh>
#include "predicate.h"
+#include "op.h"
namespace ledger {
-string args_to_predicate_expr(value_t::sequence_t::const_iterator& begin,
- value_t::sequence_t::const_iterator end)
+query_lexer_t::token_t query_lexer_t::next_token()
{
- std::ostringstream expr;
-
- bool append_or = false;
- bool only_parenthesis = false;
-
- while (begin != end) {
- string arg = (*begin).as_string();
- string prefix;
+ if (token_cache.kind != token_t::UNKNOWN) {
+ token_t tok = token_cache;
+ token_cache = token_t();
+ return tok;
+ }
- if (arg == "show") {
- ++begin;
- break;
+ if (arg_i == arg_end) {
+ if (begin == end || ++begin == end) {
+ return token_t(token_t::END_REACHED);
+ } else {
+ arg_i = (*begin).as_string().begin();
+ arg_end = (*begin).as_string().end();
}
+ }
- bool parse_argument = true;
- bool only_closed_parenthesis = false;;
+ resume:
+ bool consume_next = false;
+ switch (*arg_i) {
+ case ' ':
+ case '\t':
+ case '\r':
+ case '\n':
+ if (++arg_i == arg_end)
+ return next_token();
+ goto resume;
- if (arg == "not" || arg == "NOT") {
- if (append_or)
- prefix = " | ! ";
- else
- prefix = " ! ";
- parse_argument = false;
- append_or = false;
- }
- else if (arg == "and" || arg == "AND") {
- prefix = " & ";
- parse_argument = false;
- append_or = false;
- }
- else if (arg == "or" || arg == "OR") {
- prefix = " | ";
- parse_argument = false;
- append_or = false;
- }
- else if (append_or) {
- if (! only_parenthesis)
- prefix = " | ";
+ case '(': ++arg_i; return token_t(token_t::LPAREN);
+ case ')': ++arg_i; return token_t(token_t::RPAREN);
+ case '&': ++arg_i; return token_t(token_t::TOK_AND);
+ case '|': ++arg_i; return token_t(token_t::TOK_OR);
+ case '!': ++arg_i; return token_t(token_t::TOK_NOT);
+ case '@': ++arg_i; return token_t(token_t::TOK_PAYEE);
+ case '#': ++arg_i; return token_t(token_t::TOK_CODE);
+ case '%': ++arg_i; return token_t(token_t::TOK_META);
+ case '=':
+ // The '=' keyword at the beginning of a string causes the entire string
+ // to be taken as an expression.
+ if (arg_i == (*begin).as_string().begin())
+ consume_whitespace = true;
+ ++arg_i;
+ return token_t(token_t::TOK_EQ);
+
+ case '\\':
+ consume_next = true;
+ ++arg_i;
+ // fall through...
+ default: {
+ string ident;
+ string::const_iterator beg = arg_i;
+ for (; arg_i != arg_end; ++arg_i) {
+ switch (*arg_i) {
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\r':
+ if (! consume_whitespace)
+ goto test_ident;
+ else
+ ident.push_back(*arg_i);
+ break;
+ case '(':
+ case ')':
+ case '&':
+ case '|':
+ case '!':
+ case '@':
+ case '#':
+ case '%':
+ case '=':
+ if (! consume_next)
+ goto test_ident;
+ // fall through...
+ default:
+ ident.push_back(*arg_i);
+ break;
+ }
}
- else {
- append_or = true;
+ consume_whitespace = false;
+
+ test_ident:
+ if (ident == "and")
+ return token_t(token_t::TOK_AND);
+ else if (ident == "or")
+ return token_t(token_t::TOK_OR);
+ else if (ident == "not")
+ return token_t(token_t::TOK_NOT);
+ else if (ident == "account")
+ return token_t(token_t::TOK_ACCOUNT);
+ else if (ident == "desc")
+ return token_t(token_t::TOK_PAYEE);
+ else if (ident == "payee")
+ return token_t(token_t::TOK_PAYEE);
+ else if (ident == "code")
+ return token_t(token_t::TOK_CODE);
+ else if (ident == "note")
+ return token_t(token_t::TOK_NOT);
+ else if (ident == "tag")
+ return token_t(token_t::TOK_META);
+ else if (ident == "meta")
+ return token_t(token_t::TOK_META);
+ else if (ident == "data")
+ return token_t(token_t::TOK_META);
+ else if (ident == "expr") {
+ // The expr keyword takes the whole of the next string as its
+ // argument.
+ consume_whitespace = true;
+ return token_t(token_t::TOK_EXPR);
}
+ else
+ return token_t(token_t::TERM, ident);
+ break;
+ }
+ }
- value_t::sequence_t::const_iterator next = begin;
- if (++next != end) {
- if (arg == "desc" || arg == "DESC" ||
- arg == "payee" || arg == "PAYEE") {
- arg = string("@") + (*++begin).as_string();
- }
- else if (arg == "code" || arg == "CODE") {
- arg = string("#") + (*++begin).as_string();
- }
- else if (arg == "note" || arg == "NOTE") {
- arg = string("&") + (*++begin).as_string();
- }
- else if (arg == "tag" || arg == "TAG" ||
- arg == "meta" || arg == "META" ||
- arg == "data" || arg == "DATA") {
- arg = string("%") + (*++begin).as_string();
- }
- else if (arg == "expr" || arg == "EXPR") {
- arg = string("=") + (*++begin).as_string();
+ return token_t(token_t::UNKNOWN);
+}
+
+void query_lexer_t::token_t::unexpected()
+{
+ kind_t prev_kind = kind;
+
+ kind = UNKNOWN;
+
+ switch (prev_kind) {
+ case END_REACHED:
+ throw_(parse_error, _("Unexpected end of expression"));
+ case TERM:
+ throw_(parse_error, _("Unexpected string '%1'") << *value);
+ default:
+ throw_(parse_error, _("Unexpected token '%1'") << symbol());
+ }
+}
+
+void query_lexer_t::token_t::expected(char wanted, char c)
+{
+ kind = UNKNOWN;
+
+ if (c == '\0' || c == -1) {
+ if (wanted == '\0' || wanted == -1)
+ throw_(parse_error, _("Unexpected end"));
+ else
+ throw_(parse_error, _("Missing '%1'") << wanted);
+ } else {
+ if (wanted == '\0' || wanted == -1)
+ throw_(parse_error, _("Invalid char '%1'") << c);
+ else
+ throw_(parse_error, _("Invalid char '%1' (wanted '%2')") << c << wanted);
+ }
+}
+
+expr_t::ptr_op_t
+query_parser_t::parse_query_term(query_lexer_t::token_t::kind_t tok_context)
+{
+ expr_t::ptr_op_t node;
+
+ query_lexer_t::token_t tok = lexer.next_token();
+ switch (tok.kind) {
+ case query_lexer_t::token_t::END_REACHED:
+ break;
+
+ case query_lexer_t::token_t::TOK_ACCOUNT:
+ case query_lexer_t::token_t::TOK_PAYEE:
+ case query_lexer_t::token_t::TOK_CODE:
+ case query_lexer_t::token_t::TOK_NOTE:
+ case query_lexer_t::token_t::TOK_META:
+ case query_lexer_t::token_t::TOK_EXPR:
+ node = parse_query_term(tok.kind);
+ if (! node)
+ throw_(parse_error,
+ _("%1 operator not followed by argument") << tok.symbol());
+ break;
+
+ case query_lexer_t::token_t::TERM:
+ assert(tok.value);
+ if (tok_context == query_lexer_t::token_t::TOK_META) {
+ assert(0);
+ } else {
+ node = new expr_t::op_t(expr_t::op_t::O_MATCH);
+
+ expr_t::ptr_op_t ident;
+ ident = new expr_t::op_t(expr_t::op_t::IDENT);
+ switch (tok_context) {
+ case query_lexer_t::token_t::TOK_ACCOUNT:
+ ident->set_ident("account"); break;
+ case query_lexer_t::token_t::TOK_PAYEE:
+ ident->set_ident("payee"); break;
+ case query_lexer_t::token_t::TOK_CODE:
+ ident->set_ident("code"); break;
+ case query_lexer_t::token_t::TOK_NOTE:
+ ident->set_ident("note"); break;
+ default:
+ assert(0); break;
}
+
+ expr_t::ptr_op_t mask;
+ mask = new expr_t::op_t(expr_t::op_t::VALUE);
+ mask->set_value(mask_t(*tok.value));
+
+ node->set_left(ident);
+ node->set_right(mask);
}
+ break;
- if (parse_argument) {
- bool in_prefix = true;
- bool found_specifier = false;
- bool no_final_slash = false;
-
- only_parenthesis = true;
-
- std::ostringstream buf;
- string parens;
-
- for (const char * c = arg.c_str(); *c != '\0'; c++) {
- bool consumed = false;
-
- if (*c != '(' && *c != ')')
- only_parenthesis = false;
-
- if (in_prefix) {
- switch (*c) {
- case ')':
- if (only_parenthesis)
- only_closed_parenthesis = true;
- // fall through...
- case '(':
- parens += c;
- consumed = true;
- break;
- case '@':
- buf << "(payee =~ /";
- found_specifier = true;
- consumed = true;
- break;
- case '#':
- buf << "(code =~ /";
- found_specifier = true;
- consumed = true;
- break;
- case '=':
- buf << "(";
- found_specifier = true;
- no_final_slash = true;
- consumed = true;
- break;
- case '&':
- buf << "(note =~ /";
- found_specifier = true;
- consumed = true;
- break;
- case '%': {
- bool found_metadata = false;
- for (const char *q = c; *q != '\0'; q++)
- if (*q == '=') {
- buf << "has_tag(/"
- << string(c + 1, q - c - 1) << "/, /";
- found_metadata = true;
- c = q;
- break;
- }
- if (! found_metadata) {
- buf << "has_tag(/";
- }
- found_specifier = true;
- consumed = true;
- break;
- }
- default:
- if (! found_specifier) {
- buf << parens << "(account =~ /";
- parens.clear();
- found_specifier = true;
- }
- in_prefix = false;
- break;
- }
- }
-
- if (! consumed)
- buf << *c;
- }
+ case query_lexer_t::token_t::LPAREN:
+ node = parse_query_expr(tok_context);
+ tok = lexer.next_token();
+ if (tok.kind != query_lexer_t::token_t::RPAREN)
+ tok.expected(')');
+ break;
- if (! prefix.empty() &&
- ! (only_parenthesis && only_closed_parenthesis))
- expr << prefix;
+ default:
+ lexer.push_token(tok);
+ break;
+ }
+
+ return node;
+}
- expr << parens << buf.str();
+expr_t::ptr_op_t
+query_parser_t::parse_unary_expr(query_lexer_t::token_t::kind_t tok_context)
+{
+ expr_t::ptr_op_t node;
- if (found_specifier) {
- if (! no_final_slash)
- expr << "/";
- expr << ")";
+ query_lexer_t::token_t tok = lexer.next_token();
+ switch (tok.kind) {
+ case query_lexer_t::token_t::TOK_NOT: {
+ expr_t::ptr_op_t term(parse_query_term(tok_context));
+ if (! term)
+ throw_(parse_error,
+ _("%1 operator not followed by argument") << tok.symbol());
+
+ node = new expr_t::op_t(expr_t::op_t::O_NOT);
+ node->set_left(term);
+ break;
+ }
+
+ default:
+ lexer.push_token(tok);
+ node = parse_query_term(tok_context);
+ break;
+ }
+
+ return node;
+}
+
+expr_t::ptr_op_t
+query_parser_t::parse_and_expr(query_lexer_t::token_t::kind_t tok_context)
+{
+ if (expr_t::ptr_op_t node = parse_unary_expr(tok_context)) {
+ while (true) {
+ query_lexer_t::token_t tok = lexer.next_token();
+ if (tok.kind == query_lexer_t::token_t::TOK_AND) {
+ expr_t::ptr_op_t prev(node);
+ node = new expr_t::op_t(expr_t::op_t::O_AND);
+ node->set_left(prev);
+ node->set_right(parse_unary_expr(tok_context));
+ if (! node->right())
+ throw_(parse_error,
+ _("%1 operator not followed by argument") << tok.symbol());
+ } else {
+ lexer.push_token(tok);
+ break;
+ }
+ }
+ return node;
+ }
+ return expr_t::ptr_op_t();
+}
+
+expr_t::ptr_op_t
+query_parser_t::parse_or_expr(query_lexer_t::token_t::kind_t tok_context)
+{
+ if (expr_t::ptr_op_t node = parse_and_expr(tok_context)) {
+ while (true) {
+ query_lexer_t::token_t tok = lexer.next_token();
+ if (tok.kind == query_lexer_t::token_t::TOK_OR) {
+ expr_t::ptr_op_t prev(node);
+ node = new expr_t::op_t(expr_t::op_t::O_OR);
+ node->set_left(prev);
+ node->set_right(parse_and_expr(tok_context));
+ if (! node->right())
+ throw_(parse_error,
+ _("%1 operator not followed by argument") << tok.symbol());
+ } else {
+ lexer.push_token(tok);
+ break;
}
- } else {
- expr << prefix;
}
+ return node;
+ }
+ return expr_t::ptr_op_t();
+}
- begin++;
+expr_t::ptr_op_t
+query_parser_t::parse_query_expr(query_lexer_t::token_t::kind_t tok_context)
+{
+ if (expr_t::ptr_op_t node = parse_or_expr(tok_context)) {
+ if (expr_t::ptr_op_t next = parse_query_expr(tok_context)) {
+ expr_t::ptr_op_t prev(node);
+ node = new expr_t::op_t(expr_t::op_t::O_OR);
+ node->set_left(prev);
+ node->set_right(next);
+ }
+ return node;
}
+ return expr_t::ptr_op_t();
+}
- return std::string("(") + expr.str() + ")";
+expr_t::ptr_op_t query_parser_t::parse()
+{
+ return parse_query_expr(query_lexer_t::token_t::TOK_ACCOUNT);
+}
+
+expr_t args_to_predicate(value_t::sequence_t::const_iterator& begin,
+ value_t::sequence_t::const_iterator end)
+{
+ query_parser_t parser(begin, end);
+ return expr_t(parser.parse());
}
} // namespace ledger