diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/precmd.cc | 4 | ||||
-rw-r--r-- | src/predicate.cc | 449 | ||||
-rw-r--r-- | src/predicate.h | 156 | ||||
-rw-r--r-- | src/report.cc | 4 |
4 files changed, 460 insertions, 153 deletions
diff --git a/src/precmd.cc b/src/precmd.cc index 1160cc64..999261fa 100644 --- a/src/precmd.cc +++ b/src/precmd.cc @@ -226,7 +226,7 @@ value_t args_command(call_scope_t& args) args.value().dump(out); out << std::endl << std::endl; - string predicate = args_to_predicate_expr(begin, end); + string predicate = args_to_predicate(begin, end).text(); call_scope_t sub_args(static_cast<scope_t&>(args)); sub_args.push_back(string_value(predicate)); @@ -237,7 +237,7 @@ value_t args_command(call_scope_t& args) out << std::endl << _("====== Display predicate ======") << std::endl << std::endl; - predicate = args_to_predicate_expr(begin, end); + predicate = args_to_predicate(begin, end).text(); call_scope_t disp_sub_args(static_cast<scope_t&>(args)); disp_sub_args.push_back(string_value(predicate)); diff --git a/src/predicate.cc b/src/predicate.cc index ce71a180..4f712904 100644 --- a/src/predicate.cc +++ b/src/predicate.cc @@ -32,175 +32,330 @@ #include <system.hh> #include "predicate.h" +#include "op.h" namespace ledger { -string args_to_predicate_expr(value_t::sequence_t::const_iterator& begin, - value_t::sequence_t::const_iterator end) +query_lexer_t::token_t query_lexer_t::next_token() { - std::ostringstream expr; - - bool append_or = false; - bool only_parenthesis = false; - - while (begin != end) { - string arg = (*begin).as_string(); - string prefix; + if (token_cache.kind != token_t::UNKNOWN) { + token_t tok = token_cache; + token_cache = token_t(); + return tok; + } - if (arg == "show") { - ++begin; - break; + if (arg_i == arg_end) { + if (begin == end || ++begin == end) { + return token_t(token_t::END_REACHED); + } else { + arg_i = (*begin).as_string().begin(); + arg_end = (*begin).as_string().end(); } + } - bool parse_argument = true; - bool only_closed_parenthesis = false;; + resume: + bool consume_next = false; + switch (*arg_i) { + case ' ': + case '\t': + case '\r': + case '\n': + if (++arg_i == arg_end) + return next_token(); + goto resume; - if (arg == "not" || arg == "NOT") { - if (append_or) - prefix = " | ! "; - else - prefix = " ! "; - parse_argument = false; - append_or = false; - } - else if (arg == "and" || arg == "AND") { - prefix = " & "; - parse_argument = false; - append_or = false; - } - else if (arg == "or" || arg == "OR") { - prefix = " | "; - parse_argument = false; - append_or = false; - } - else if (append_or) { - if (! only_parenthesis) - prefix = " | "; + case '(': ++arg_i; return token_t(token_t::LPAREN); + case ')': ++arg_i; return token_t(token_t::RPAREN); + case '&': ++arg_i; return token_t(token_t::TOK_AND); + case '|': ++arg_i; return token_t(token_t::TOK_OR); + case '!': ++arg_i; return token_t(token_t::TOK_NOT); + case '@': ++arg_i; return token_t(token_t::TOK_PAYEE); + case '#': ++arg_i; return token_t(token_t::TOK_CODE); + case '%': ++arg_i; return token_t(token_t::TOK_META); + case '=': + // The '=' keyword at the beginning of a string causes the entire string + // to be taken as an expression. + if (arg_i == (*begin).as_string().begin()) + consume_whitespace = true; + ++arg_i; + return token_t(token_t::TOK_EQ); + + case '\\': + consume_next = true; + ++arg_i; + // fall through... + default: { + string ident; + string::const_iterator beg = arg_i; + for (; arg_i != arg_end; ++arg_i) { + switch (*arg_i) { + case ' ': + case '\t': + case '\n': + case '\r': + if (! consume_whitespace) + goto test_ident; + else + ident.push_back(*arg_i); + break; + case '(': + case ')': + case '&': + case '|': + case '!': + case '@': + case '#': + case '%': + case '=': + if (! consume_next) + goto test_ident; + // fall through... + default: + ident.push_back(*arg_i); + break; + } } - else { - append_or = true; + consume_whitespace = false; + + test_ident: + if (ident == "and") + return token_t(token_t::TOK_AND); + else if (ident == "or") + return token_t(token_t::TOK_OR); + else if (ident == "not") + return token_t(token_t::TOK_NOT); + else if (ident == "account") + return token_t(token_t::TOK_ACCOUNT); + else if (ident == "desc") + return token_t(token_t::TOK_PAYEE); + else if (ident == "payee") + return token_t(token_t::TOK_PAYEE); + else if (ident == "code") + return token_t(token_t::TOK_CODE); + else if (ident == "note") + return token_t(token_t::TOK_NOT); + else if (ident == "tag") + return token_t(token_t::TOK_META); + else if (ident == "meta") + return token_t(token_t::TOK_META); + else if (ident == "data") + return token_t(token_t::TOK_META); + else if (ident == "expr") { + // The expr keyword takes the whole of the next string as its + // argument. + consume_whitespace = true; + return token_t(token_t::TOK_EXPR); } + else + return token_t(token_t::TERM, ident); + break; + } + } - value_t::sequence_t::const_iterator next = begin; - if (++next != end) { - if (arg == "desc" || arg == "DESC" || - arg == "payee" || arg == "PAYEE") { - arg = string("@") + (*++begin).as_string(); - } - else if (arg == "code" || arg == "CODE") { - arg = string("#") + (*++begin).as_string(); - } - else if (arg == "note" || arg == "NOTE") { - arg = string("&") + (*++begin).as_string(); - } - else if (arg == "tag" || arg == "TAG" || - arg == "meta" || arg == "META" || - arg == "data" || arg == "DATA") { - arg = string("%") + (*++begin).as_string(); - } - else if (arg == "expr" || arg == "EXPR") { - arg = string("=") + (*++begin).as_string(); + return token_t(token_t::UNKNOWN); +} + +void query_lexer_t::token_t::unexpected() +{ + kind_t prev_kind = kind; + + kind = UNKNOWN; + + switch (prev_kind) { + case END_REACHED: + throw_(parse_error, _("Unexpected end of expression")); + case TERM: + throw_(parse_error, _("Unexpected string '%1'") << *value); + default: + throw_(parse_error, _("Unexpected token '%1'") << symbol()); + } +} + +void query_lexer_t::token_t::expected(char wanted, char c) +{ + kind = UNKNOWN; + + if (c == '\0' || c == -1) { + if (wanted == '\0' || wanted == -1) + throw_(parse_error, _("Unexpected end")); + else + throw_(parse_error, _("Missing '%1'") << wanted); + } else { + if (wanted == '\0' || wanted == -1) + throw_(parse_error, _("Invalid char '%1'") << c); + else + throw_(parse_error, _("Invalid char '%1' (wanted '%2')") << c << wanted); + } +} + +expr_t::ptr_op_t +query_parser_t::parse_query_term(query_lexer_t::token_t::kind_t tok_context) +{ + expr_t::ptr_op_t node; + + query_lexer_t::token_t tok = lexer.next_token(); + switch (tok.kind) { + case query_lexer_t::token_t::END_REACHED: + break; + + case query_lexer_t::token_t::TOK_ACCOUNT: + case query_lexer_t::token_t::TOK_PAYEE: + case query_lexer_t::token_t::TOK_CODE: + case query_lexer_t::token_t::TOK_NOTE: + case query_lexer_t::token_t::TOK_META: + case query_lexer_t::token_t::TOK_EXPR: + node = parse_query_term(tok.kind); + if (! node) + throw_(parse_error, + _("%1 operator not followed by argument") << tok.symbol()); + break; + + case query_lexer_t::token_t::TERM: + assert(tok.value); + if (tok_context == query_lexer_t::token_t::TOK_META) { + assert(0); + } else { + node = new expr_t::op_t(expr_t::op_t::O_MATCH); + + expr_t::ptr_op_t ident; + ident = new expr_t::op_t(expr_t::op_t::IDENT); + switch (tok_context) { + case query_lexer_t::token_t::TOK_ACCOUNT: + ident->set_ident("account"); break; + case query_lexer_t::token_t::TOK_PAYEE: + ident->set_ident("payee"); break; + case query_lexer_t::token_t::TOK_CODE: + ident->set_ident("code"); break; + case query_lexer_t::token_t::TOK_NOTE: + ident->set_ident("note"); break; + default: + assert(0); break; } + + expr_t::ptr_op_t mask; + mask = new expr_t::op_t(expr_t::op_t::VALUE); + mask->set_value(mask_t(*tok.value)); + + node->set_left(ident); + node->set_right(mask); } + break; - if (parse_argument) { - bool in_prefix = true; - bool found_specifier = false; - bool no_final_slash = false; - - only_parenthesis = true; - - std::ostringstream buf; - string parens; - - for (const char * c = arg.c_str(); *c != '\0'; c++) { - bool consumed = false; - - if (*c != '(' && *c != ')') - only_parenthesis = false; - - if (in_prefix) { - switch (*c) { - case ')': - if (only_parenthesis) - only_closed_parenthesis = true; - // fall through... - case '(': - parens += c; - consumed = true; - break; - case '@': - buf << "(payee =~ /"; - found_specifier = true; - consumed = true; - break; - case '#': - buf << "(code =~ /"; - found_specifier = true; - consumed = true; - break; - case '=': - buf << "("; - found_specifier = true; - no_final_slash = true; - consumed = true; - break; - case '&': - buf << "(note =~ /"; - found_specifier = true; - consumed = true; - break; - case '%': { - bool found_metadata = false; - for (const char *q = c; *q != '\0'; q++) - if (*q == '=') { - buf << "has_tag(/" - << string(c + 1, q - c - 1) << "/, /"; - found_metadata = true; - c = q; - break; - } - if (! found_metadata) { - buf << "has_tag(/"; - } - found_specifier = true; - consumed = true; - break; - } - default: - if (! found_specifier) { - buf << parens << "(account =~ /"; - parens.clear(); - found_specifier = true; - } - in_prefix = false; - break; - } - } - - if (! consumed) - buf << *c; - } + case query_lexer_t::token_t::LPAREN: + node = parse_query_expr(tok_context); + tok = lexer.next_token(); + if (tok.kind != query_lexer_t::token_t::RPAREN) + tok.expected(')'); + break; - if (! prefix.empty() && - ! (only_parenthesis && only_closed_parenthesis)) - expr << prefix; + default: + lexer.push_token(tok); + break; + } + + return node; +} - expr << parens << buf.str(); +expr_t::ptr_op_t +query_parser_t::parse_unary_expr(query_lexer_t::token_t::kind_t tok_context) +{ + expr_t::ptr_op_t node; - if (found_specifier) { - if (! no_final_slash) - expr << "/"; - expr << ")"; + query_lexer_t::token_t tok = lexer.next_token(); + switch (tok.kind) { + case query_lexer_t::token_t::TOK_NOT: { + expr_t::ptr_op_t term(parse_query_term(tok_context)); + if (! term) + throw_(parse_error, + _("%1 operator not followed by argument") << tok.symbol()); + + node = new expr_t::op_t(expr_t::op_t::O_NOT); + node->set_left(term); + break; + } + + default: + lexer.push_token(tok); + node = parse_query_term(tok_context); + break; + } + + return node; +} + +expr_t::ptr_op_t +query_parser_t::parse_and_expr(query_lexer_t::token_t::kind_t tok_context) +{ + if (expr_t::ptr_op_t node = parse_unary_expr(tok_context)) { + while (true) { + query_lexer_t::token_t tok = lexer.next_token(); + if (tok.kind == query_lexer_t::token_t::TOK_AND) { + expr_t::ptr_op_t prev(node); + node = new expr_t::op_t(expr_t::op_t::O_AND); + node->set_left(prev); + node->set_right(parse_unary_expr(tok_context)); + if (! node->right()) + throw_(parse_error, + _("%1 operator not followed by argument") << tok.symbol()); + } else { + lexer.push_token(tok); + break; + } + } + return node; + } + return expr_t::ptr_op_t(); +} + +expr_t::ptr_op_t +query_parser_t::parse_or_expr(query_lexer_t::token_t::kind_t tok_context) +{ + if (expr_t::ptr_op_t node = parse_and_expr(tok_context)) { + while (true) { + query_lexer_t::token_t tok = lexer.next_token(); + if (tok.kind == query_lexer_t::token_t::TOK_OR) { + expr_t::ptr_op_t prev(node); + node = new expr_t::op_t(expr_t::op_t::O_OR); + node->set_left(prev); + node->set_right(parse_and_expr(tok_context)); + if (! node->right()) + throw_(parse_error, + _("%1 operator not followed by argument") << tok.symbol()); + } else { + lexer.push_token(tok); + break; } - } else { - expr << prefix; } + return node; + } + return expr_t::ptr_op_t(); +} - begin++; +expr_t::ptr_op_t +query_parser_t::parse_query_expr(query_lexer_t::token_t::kind_t tok_context) +{ + if (expr_t::ptr_op_t node = parse_or_expr(tok_context)) { + if (expr_t::ptr_op_t next = parse_query_expr(tok_context)) { + expr_t::ptr_op_t prev(node); + node = new expr_t::op_t(expr_t::op_t::O_OR); + node->set_left(prev); + node->set_right(next); + } + return node; } + return expr_t::ptr_op_t(); +} - return std::string("(") + expr.str() + ")"; +expr_t::ptr_op_t query_parser_t::parse() +{ + return parse_query_expr(query_lexer_t::token_t::TOK_ACCOUNT); +} + +expr_t args_to_predicate(value_t::sequence_t::const_iterator& begin, + value_t::sequence_t::const_iterator end) +{ + query_parser_t parser(begin, end); + return expr_t(parser.parse()); } } // namespace ledger diff --git a/src/predicate.h b/src/predicate.h index 3e9fc6b1..e1048f83 100644 --- a/src/predicate.h +++ b/src/predicate.h @@ -96,8 +96,160 @@ public: } }; -string args_to_predicate_expr(value_t::sequence_t::const_iterator& begin, - value_t::sequence_t::const_iterator end); +class query_lexer_t +{ + value_t::sequence_t::const_iterator begin; + value_t::sequence_t::const_iterator end; + + string::const_iterator arg_i; + string::const_iterator arg_end; + + bool consume_whitespace; + +public: + struct token_t + { + enum kind_t { + UNKNOWN, + + LPAREN, + RPAREN, + + TOK_NOT, + TOK_AND, + TOK_OR, + TOK_EQ, + + TOK_ACCOUNT, + TOK_PAYEE, + TOK_CODE, + TOK_NOTE, + TOK_META, + TOK_EXPR, + + TERM, + + END_REACHED + + } kind; + + optional<string> value; + + explicit token_t(kind_t _kind = UNKNOWN, + const optional<string>& _value = none) + : kind(_kind), value(_value) { + TRACE_CTOR(query_lexer_t::token_t, ""); + } + token_t(const token_t& tok) + : kind(tok.kind), value(tok.value) { + TRACE_CTOR(query_lexer_t::token_t, "copy"); + } + ~token_t() throw() { + TRACE_DTOR(query_lexer_t::token_t); + } + + token_t& operator=(const token_t& tok) { + if (this != &tok) { + kind = tok.kind; + value = tok.value; + } + return *this; + } + + operator bool() const { + return kind != END_REACHED; + } + + string to_string() const { + switch (kind) { + case UNKNOWN: return "UNKNOWN"; + case LPAREN: return "LPAREN"; + case RPAREN: return "RPAREN"; + case TOK_NOT: return "TOK_NOT"; + case TOK_AND: return "TOK_AND"; + case TOK_OR: return "TOK_OR"; + case TOK_EQ: return "TOK_EQ"; + case TOK_ACCOUNT: return "TOK_ACCOUNT"; + case TOK_PAYEE: return "TOK_PAYEE"; + case TOK_CODE: return "TOK_CODE"; + case TOK_NOTE: return "TOK_NOTE"; + case TOK_META: return "TOK_META"; + case TOK_EXPR: return "TOK_EXPR"; + case TERM: return string("TERM(") + *value + ")"; + case END_REACHED: return "END_REACHED"; + } + } + + string symbol() const { + switch (kind) { + case LPAREN: return "("; + case RPAREN: return ")"; + case TOK_NOT: return "not"; + case TOK_AND: return "and"; + case TOK_OR: return "or"; + case TOK_EQ: return "="; + case TOK_ACCOUNT: return "account"; + case TOK_PAYEE: return "payee"; + case TOK_CODE: return "code"; + case TOK_NOTE: return "note"; + case TOK_META: return "meta"; + case TOK_EXPR: return "expr"; + + case END_REACHED: return "<EOF>"; + + case TERM: + assert(0); + return "<TERM>"; + + case UNKNOWN: + default: + assert(0); + return "<UNKNOWN>"; + } + } + + void unexpected(); + void expected(char wanted, char c = '\0'); + }; + + token_t token_cache; + + query_lexer_t(value_t::sequence_t::const_iterator _begin, + value_t::sequence_t::const_iterator _end) + : begin(_begin), end(_end), consume_whitespace(false) + { + assert(begin != end); + arg_i = (*begin).as_string().begin(); + arg_end = (*begin).as_string().end(); + } + + token_t next_token(); + void push_token(token_t tok) { + assert(token_cache.kind == token_t::UNKNOWN); + token_cache = tok; + } +}; + +class query_parser_t +{ + query_lexer_t lexer; + + expr_t::ptr_op_t parse_query_term(query_lexer_t::token_t::kind_t tok_context); + expr_t::ptr_op_t parse_unary_expr(query_lexer_t::token_t::kind_t tok_context); + expr_t::ptr_op_t parse_and_expr(query_lexer_t::token_t::kind_t tok_context); + expr_t::ptr_op_t parse_or_expr(query_lexer_t::token_t::kind_t tok_context); + expr_t::ptr_op_t parse_query_expr(query_lexer_t::token_t::kind_t tok_context); + +public: + query_parser_t(value_t::sequence_t::const_iterator begin, + value_t::sequence_t::const_iterator end) + : lexer(begin, end) {} + + expr_t::ptr_op_t parse(); +}; + +expr_t args_to_predicate(value_t::sequence_t::const_iterator& begin, + value_t::sequence_t::const_iterator end); } // namespace ledger diff --git a/src/report.cc b/src/report.cc index 62b54ad1..febe43e3 100644 --- a/src/report.cc +++ b/src/report.cc @@ -380,7 +380,7 @@ namespace { value_t::sequence_t::const_iterator end = args.value().as_sequence().end(); - string limit = args_to_predicate_expr(begin, end); + string limit = args_to_predicate(begin, end).text(); if (! limit.empty()) report.HANDLER(limit_).on(whence, limit); @@ -390,7 +390,7 @@ namespace { string display; if (begin != end) - display = args_to_predicate_expr(begin, end); + display = args_to_predicate(begin, end).text(); if (! display.empty()) report.HANDLER(display_).on(whence, display); |