summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJohn Wiegley <johnw@newartisans.com>2009-10-28 18:40:31 -0400
committerJohn Wiegley <johnw@newartisans.com>2009-10-28 18:40:31 -0400
commit47df7dd60e9209db3be91a7b29a91911ee4a846b (patch)
treef67431dbeb4f8a9f2fa575a554e1fd570615f735 /src
parent9408f3cbae5027734fe9b22ba3855e209d192eb1 (diff)
downloadfork-ledger-47df7dd60e9209db3be91a7b29a91911ee4a846b.tar.gz
fork-ledger-47df7dd60e9209db3be91a7b29a91911ee4a846b.tar.bz2
fork-ledger-47df7dd60e9209db3be91a7b29a91911ee4a846b.zip
Rewrote the report query parser
It is now a full parser that parses report queries directly into value expression trees. These then get rendered into text so that other options may extend the expression.
Diffstat (limited to 'src')
-rw-r--r--src/precmd.cc4
-rw-r--r--src/predicate.cc449
-rw-r--r--src/predicate.h156
-rw-r--r--src/report.cc4
4 files changed, 460 insertions, 153 deletions
diff --git a/src/precmd.cc b/src/precmd.cc
index 1160cc64..999261fa 100644
--- a/src/precmd.cc
+++ b/src/precmd.cc
@@ -226,7 +226,7 @@ value_t args_command(call_scope_t& args)
args.value().dump(out);
out << std::endl << std::endl;
- string predicate = args_to_predicate_expr(begin, end);
+ string predicate = args_to_predicate(begin, end).text();
call_scope_t sub_args(static_cast<scope_t&>(args));
sub_args.push_back(string_value(predicate));
@@ -237,7 +237,7 @@ value_t args_command(call_scope_t& args)
out << std::endl << _("====== Display predicate ======")
<< std::endl << std::endl;
- predicate = args_to_predicate_expr(begin, end);
+ predicate = args_to_predicate(begin, end).text();
call_scope_t disp_sub_args(static_cast<scope_t&>(args));
disp_sub_args.push_back(string_value(predicate));
diff --git a/src/predicate.cc b/src/predicate.cc
index ce71a180..4f712904 100644
--- a/src/predicate.cc
+++ b/src/predicate.cc
@@ -32,175 +32,330 @@
#include <system.hh>
#include "predicate.h"
+#include "op.h"
namespace ledger {
-string args_to_predicate_expr(value_t::sequence_t::const_iterator& begin,
- value_t::sequence_t::const_iterator end)
+// Produce the next token of the query. A token handed back through
+// push_token() is returned first; otherwise characters are read from the
+// current argument string, moving on to the next argument once the current
+// one has been consumed. Returns END_REACHED when no arguments remain.
+query_lexer_t::token_t query_lexer_t::next_token()
{
- std::ostringstream expr;
-
- bool append_or = false;
- bool only_parenthesis = false;
-
- while (begin != end) {
- string arg = (*begin).as_string();
- string prefix;
+ if (token_cache.kind != token_t::UNKNOWN) {
+ token_t tok = token_cache;
+ token_cache = token_t();
+ return tok;
+ }
- if (arg == "show") {
- ++begin;
- break;
+ // Loop rather than test once: an empty argument string would otherwise
+ // leave arg_i == arg_end, and the switch below would dereference it.
+ while (arg_i == arg_end) {
+ if (begin == end || ++begin == end) {
+ return token_t(token_t::END_REACHED);
+ } else {
+ arg_i = (*begin).as_string().begin();
+ arg_end = (*begin).as_string().end();
}
+ }
- bool parse_argument = true;
- bool only_closed_parenthesis = false;;
+ resume:
+ // Set by a leading backslash. It is not cleared again while the identifier
+ // below is being read, so every later operator character in that
+ // identifier is also taken literally.
+ bool consume_next = false;
+ switch (*arg_i) {
+ case ' ':
+ case '\t':
+ case '\r':
+ case '\n':
+ if (++arg_i == arg_end)
+ return next_token();
+ goto resume;
- if (arg == "not" || arg == "NOT") {
- if (append_or)
- prefix = " | ! ";
- else
- prefix = " ! ";
- parse_argument = false;
- append_or = false;
- }
- else if (arg == "and" || arg == "AND") {
- prefix = " & ";
- parse_argument = false;
- append_or = false;
- }
- else if (arg == "or" || arg == "OR") {
- prefix = " | ";
- parse_argument = false;
- append_or = false;
- }
- else if (append_or) {
- if (! only_parenthesis)
- prefix = " | ";
+ case '(': ++arg_i; return token_t(token_t::LPAREN);
+ case ')': ++arg_i; return token_t(token_t::RPAREN);
+ case '&': ++arg_i; return token_t(token_t::TOK_AND);
+ case '|': ++arg_i; return token_t(token_t::TOK_OR);
+ case '!': ++arg_i; return token_t(token_t::TOK_NOT);
+ case '@': ++arg_i; return token_t(token_t::TOK_PAYEE);
+ case '#': ++arg_i; return token_t(token_t::TOK_CODE);
+ case '%': ++arg_i; return token_t(token_t::TOK_META);
+ case '=':
+ // The '=' keyword at the beginning of a string causes the entire string
+ // to be taken as an expression.
+ if (arg_i == (*begin).as_string().begin())
+ consume_whitespace = true;
+ ++arg_i;
+ return token_t(token_t::TOK_EQ);
+
+ case '\\':
+ consume_next = true;
+ ++arg_i;
+ // fall through...
+ default: {
+ // Accumulate an identifier up to whitespace or an operator character.
+ string ident;
+ for (; arg_i != arg_end; ++arg_i) {
+ switch (*arg_i) {
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\r':
+ if (! consume_whitespace)
+ goto test_ident;
+ else
+ ident.push_back(*arg_i);
+ break;
+ case '(':
+ case ')':
+ case '&':
+ case '|':
+ case '!':
+ case '@':
+ case '#':
+ case '%':
+ case '=':
+ if (! consume_next)
+ goto test_ident;
+ // fall through...
+ default:
+ ident.push_back(*arg_i);
+ break;
+ }
}
- else {
- append_or = true;
+ consume_whitespace = false;
+
+ test_ident:
+ // Map reserved words to their keyword tokens; anything else is a TERM.
+ if (ident == "and")
+ return token_t(token_t::TOK_AND);
+ else if (ident == "or")
+ return token_t(token_t::TOK_OR);
+ else if (ident == "not")
+ return token_t(token_t::TOK_NOT);
+ else if (ident == "account")
+ return token_t(token_t::TOK_ACCOUNT);
+ else if (ident == "desc")
+ return token_t(token_t::TOK_PAYEE);
+ else if (ident == "payee")
+ return token_t(token_t::TOK_PAYEE);
+ else if (ident == "code")
+ return token_t(token_t::TOK_CODE);
+ else if (ident == "note")
+ return token_t(token_t::TOK_NOTE);
+ else if (ident == "tag")
+ return token_t(token_t::TOK_META);
+ else if (ident == "meta")
+ return token_t(token_t::TOK_META);
+ else if (ident == "data")
+ return token_t(token_t::TOK_META);
+ else if (ident == "expr") {
+ // The expr keyword takes the whole of the next string as its
+ // argument.
+ consume_whitespace = true;
+ return token_t(token_t::TOK_EXPR);
}
+ else
+ return token_t(token_t::TERM, ident);
+ break;
+ }
+ }
- value_t::sequence_t::const_iterator next = begin;
- if (++next != end) {
- if (arg == "desc" || arg == "DESC" ||
- arg == "payee" || arg == "PAYEE") {
- arg = string("@") + (*++begin).as_string();
- }
- else if (arg == "code" || arg == "CODE") {
- arg = string("#") + (*++begin).as_string();
- }
- else if (arg == "note" || arg == "NOTE") {
- arg = string("&") + (*++begin).as_string();
- }
- else if (arg == "tag" || arg == "TAG" ||
- arg == "meta" || arg == "META" ||
- arg == "data" || arg == "DATA") {
- arg = string("%") + (*++begin).as_string();
- }
- else if (arg == "expr" || arg == "EXPR") {
- arg = string("=") + (*++begin).as_string();
+ return token_t(token_t::UNKNOWN);
+}
+
+// Report this token as unexpected by throwing a parse_error whose message
+// depends on the token's kind. The token's kind is reset to UNKNOWN before
+// the error is raised.
+void query_lexer_t::token_t::unexpected()
+{
+  const kind_t prev_kind = kind;
+
+  switch (prev_kind) {
+  case END_REACHED:
+    kind = UNKNOWN;
+    throw_(parse_error, _("Unexpected end of expression"));
+  case TERM:
+    kind = UNKNOWN;
+    throw_(parse_error, _("Unexpected string '%1'") << *value);
+  default: {
+    // symbol() reads `kind', so it must be evaluated before the reset;
+    // otherwise it would always take its UNKNOWN branch.
+    const string sym = symbol();
+    kind = UNKNOWN;
+    throw_(parse_error, _("Unexpected token '%1'") << sym);
+  }
+  }
+}
+
+// Throw a parse_error describing a mismatch between the character that was
+// wanted and the character `c' that was found. A value of '\0' or -1 for
+// either argument means "none". The token's kind is reset to UNKNOWN first.
+void query_lexer_t::token_t::expected(char wanted, char c)
+{
+  kind = UNKNOWN;
+
+  const bool nothing_found  = (c == '\0' || c == -1);
+  const bool nothing_wanted = (wanted == '\0' || wanted == -1);
+
+  if (nothing_found && nothing_wanted)
+    throw_(parse_error, _("Unexpected end"));
+  else if (nothing_found)
+    throw_(parse_error, _("Missing '%1'") << wanted);
+  else if (nothing_wanted)
+    throw_(parse_error, _("Invalid char '%1'") << c);
+  else
+    throw_(parse_error, _("Invalid char '%1' (wanted '%2')") << c << wanted);
+}
+
+// Parse a single query term. `tok_context' names the field that a bare
+// TERM is matched against. Returns a null pointer when the end of input is
+// reached or when the next token cannot start a term (that token is pushed
+// back to the lexer).
+expr_t::ptr_op_t
+query_parser_t::parse_query_term(query_lexer_t::token_t::kind_t tok_context)
+{
+ expr_t::ptr_op_t node;
+
+ query_lexer_t::token_t tok = lexer.next_token();
+ switch (tok.kind) {
+ case query_lexer_t::token_t::END_REACHED:
+ break;
+
+ // A field keyword switches the context for the term that follows it.
+ case query_lexer_t::token_t::TOK_ACCOUNT:
+ case query_lexer_t::token_t::TOK_PAYEE:
+ case query_lexer_t::token_t::TOK_CODE:
+ case query_lexer_t::token_t::TOK_NOTE:
+ case query_lexer_t::token_t::TOK_META:
+ case query_lexer_t::token_t::TOK_EXPR:
+ node = parse_query_term(tok.kind);
+ if (! node)
+ throw_(parse_error,
+ _("%1 operator not followed by argument") << tok.symbol());
+ break;
+
+ case query_lexer_t::token_t::TERM:
+ assert(tok.value);
+ if (tok_context == query_lexer_t::token_t::TOK_META) {
+ // Metadata terms are not handled here; this path only asserts.
+ assert(0);
+ } else {
+ // Build an O_MATCH node: <field identifier> matched against a mask
+ // made from the term's text.
+ node = new expr_t::op_t(expr_t::op_t::O_MATCH);
+
+ expr_t::ptr_op_t ident;
+ ident = new expr_t::op_t(expr_t::op_t::IDENT);
+ switch (tok_context) {
+ case query_lexer_t::token_t::TOK_ACCOUNT:
+ ident->set_ident("account"); break;
+ case query_lexer_t::token_t::TOK_PAYEE:
+ ident->set_ident("payee"); break;
+ case query_lexer_t::token_t::TOK_CODE:
+ ident->set_ident("code"); break;
+ case query_lexer_t::token_t::TOK_NOTE:
+ ident->set_ident("note"); break;
+ default:
+ // Any other context (e.g. TOK_EXPR) is not handled by this switch.
+ assert(0); break;
}
+
+ expr_t::ptr_op_t mask;
+ mask = new expr_t::op_t(expr_t::op_t::VALUE);
+ mask->set_value(mask_t(*tok.value));
+
+ node->set_left(ident);
+ node->set_right(mask);
}
+ break;
- if (parse_argument) {
- bool in_prefix = true;
- bool found_specifier = false;
- bool no_final_slash = false;
-
- only_parenthesis = true;
-
- std::ostringstream buf;
- string parens;
-
- for (const char * c = arg.c_str(); *c != '\0'; c++) {
- bool consumed = false;
-
- if (*c != '(' && *c != ')')
- only_parenthesis = false;
-
- if (in_prefix) {
- switch (*c) {
- case ')':
- if (only_parenthesis)
- only_closed_parenthesis = true;
- // fall through...
- case '(':
- parens += c;
- consumed = true;
- break;
- case '@':
- buf << "(payee =~ /";
- found_specifier = true;
- consumed = true;
- break;
- case '#':
- buf << "(code =~ /";
- found_specifier = true;
- consumed = true;
- break;
- case '=':
- buf << "(";
- found_specifier = true;
- no_final_slash = true;
- consumed = true;
- break;
- case '&':
- buf << "(note =~ /";
- found_specifier = true;
- consumed = true;
- break;
- case '%': {
- bool found_metadata = false;
- for (const char *q = c; *q != '\0'; q++)
- if (*q == '=') {
- buf << "has_tag(/"
- << string(c + 1, q - c - 1) << "/, /";
- found_metadata = true;
- c = q;
- break;
- }
- if (! found_metadata) {
- buf << "has_tag(/";
- }
- found_specifier = true;
- consumed = true;
- break;
- }
- default:
- if (! found_specifier) {
- buf << parens << "(account =~ /";
- parens.clear();
- found_specifier = true;
- }
- in_prefix = false;
- break;
- }
- }
-
- if (! consumed)
- buf << *c;
- }
+ // A parenthesized sub-expression must be closed by ')'; otherwise
+ // expected() raises a parse_error.
+ case query_lexer_t::token_t::LPAREN:
+ node = parse_query_expr(tok_context);
+ tok = lexer.next_token();
+ if (tok.kind != query_lexer_t::token_t::RPAREN)
+ tok.expected(')');
+ break;
- if (! prefix.empty() &&
- ! (only_parenthesis && only_closed_parenthesis))
- expr << prefix;
+ // Not the start of a term: hand the token back and return a null node.
+ default:
+ lexer.push_token(tok);
+ break;
+ }
+
+ return node;
+}
- expr << parens << buf.str();
+// Parse an optional negation followed by a query term. Returns an O_NOT
+// node wrapping the term when the next token is TOK_NOT; otherwise returns
+// whatever parse_query_term() yields (possibly a null pointer).
+expr_t::ptr_op_t
+query_parser_t::parse_unary_expr(query_lexer_t::token_t::kind_t tok_context)
+{
+ expr_t::ptr_op_t node;
- if (found_specifier) {
- if (! no_final_slash)
- expr << "/";
- expr << ")";
+ query_lexer_t::token_t tok = lexer.next_token();
+ switch (tok.kind) {
+ case query_lexer_t::token_t::TOK_NOT: {
+ expr_t::ptr_op_t term(parse_query_term(tok_context));
+ if (! term)
+ throw_(parse_error,
+ _("%1 operator not followed by argument") << tok.symbol());
+
+ node = new expr_t::op_t(expr_t::op_t::O_NOT);
+ node->set_left(term);
+ break;
+ }
+
+ // No negation: push the token back so parse_query_term() sees it.
+ default:
+ lexer.push_token(tok);
+ node = parse_query_term(tok_context);
+ break;
+ }
+
+ return node;
+}
+
+// Parse a chain of unary expressions joined by TOK_AND into a
+// left-associative tree of O_AND nodes. Returns a null pointer if no
+// leading operand could be parsed; throws parse_error when an AND operator
+// has no right-hand operand.
+expr_t::ptr_op_t
+query_parser_t::parse_and_expr(query_lexer_t::token_t::kind_t tok_context)
+{
+  expr_t::ptr_op_t lhs = parse_unary_expr(tok_context);
+  if (! lhs)
+    return expr_t::ptr_op_t();
+
+  for (;;) {
+    query_lexer_t::token_t tok = lexer.next_token();
+    if (tok.kind != query_lexer_t::token_t::TOK_AND) {
+      // Not ours: give the token back and finish with what has been built.
+      lexer.push_token(tok);
+      return lhs;
+    }
+
+    expr_t::ptr_op_t conj(new expr_t::op_t(expr_t::op_t::O_AND));
+    conj->set_left(lhs);
+    conj->set_right(parse_unary_expr(tok_context));
+    if (! conj->right())
+      throw_(parse_error,
+             _("%1 operator not followed by argument") << tok.symbol());
+    lhs = conj;
+  }
+}
+
+// Parse a chain of AND-expressions joined by TOK_OR into a left-associative
+// tree of O_OR nodes. Returns a null pointer if no leading operand could be
+// parsed; throws parse_error when an OR operator has no right-hand operand.
+expr_t::ptr_op_t
+query_parser_t::parse_or_expr(query_lexer_t::token_t::kind_t tok_context)
+{
+ if (expr_t::ptr_op_t node = parse_and_expr(tok_context)) {
+ while (true) {
+ query_lexer_t::token_t tok = lexer.next_token();
+ if (tok.kind == query_lexer_t::token_t::TOK_OR) {
+ expr_t::ptr_op_t prev(node);
+ node = new expr_t::op_t(expr_t::op_t::O_OR);
+ node->set_left(prev);
+ node->set_right(parse_and_expr(tok_context));
+ if (! node->right())
+ throw_(parse_error,
+ _("%1 operator not followed by argument") << tok.symbol());
+ } else {
+ // Any other token ends the chain; hand it back to the lexer.
+ lexer.push_token(tok);
+ break;
+ }
- } else {
- expr << prefix;
}
+ return node;
+ }
+ return expr_t::ptr_op_t();
+}
- begin++;
+// Parse a sequence of OR-expressions. When one OR-expression is directly
+// followed by another with no operator between them, the two are joined
+// with an O_OR node (the recursion builds the right-hand side). Returns a
+// null pointer if nothing could be parsed.
+expr_t::ptr_op_t
+query_parser_t::parse_query_expr(query_lexer_t::token_t::kind_t tok_context)
+{
+ if (expr_t::ptr_op_t node = parse_or_expr(tok_context)) {
+ if (expr_t::ptr_op_t next = parse_query_expr(tok_context)) {
+ expr_t::ptr_op_t prev(node);
+ node = new expr_t::op_t(expr_t::op_t::O_OR);
+ node->set_left(prev);
+ node->set_right(next);
+ }
+ return node;
}
+ return expr_t::ptr_op_t();
+}
- return std::string("(") + expr.str() + ")";
+// Entry point of the parser: parse the whole query, matching bare terms
+// against the account field unless a keyword selects another one.
+expr_t::ptr_op_t query_parser_t::parse()
+{
+  const query_lexer_t::token_t::kind_t default_context =
+    query_lexer_t::token_t::TOK_ACCOUNT;
+  return parse_query_expr(default_context);
+}
+
+// Parse the query arguments in [begin, end) and wrap the resulting
+// operation tree in an expr_t.
+// NOTE(review): `begin' is taken by reference, but query_parser_t and
+// query_lexer_t receive it by value, so the caller's iterator is not
+// advanced by this function -- confirm that callers do not rely on it.
+expr_t args_to_predicate(value_t::sequence_t::const_iterator& begin,
+ value_t::sequence_t::const_iterator end)
+{
+ query_parser_t parser(begin, end);
+ return expr_t(parser.parse());
}
} // namespace ledger
diff --git a/src/predicate.h b/src/predicate.h
index 3e9fc6b1..e1048f83 100644
--- a/src/predicate.h
+++ b/src/predicate.h
@@ -96,8 +96,160 @@ public:
}
};
-string args_to_predicate_expr(value_t::sequence_t::const_iterator& begin,
- value_t::sequence_t::const_iterator end);
+// Splits a sequence of string-valued arguments into query tokens.
+class query_lexer_t
+{
+  // Current argument and end of the argument sequence.
+  value_t::sequence_t::const_iterator begin;
+  value_t::sequence_t::const_iterator end;
+
+  // Read position within the current argument string, and that string's end.
+  string::const_iterator arg_i;
+  string::const_iterator arg_end;
+
+  // When set, whitespace is accumulated into the identifier being read
+  // instead of terminating it.
+  bool consume_whitespace;
+
+public:
+  // A single lexical token: its kind and, for TERM tokens, its text.
+  struct token_t
+  {
+    enum kind_t {
+      UNKNOWN,
+
+      LPAREN,
+      RPAREN,
+
+      TOK_NOT,
+      TOK_AND,
+      TOK_OR,
+      TOK_EQ,
+
+      TOK_ACCOUNT,
+      TOK_PAYEE,
+      TOK_CODE,
+      TOK_NOTE,
+      TOK_META,
+      TOK_EXPR,
+
+      TERM,
+
+      END_REACHED
+
+    } kind;
+
+    optional<string> value;
+
+    explicit token_t(kind_t _kind = UNKNOWN,
+                     const optional<string>& _value = none)
+      : kind(_kind), value(_value) {
+      TRACE_CTOR(query_lexer_t::token_t, "");
+    }
+    token_t(const token_t& tok)
+      : kind(tok.kind), value(tok.value) {
+      TRACE_CTOR(query_lexer_t::token_t, "copy");
+    }
+    ~token_t() throw() {
+      TRACE_DTOR(query_lexer_t::token_t);
+    }
+
+    token_t& operator=(const token_t& tok) {
+      if (this != &tok) {
+        kind  = tok.kind;
+        value = tok.value;
+      }
+      return *this;
+    }
+
+    // True for every token except END_REACHED.
+    operator bool() const {
+      return kind != END_REACHED;
+    }
+
+    // Name of the token kind, for diagnostics. TERM includes its text.
+    string to_string() const {
+      switch (kind) {
+      case UNKNOWN:     return "UNKNOWN";
+      case LPAREN:      return "LPAREN";
+      case RPAREN:      return "RPAREN";
+      case TOK_NOT:     return "TOK_NOT";
+      case TOK_AND:     return "TOK_AND";
+      case TOK_OR:      return "TOK_OR";
+      case TOK_EQ:      return "TOK_EQ";
+      case TOK_ACCOUNT: return "TOK_ACCOUNT";
+      case TOK_PAYEE:   return "TOK_PAYEE";
+      case TOK_CODE:    return "TOK_CODE";
+      case TOK_NOTE:    return "TOK_NOTE";
+      case TOK_META:    return "TOK_META";
+      case TOK_EXPR:    return "TOK_EXPR";
+      case TERM:        return string("TERM(") + *value + ")";
+      case END_REACHED: return "END_REACHED";
+      }
+      // Only reachable if `kind' holds a value outside the enumerators;
+      // return something rather than flowing off the end of the function.
+      assert(0);
+      return "<INVALID>";
+    }
+
+    // Query-language spelling of the token, used in error messages. Must
+    // not be called on TERM or UNKNOWN tokens (both paths assert).
+    string symbol() const {
+      switch (kind) {
+      case LPAREN:      return "(";
+      case RPAREN:      return ")";
+      case TOK_NOT:     return "not";
+      case TOK_AND:     return "and";
+      case TOK_OR:      return "or";
+      case TOK_EQ:      return "=";
+      case TOK_ACCOUNT: return "account";
+      case TOK_PAYEE:   return "payee";
+      case TOK_CODE:    return "code";
+      case TOK_NOTE:    return "note";
+      case TOK_META:    return "meta";
+      case TOK_EXPR:    return "expr";
+
+      case END_REACHED: return "<EOF>";
+
+      case TERM:
+        assert(0);
+        return "<TERM>";
+
+      case UNKNOWN:
+      default:
+        assert(0);
+        return "<UNKNOWN>";
+      }
+    }
+
+    void unexpected();
+    void expected(char wanted, char c = '\0');
+  };
+
+  // One-token pushback slot; a kind of UNKNOWN means the slot is empty.
+  token_t token_cache;
+
+  // Start lexing at the first argument. The sequence must not be empty.
+  query_lexer_t(value_t::sequence_t::const_iterator _begin,
+                value_t::sequence_t::const_iterator _end)
+    : begin(_begin), end(_end), consume_whitespace(false)
+  {
+    assert(begin != end);
+    arg_i   = (*begin).as_string().begin();
+    arg_end = (*begin).as_string().end();
+  }
+
+  token_t next_token();
+
+  // Hand a token back so the next call to next_token() returns it. Only
+  // one token may be pending at a time.
+  void push_token(token_t tok) {
+    assert(token_cache.kind == token_t::UNKNOWN);
+    token_cache = tok;
+  }
+};
+
+// Parser that turns the token stream of a query_lexer_t into an operation
+// tree. The parse functions call each other in the order query -> or ->
+// and -> unary -> term.
+class query_parser_t
+{
+  // Shorthand for the token kind used as the parsing context.
+  typedef query_lexer_t::token_t::kind_t kind_t;
+
+  query_lexer_t lexer;
+
+  expr_t::ptr_op_t parse_query_term(kind_t tok_context);
+  expr_t::ptr_op_t parse_unary_expr(kind_t tok_context);
+  expr_t::ptr_op_t parse_and_expr(kind_t tok_context);
+  expr_t::ptr_op_t parse_or_expr(kind_t tok_context);
+  expr_t::ptr_op_t parse_query_expr(kind_t tok_context);
+
+public:
+  // Both iterators are copied into the lexer.
+  query_parser_t(value_t::sequence_t::const_iterator begin,
+                 value_t::sequence_t::const_iterator end)
+    : lexer(begin, end) {}
+
+  expr_t::ptr_op_t parse();
+};
+
+// Parse the query arguments in [begin, end) into an expression.
+expr_t args_to_predicate(value_t::sequence_t::const_iterator& begin,
+ value_t::sequence_t::const_iterator end);
} // namespace ledger
diff --git a/src/report.cc b/src/report.cc
index 62b54ad1..febe43e3 100644
--- a/src/report.cc
+++ b/src/report.cc
@@ -380,7 +380,7 @@ namespace {
value_t::sequence_t::const_iterator end =
args.value().as_sequence().end();
- string limit = args_to_predicate_expr(begin, end);
+ string limit = args_to_predicate(begin, end).text();
if (! limit.empty())
report.HANDLER(limit_).on(whence, limit);
@@ -390,7 +390,7 @@ namespace {
string display;
if (begin != end)
- display = args_to_predicate_expr(begin, end);
+ display = args_to_predicate(begin, end).text();
if (! display.empty())
report.HANDLER(display_).on(whence, display);