/* * Copyright (c) 2003-2023, John Wiegley. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * - Neither the name of New Artisans LLC nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include "query.h" #include "op.h" namespace ledger { // TODO: Extend this to handle all characters which define enclosures bool query_t::lexer_t::unbalanced_braces(string str) { int balance = 0; for (char& c : str) { if (c == '(') ++balance; else if (c == ')') if (--balance < 0) return true; } return balance != 0; } query_t::lexer_t::token_t query_t::lexer_t::next_token(query_t::lexer_t::token_t::kind_t tok_context) { if (token_cache.kind != token_t::UNKNOWN) { token_t tok = token_cache; token_cache = token_t(); return tok; } if (arg_i == arg_end) { if (begin == end || ++begin == end) { return token_t(token_t::END_REACHED); } else { arg_i = (*begin).as_string().begin(); arg_end = (*begin).as_string().end(); } } resume: switch (*arg_i) { case '\0': assert(false); break; case '\'': case '"': case '/': { string pat; char closing = *arg_i; bool found_closing = false; for (++arg_i; arg_i != arg_end; ++arg_i) { if (*arg_i == '\\') { if (++arg_i == arg_end) throw_(parse_error, _("Unexpected '\\' at end of pattern")); } else if (*arg_i == closing) { ++arg_i; found_closing = true; break; } pat.push_back(*arg_i); } if (! found_closing) throw_(parse_error, _f("Expected '%1%' at end of pattern") % closing); if (pat.empty()) throw_(parse_error, _("Match pattern is empty")); return token_t(token_t::TERM, pat); } } if (multiple_args && consume_next_arg) { consume_next_arg = false; token_t tok(token_t::TERM, string(arg_i, arg_end)); prev_arg_i = arg_i; arg_i = arg_end; return tok; } bool consume_next = false; switch (*arg_i) { case '\0': assert(false); break; case ' ': case '\t': case '\r': case '\n': if (++arg_i == arg_end) return next_token(tok_context); goto resume; case '(': ++arg_i; if (tok_context == token_t::TOK_EXPR) consume_whitespace = true; return token_t(token_t::LPAREN); case ')': ++arg_i; if (tok_context == token_t::TOK_EXPR) consume_whitespace = false; return token_t(token_t::RPAREN); case '&': ++arg_i; return token_t(token_t::TOK_AND); case '|': ++arg_i; return token_t(token_t::TOK_OR); case '!': ++arg_i; return token_t(token_t::TOK_NOT); case '@': ++arg_i; return token_t(token_t::TOK_PAYEE); case '#': ++arg_i; return token_t(token_t::TOK_CODE); case '%': ++arg_i; return token_t(token_t::TOK_META); case '=': if (arg_i == (*begin).as_string().begin()) { ++arg_i; return token_t(token_t::TOK_NOTE); } ++arg_i; consume_next = true; return token_t(token_t::TOK_EQ); case '\\': consume_next = true; ++arg_i; // fall through... default: { string ident; for (; arg_i != arg_end; ++arg_i) { switch (*arg_i) { case '\0': assert(false); break; case ' ': case '\t': case '\n': case '\r': if (! multiple_args && ! consume_whitespace && ! consume_next_arg) goto test_ident; else ident.push_back(*arg_i); break; case ')': if (unbalanced_braces(ident)) consume_next = true; if (! consume_next && tok_context == token_t::TOK_EXPR) goto test_ident; // fall through... case '(': case '&': case '|': case '!': case '@': case '#': case '%': case '=': if (! consume_next && tok_context != token_t::TOK_EXPR) goto test_ident; // fall through... default: ident.push_back(*arg_i); break; } } consume_whitespace = false; test_ident: if (ident == "and") return token_t(token_t::TOK_AND); else if (ident == "or") return token_t(token_t::TOK_OR); else if (ident == "not") return token_t(token_t::TOK_NOT); else if (ident == "code") return token_t(token_t::TOK_CODE); else if (ident == "desc") return token_t(token_t::TOK_PAYEE); else if (ident == "payee") return token_t(token_t::TOK_PAYEE); else if (ident == "note") return token_t(token_t::TOK_NOTE); else if (ident == "tag") return token_t(token_t::TOK_META); else if (ident == "meta") return token_t(token_t::TOK_META); else if (ident == "data") return token_t(token_t::TOK_META); else if (ident == "show") return token_t(token_t::TOK_SHOW); else if (ident == "only") return token_t(token_t::TOK_ONLY); else if (ident == "bold") return token_t(token_t::TOK_BOLD); else if (ident == "for") return token_t(token_t::TOK_FOR); else if (ident == "since") return token_t(token_t::TOK_SINCE); else if (ident == "until") return token_t(token_t::TOK_UNTIL); else if (ident == "expr") { // The expr keyword takes the whole of the next string as its argument. consume_next_arg = true; return token_t(token_t::TOK_EXPR); } else return token_t(token_t::TERM, ident); } } return token_t(token_t::UNKNOWN); } void query_t::lexer_t::token_t::expected(char wanted) { throw_(parse_error, _f("Missing '%1%'") % wanted); } expr_t::ptr_op_t query_t::parser_t::parse_query_term(query_t::lexer_t::token_t::kind_t tok_context) { expr_t::ptr_op_t node; lexer_t::token_t tok = lexer.next_token(tok_context); switch (tok.kind) { case lexer_t::token_t::TOK_SHOW: case lexer_t::token_t::TOK_ONLY: case lexer_t::token_t::TOK_BOLD: case lexer_t::token_t::TOK_FOR: case lexer_t::token_t::TOK_SINCE: case lexer_t::token_t::TOK_UNTIL: case lexer_t::token_t::END_REACHED: lexer.push_token(tok); break; case lexer_t::token_t::TOK_CODE: case lexer_t::token_t::TOK_PAYEE: case lexer_t::token_t::TOK_NOTE: case lexer_t::token_t::TOK_ACCOUNT: case lexer_t::token_t::TOK_META: case lexer_t::token_t::TOK_EXPR: node = parse_query_term(tok.kind); if (! node) throw_(parse_error, _f("%1% operator not followed by argument") % tok.symbol()); break; case lexer_t::token_t::TERM: assert(tok.value); switch (tok_context) { case lexer_t::token_t::TOK_EXPR: node = expr_t(*tok.value).get_op(); break; case lexer_t::token_t::TOK_META: { node = new expr_t::op_t(expr_t::op_t::O_CALL); expr_t::ptr_op_t ident = new expr_t::op_t(expr_t::op_t::IDENT); ident->set_ident("has_tag"); node->set_left(ident); expr_t::ptr_op_t arg1 = new expr_t::op_t(expr_t::op_t::VALUE); arg1->set_value(mask_t(*tok.value)); tok = lexer.peek_token(tok_context); if (tok.kind == lexer_t::token_t::TOK_EQ) { tok = lexer.next_token(tok_context); tok = lexer.next_token(tok_context); if (tok.kind != lexer_t::token_t::TERM) throw_(parse_error, _("Metadata equality operator not followed by term")); expr_t::ptr_op_t arg2 = new expr_t::op_t(expr_t::op_t::VALUE); assert(tok.value); arg2->set_value(mask_t(*tok.value)); node->set_right(expr_t::op_t::new_node (expr_t::op_t::O_SEQ, expr_t::op_t::new_node (expr_t::op_t::O_CONS, arg1, arg2))); } else { node->set_right(arg1); } break; } default: { node = new expr_t::op_t(expr_t::op_t::O_MATCH); expr_t::ptr_op_t ident = new expr_t::op_t(expr_t::op_t::IDENT); switch (tok_context) { case lexer_t::token_t::TOK_ACCOUNT: ident->set_ident("account"); break; case lexer_t::token_t::TOK_PAYEE: ident->set_ident("payee"); break; case lexer_t::token_t::TOK_CODE: ident->set_ident("code"); break; case lexer_t::token_t::TOK_NOTE: ident->set_ident("note"); break; default: assert(false); break; } expr_t::ptr_op_t mask = new expr_t::op_t(expr_t::op_t::VALUE); DEBUG("query.mask", "Mask from string: " << *tok.value); mask->set_value(mask_t(*tok.value)); DEBUG("query.mask", "Mask is: " << mask->as_value().as_mask().str()); node->set_left(ident); node->set_right(mask); } } break; case lexer_t::token_t::LPAREN: node = parse_query_expr(tok_context, true); tok = lexer.next_token(tok_context); if (tok.kind != lexer_t::token_t::RPAREN) tok.expected(')'); break; default: lexer.push_token(tok); break; } return node; } expr_t::ptr_op_t query_t::parser_t::parse_unary_expr(lexer_t::token_t::kind_t tok_context) { expr_t::ptr_op_t node; lexer_t::token_t tok = lexer.next_token(tok_context); switch (tok.kind) { case lexer_t::token_t::TOK_NOT: { expr_t::ptr_op_t term(parse_query_term(tok_context)); if (! term) throw_(parse_error, _f("%1% operator not followed by argument") % tok.symbol()); node = new expr_t::op_t(expr_t::op_t::O_NOT); node->set_left(term); break; } default: lexer.push_token(tok); node = parse_query_term(tok_context); break; } return node; } expr_t::ptr_op_t query_t::parser_t::parse_and_expr(lexer_t::token_t::kind_t tok_context) { if (expr_t::ptr_op_t node = parse_unary_expr(tok_context)) { while (true) { lexer_t::token_t tok = lexer.next_token(tok_context); if (tok.kind == lexer_t::token_t::TOK_AND) { expr_t::ptr_op_t prev(node); node = new expr_t::op_t(expr_t::op_t::O_AND); node->set_left(prev); node->set_right(parse_unary_expr(tok_context)); if (! node->right()) throw_(parse_error, _f("%1% operator not followed by argument") % tok.symbol()); } else { lexer.push_token(tok); break; } } return node; } return expr_t::ptr_op_t(); } expr_t::ptr_op_t query_t::parser_t::parse_or_expr(lexer_t::token_t::kind_t tok_context) { if (expr_t::ptr_op_t node = parse_and_expr(tok_context)) { while (true) { lexer_t::token_t tok = lexer.next_token(tok_context); if (tok.kind == lexer_t::token_t::TOK_OR) { expr_t::ptr_op_t prev(node); node = new expr_t::op_t(expr_t::op_t::O_OR); node->set_left(prev); node->set_right(parse_and_expr(tok_context)); if (! node->right()) throw_(parse_error, _f("%1% operator not followed by argument") % tok.symbol()); } else { lexer.push_token(tok); break; } } return node; } return expr_t::ptr_op_t(); } expr_t::ptr_op_t query_t::parser_t::parse_query_expr(lexer_t::token_t::kind_t tok_context, bool subexpression) { expr_t::ptr_op_t limiter; while (expr_t::ptr_op_t next = parse_or_expr(tok_context)) { if (! limiter) { limiter = next; } else { expr_t::ptr_op_t prev(limiter); limiter = new expr_t::op_t(expr_t::op_t::O_OR); limiter->set_left(prev); limiter->set_right(next); } } if (! subexpression) { if (limiter) query_map.insert (query_map_t::value_type (QUERY_LIMIT, predicate_t(limiter, what_to_keep).print_to_str())); lexer_t::token_t tok = lexer.peek_token(tok_context); while (tok.kind != lexer_t::token_t::END_REACHED) { switch (tok.kind) { case lexer_t::token_t::TOK_SHOW: case lexer_t::token_t::TOK_ONLY: case lexer_t::token_t::TOK_BOLD: { lexer.next_token(tok_context); kind_t kind; switch (tok.kind) { case lexer_t::token_t::TOK_SHOW: kind = QUERY_SHOW; break; case lexer_t::token_t::TOK_ONLY: kind = QUERY_ONLY; break; case lexer_t::token_t::TOK_BOLD: kind = QUERY_BOLD; break; default: break; } expr_t::ptr_op_t node; while (expr_t::ptr_op_t next = parse_or_expr(tok_context)) { if (! node) { node = next; } else { expr_t::ptr_op_t prev(node); node = new expr_t::op_t(expr_t::op_t::O_OR); node->set_left(prev); node->set_right(next); } } if (node) query_map.insert (query_map_t::value_type (kind, predicate_t(node, what_to_keep).print_to_str())); break; } case lexer_t::token_t::TOK_FOR: case lexer_t::token_t::TOK_SINCE: case lexer_t::token_t::TOK_UNTIL: { tok = lexer.next_token(tok_context); string for_string; if (tok.kind == lexer_t::token_t::TOK_SINCE) for_string = "since"; else if (tok.kind == lexer_t::token_t::TOK_UNTIL) for_string = "until"; lexer.consume_next_arg = true; tok = lexer.peek_token(tok_context); while (tok.kind != lexer_t::token_t::END_REACHED) { tok = lexer.next_token(tok_context); assert(tok.kind == lexer_t::token_t::TERM); if (*tok.value == "show" || *tok.value == "bold" || *tok.value == "for" || *tok.value == "since" || *tok.value == "until") { lexer.token_cache = lexer_t::token_t(); lexer.arg_i = lexer.prev_arg_i; lexer.consume_next_arg = false; break; } if (! for_string.empty()) for_string += " "; for_string += *tok.value; lexer.consume_next_arg = true; tok = lexer.peek_token(tok_context); } if (! for_string.empty()) query_map.insert(query_map_t::value_type(QUERY_FOR, for_string)); break; } default: goto done; } tok = lexer.peek_token(tok_context); } done: ; } return limiter; } } // namespace ledger