From d19745afded63276449bb56b7d24c38c7e32d0a7 Mon Sep 17 00:00:00 2001 From: John Wiegley Date: Tue, 1 Jun 2010 17:32:41 -0400 Subject: Improve parsing of 'expr' query terms Fixes #157 / 9DF85DF2-4BF5-4931-A30C-2592A10BB5C0 --- src/query.cc | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'src/query.cc') diff --git a/src/query.cc b/src/query.cc index 1f086df8..c79fe1c2 100644 --- a/src/query.cc +++ b/src/query.cc @@ -55,8 +55,9 @@ query_t::lexer_t::token_t query_t::lexer_t::next_token() if (consume_next_arg) { consume_next_arg = false; + token_t tok(token_t::TERM, string(arg_i, arg_end)); arg_i = arg_end; - return token_t(token_t::TERM, (*begin).as_string()); + return tok; } resume: @@ -70,23 +71,25 @@ query_t::lexer_t::token_t query_t::lexer_t::next_token() return next_token(); goto resume; + case '\'': case '/': { string pat; - bool found_end_slash = false; + char closing = *arg_i; + bool found_closing = false; for (++arg_i; arg_i != arg_end; ++arg_i) { if (*arg_i == '\\') { if (++arg_i == arg_end) throw_(parse_error, _("Unexpected '\\' at end of pattern")); } - else if (*arg_i == '/') { + else if (*arg_i == closing) { ++arg_i; - found_end_slash = true; + found_closing = true; break; } pat.push_back(*arg_i); } - if (! found_end_slash) - throw_(parse_error, _("Expected '/' at end of pattern")); + if (! found_closing) + throw_(parse_error, _("Expected '%1' at end of pattern") << closing); if (pat.empty()) throw_(parse_error, _("Match pattern is empty")); -- cgit v1.2.3 From 5cdd36f358dcafddd29e7a6c5e0d8210b65bfd79 Mon Sep 17 00:00:00 2001 From: John Wiegley Date: Tue, 1 Jun 2010 18:32:27 -0400 Subject: Further improved parsing of query expressions Fixes #210 / D4C2DD6F-8967-4FFC-BBBC-A941F9C53475 --- src/query.cc | 42 ++++++++++++++++++++++++------------------ src/query.h | 27 ++++++++++++++++----------- src/textual.cc | 2 +- test/unit/t_expr.cc | 6 ++---- 4 files changed, 43 insertions(+), 34 deletions(-) (limited to 'src/query.cc') diff --git a/src/query.cc b/src/query.cc index c79fe1c2..363c6f73 100644 --- a/src/query.cc +++ b/src/query.cc @@ -53,25 +53,9 @@ query_t::lexer_t::token_t query_t::lexer_t::next_token() } } - if (consume_next_arg) { - consume_next_arg = false; - token_t tok(token_t::TERM, string(arg_i, arg_end)); - arg_i = arg_end; - return tok; - } - - resume: - bool consume_next = false; switch (*arg_i) { - case ' ': - case '\t': - case '\r': - case '\n': - if (++arg_i == arg_end) - return next_token(); - goto resume; - case '\'': + case '"': case '/': { string pat; char closing = *arg_i; @@ -95,6 +79,25 @@ query_t::lexer_t::token_t query_t::lexer_t::next_token() return token_t(token_t::TERM, pat); } + } + + if (multiple_args && consume_next_arg) { + consume_next_arg = false; + token_t tok(token_t::TERM, string(arg_i, arg_end)); + arg_i = arg_end; + return tok; + } + + resume: + bool consume_next = false; + switch (*arg_i) { + case ' ': + case '\t': + case '\r': + case '\n': + if (++arg_i == arg_end) + return next_token(); + goto resume; case '(': ++arg_i; return token_t(token_t::LPAREN); case ')': ++arg_i; return token_t(token_t::RPAREN); @@ -104,7 +107,10 @@ query_t::lexer_t::token_t query_t::lexer_t::next_token() case '@': ++arg_i; return token_t(token_t::TOK_PAYEE); case '#': ++arg_i; return token_t(token_t::TOK_CODE); case '%': ++arg_i; return token_t(token_t::TOK_META); - case '=': ++arg_i; return token_t(token_t::TOK_EQ); + case '=': + ++arg_i; + consume_next_arg = true; + return token_t(token_t::TOK_EQ); case '\\': consume_next = true; diff --git a/src/query.h b/src/query.h index 2b0bc75d..59adfd72 100644 --- a/src/query.h +++ b/src/query.h @@ -62,6 +62,7 @@ public: bool consume_whitespace; bool consume_next_arg; + bool multiple_args; public: struct token_t @@ -177,10 +178,11 @@ public: token_t token_cache; lexer_t(value_t::sequence_t::const_iterator _begin, - value_t::sequence_t::const_iterator _end) + value_t::sequence_t::const_iterator _end, + bool _multiple_args = true) : begin(_begin), end(_end), - consume_whitespace(false), - consume_next_arg(false) + consume_whitespace(false), consume_next_arg(false), + multiple_args(_multiple_args) { TRACE_CTOR(query_t::lexer_t, ""); assert(begin != end); @@ -192,6 +194,7 @@ public: arg_i(lexer.arg_i), arg_end(lexer.arg_end), consume_whitespace(lexer.consume_whitespace), consume_next_arg(lexer.consume_next_arg), + multiple_args(lexer.multiple_args), token_cache(lexer.token_cache) { TRACE_CTOR(query_t::lexer_t, "copy"); @@ -227,8 +230,8 @@ protected: expr_t::ptr_op_t parse_query_expr(lexer_t::token_t::kind_t tok_context); public: - parser_t(const value_t& _args) - : args(_args), lexer(args.begin(), args.end()) { + parser_t(const value_t& _args, bool multiple_args = true) + : args(_args), lexer(args.begin(), args.end(), multiple_args) { TRACE_CTOR(query_t::parser_t, ""); } parser_t(const parser_t& parser) @@ -261,28 +264,30 @@ public: TRACE_CTOR(query_t, "copy"); } query_t(const string& arg, - const keep_details_t& _what_to_keep = keep_details_t()) + const keep_details_t& _what_to_keep = keep_details_t(), + bool multiple_args = true) : predicate_t(_what_to_keep) { TRACE_CTOR(query_t, "string, keep_details_t"); if (! arg.empty()) { value_t temp(string_value(arg)); - parse_args(temp.to_sequence()); + parse_args(temp.to_sequence(), multiple_args); } } query_t(const value_t& args, - const keep_details_t& _what_to_keep = keep_details_t()) + const keep_details_t& _what_to_keep = keep_details_t(), + bool multiple_args = true) : predicate_t(_what_to_keep) { TRACE_CTOR(query_t, "value_t, keep_details_t"); if (! args.empty()) - parse_args(args); + parse_args(args, multiple_args); } virtual ~query_t() { TRACE_DTOR(query_t); } - void parse_args(const value_t& args) { + void parse_args(const value_t& args, bool multiple_args = true) { if (! parser) - parser = parser_t(args); + parser = parser_t(args, multiple_args); ptr = parser->parse(); // expr_t::ptr } diff --git a/src/textual.cc b/src/textual.cc index 2b204df0..9a49edd4 100644 --- a/src/textual.cc +++ b/src/textual.cc @@ -526,7 +526,7 @@ void instance_t::automated_xact_directive(char * line) std::auto_ptr ae (new auto_xact_t(query_t(string(skip_ws(line + 1)), - keep_details_t(true, true, true)))); + keep_details_t(true, true, true), false))); ae->pos = position_t(); ae->pos->pathname = pathname; ae->pos->beg_pos = line_beg_pos; diff --git a/test/unit/t_expr.cc b/test/unit/t_expr.cc index b5865948..0d88be9e 100644 --- a/test/unit/t_expr.cc +++ b/test/unit/t_expr.cc @@ -158,8 +158,6 @@ void ValueExprTestCase::testPredicateTokenizer7() assertEqual(query_t::lexer_t::token_t::TOK_EQ, tokens.next_token().kind); assertEqual(query_t::lexer_t::token_t::TERM, tokens.next_token().kind); - assertEqual(query_t::lexer_t::token_t::TOK_AND, tokens.next_token().kind); - assertEqual(query_t::lexer_t::token_t::TERM, tokens.next_token().kind); assertEqual(query_t::lexer_t::token_t::END_REACHED, tokens.next_token().kind); #endif } @@ -167,7 +165,7 @@ void ValueExprTestCase::testPredicateTokenizer7() void ValueExprTestCase::testPredicateTokenizer8() { value_t args; - args.push_back(string_value("expr foo and bar")); + args.push_back(string_value("expr 'foo and bar'")); #ifndef NOT_FOR_PYTHON query_t::lexer_t tokens(args.begin(), args.end()); @@ -182,7 +180,7 @@ void ValueExprTestCase::testPredicateTokenizer9() { value_t args; args.push_back(string_value("expr")); - args.push_back(string_value("foo and bar")); + args.push_back(string_value("'foo and bar'")); #ifndef NOT_FOR_PYTHON query_t::lexer_t tokens(args.begin(), args.end()); -- cgit v1.2.3