diff options
Diffstat (limited to 'src/parser.cc')
-rw-r--r-- | src/parser.cc | 517 |
1 files changed, 517 insertions, 0 deletions
diff --git a/src/parser.cc b/src/parser.cc new file mode 100644 index 00000000..ef778411 --- /dev/null +++ b/src/parser.cc @@ -0,0 +1,517 @@ +/* + * Copyright (c) 2003-2009, John Wiegley. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * - Neither the name of New Artisans LLC nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <system.hh> + +#include "parser.h" + +namespace ledger { + +expr_t::ptr_op_t +expr_t::parser_t::parse_value_term(std::istream& in, + const parse_flags_t& tflags) const +{ + ptr_op_t node; + + token_t& tok = next_token(in, tflags); + + switch (tok.kind) { + case token_t::VALUE: + node = new op_t(op_t::VALUE); + node->set_value(tok.value); + break; + + case token_t::IDENT: { + string ident = tok.value.as_string(); + + node = new op_t(op_t::IDENT); + node->set_ident(ident); + + // An identifier followed by ( represents a function call + tok = next_token(in, tflags.plus_flags(PARSE_OP_CONTEXT)); + if (tok.kind == token_t::LPAREN) { + ptr_op_t call_node(new op_t(op_t::O_CALL)); + call_node->set_left(node); + node = call_node; + + push_token(tok); // let the parser see it again + node->set_right(parse_value_expr(in, tflags.plus_flags(PARSE_SINGLE))); + + if (node->has_right() && node->right()->kind == op_t::O_CONS) + node->set_right(node->right()->left()); + } else { + push_token(tok); + } + break; + } + + case token_t::LPAREN: + node = parse_value_expr(in, tflags.plus_flags(PARSE_PARTIAL) + .minus_flags(PARSE_SINGLE)); + tok = next_token(in, tflags); + if (tok.kind != token_t::RPAREN) + tok.expected(')'); + + if (node->kind == op_t::O_CONS) { + ptr_op_t prev(node); + node = new op_t(op_t::O_SEQ); + node->set_left(prev); + } + break; + + default: + push_token(tok); + break; + } + + return node; +} + +expr_t::ptr_op_t +expr_t::parser_t::parse_dot_expr(std::istream& in, + const parse_flags_t& tflags) const +{ + ptr_op_t node(parse_value_term(in, tflags)); + + if (node && ! tflags.has_flags(PARSE_SINGLE)) { + while (true) { + token_t& tok = next_token(in, tflags.plus_flags(PARSE_OP_CONTEXT)); + if (tok.kind == token_t::DOT) { + ptr_op_t prev(node); + node = new op_t(op_t::O_LOOKUP); + node->set_left(prev); + node->set_right(parse_value_term(in, tflags)); + if (! node->right()) + throw_(parse_error, + _("%1 operator not followed by argument") << tok.symbol); + } else { + push_token(tok); + break; + } + } + } + + return node; +} + +expr_t::ptr_op_t +expr_t::parser_t::parse_unary_expr(std::istream& in, + const parse_flags_t& tflags) const +{ + ptr_op_t node; + + token_t& tok = next_token(in, tflags); + + switch (tok.kind) { + case token_t::EXCLAM: { + ptr_op_t term(parse_dot_expr(in, tflags)); + if (! term) + throw_(parse_error, + _("%1 operator not followed by argument") << tok.symbol); + + // A very quick optimization + if (term->kind == op_t::VALUE) { + term->as_value_lval().in_place_not(); + node = term; + } else { + node = new op_t(op_t::O_NOT); + node->set_left(term); + } + break; + } + + case token_t::MINUS: { + ptr_op_t term(parse_dot_expr(in, tflags)); + if (! term) + throw_(parse_error, + _("%1 operator not followed by argument") << tok.symbol); + + // A very quick optimization + if (term->kind == op_t::VALUE) { + term->as_value_lval().in_place_negate(); + node = term; + } else { + node = new op_t(op_t::O_NEG); + node->set_left(term); + } + break; + } + + default: + push_token(tok); + node = parse_dot_expr(in, tflags); + break; + } + + return node; +} + +expr_t::ptr_op_t +expr_t::parser_t::parse_mul_expr(std::istream& in, + const parse_flags_t& tflags) const +{ + ptr_op_t node(parse_unary_expr(in, tflags)); + + if (node && ! tflags.has_flags(PARSE_SINGLE)) { + while (true) { + token_t& tok = next_token(in, tflags.plus_flags(PARSE_OP_CONTEXT)); + + if (tok.kind == token_t::STAR || tok.kind == token_t::SLASH || + tok.kind == token_t::KW_DIV) { + ptr_op_t prev(node); + node = new op_t(tok.kind == token_t::STAR ? + op_t::O_MUL : op_t::O_DIV); + node->set_left(prev); + node->set_right(parse_unary_expr(in, tflags)); + if (! node->right()) + throw_(parse_error, + _("%1 operator not followed by argument") << tok.symbol); + } else { + push_token(tok); + break; + } + } + } + + return node; +} + +expr_t::ptr_op_t +expr_t::parser_t::parse_add_expr(std::istream& in, + const parse_flags_t& tflags) const +{ + ptr_op_t node(parse_mul_expr(in, tflags)); + + if (node && ! tflags.has_flags(PARSE_SINGLE)) { + while (true) { + token_t& tok = next_token(in, tflags.plus_flags(PARSE_OP_CONTEXT)); + + if (tok.kind == token_t::PLUS || + tok.kind == token_t::MINUS) { + ptr_op_t prev(node); + node = new op_t(tok.kind == token_t::PLUS ? + op_t::O_ADD : op_t::O_SUB); + node->set_left(prev); + node->set_right(parse_mul_expr(in, tflags)); + if (! node->right()) + throw_(parse_error, + _("%1 operator not followed by argument") << tok.symbol); + } else { + push_token(tok); + break; + } + } + } + + return node; +} + +expr_t::ptr_op_t +expr_t::parser_t::parse_logic_expr(std::istream& in, + const parse_flags_t& tflags) const +{ + ptr_op_t node(parse_add_expr(in, tflags)); + + if (node && ! tflags.has_flags(PARSE_SINGLE)) { + while (true) { + op_t::kind_t kind = op_t::LAST; + parse_flags_t _flags = tflags; + token_t& tok = next_token(in, tflags.plus_flags(PARSE_OP_CONTEXT)); + bool negate = false; + + switch (tok.kind) { + case token_t::DEFINE: + kind = op_t::O_DEFINE; + break; + case token_t::EQUAL: + if (tflags.has_flags(PARSE_NO_ASSIGN)) + tok.rewind(in); + else + kind = op_t::O_EQ; + break; + case token_t::NEQUAL: + kind = op_t::O_EQ; + negate = true; + break; + case token_t::MATCH: + kind = op_t::O_MATCH; + break; + case token_t::NMATCH: + kind = op_t::O_MATCH; + negate = true; + break; + case token_t::LESS: + kind = op_t::O_LT; + break; + case token_t::LESSEQ: + kind = op_t::O_LTE; + break; + case token_t::GREATER: + kind = op_t::O_GT; + break; + case token_t::GREATEREQ: + kind = op_t::O_GTE; + break; + default: + push_token(tok); + goto exit_loop; + } + + if (kind != op_t::LAST) { + ptr_op_t prev(node); + node = new op_t(kind); + node->set_left(prev); + node->set_right(parse_add_expr(in, _flags)); + + if (! node->right()) + throw_(parse_error, + _("%1 operator not followed by argument") << tok.symbol); + + if (negate) { + prev = node; + node = new op_t(op_t::O_NOT); + node->set_left(prev); + } + } + } + } + + exit_loop: + return node; +} + +expr_t::ptr_op_t +expr_t::parser_t::parse_and_expr(std::istream& in, + const parse_flags_t& tflags) const +{ + ptr_op_t node(parse_logic_expr(in, tflags)); + + if (node && ! tflags.has_flags(PARSE_SINGLE)) { + while (true) { + token_t& tok = next_token(in, tflags.plus_flags(PARSE_OP_CONTEXT)); + + if (tok.kind == token_t::KW_AND) { + ptr_op_t prev(node); + node = new op_t(op_t::O_AND); + node->set_left(prev); + node->set_right(parse_logic_expr(in, tflags)); + if (! node->right()) + throw_(parse_error, + _("%1 operator not followed by argument") << tok.symbol); + } else { + push_token(tok); + break; + } + } + } + return node; +} + +expr_t::ptr_op_t +expr_t::parser_t::parse_or_expr(std::istream& in, + const parse_flags_t& tflags) const +{ + ptr_op_t node(parse_and_expr(in, tflags)); + + if (node && ! tflags.has_flags(PARSE_SINGLE)) { + while (true) { + token_t& tok = next_token(in, tflags.plus_flags(PARSE_OP_CONTEXT)); + + if (tok.kind == token_t::KW_OR) { + ptr_op_t prev(node); + node = new op_t(op_t::O_OR); + node->set_left(prev); + node->set_right(parse_and_expr(in, tflags)); + if (! node->right()) + throw_(parse_error, + _("%1 operator not followed by argument") << tok.symbol); + } else { + push_token(tok); + break; + } + } + } + return node; +} + +expr_t::ptr_op_t +expr_t::parser_t::parse_querycolon_expr(std::istream& in, + const parse_flags_t& tflags) const +{ + ptr_op_t node(parse_or_expr(in, tflags)); + + if (node && ! tflags.has_flags(PARSE_SINGLE)) { + token_t& tok = next_token(in, tflags.plus_flags(PARSE_OP_CONTEXT)); + + if (tok.kind == token_t::QUERY) { + ptr_op_t prev(node); + node = new op_t(op_t::O_QUERY); + node->set_left(prev); + node->set_right(parse_or_expr(in, tflags)); + if (! node->right()) + throw_(parse_error, + _("%1 operator not followed by argument") << tok.symbol); + + token_t& next_tok = next_token(in, tflags.plus_flags(PARSE_OP_CONTEXT)); + if (next_tok.kind != token_t::COLON) + next_tok.expected(':'); + + prev = node->right(); + ptr_op_t subnode = new op_t(op_t::O_COLON); + subnode->set_left(prev); + subnode->set_right(parse_or_expr(in, tflags)); + if (! subnode->right()) + throw_(parse_error, + _("%1 operator not followed by argument") << tok.symbol); + + node->set_right(subnode); + } + else if (tok.kind == token_t::KW_IF) { + ptr_op_t if_op(parse_or_expr(in, tflags)); + if (! if_op) + throw_(parse_error, _("'if' keyword not followed by argument")); + + tok = next_token(in, tflags.plus_flags(PARSE_OP_CONTEXT)); + if (tok.kind == token_t::KW_ELSE) { + ptr_op_t else_op(parse_or_expr(in, tflags)); + if (! else_op) + throw_(parse_error, _("'else' keyword not followed by argument")); + + ptr_op_t subnode = new op_t(op_t::O_COLON); + subnode->set_left(node); + subnode->set_right(else_op); + + node = new op_t(op_t::O_QUERY); + node->set_left(if_op); + node->set_right(subnode); + } else { + ptr_op_t null_node = new op_t(op_t::VALUE); + null_node->set_value(NULL_VALUE); + + ptr_op_t subnode = new op_t(op_t::O_COLON); + subnode->set_left(node); + subnode->set_right(null_node); + + node = new op_t(op_t::O_QUERY); + node->set_left(if_op); + node->set_right(subnode); + + push_token(tok); + } + } + else { + push_token(tok); + } + } + return node; +} + +expr_t::ptr_op_t +expr_t::parser_t::parse_value_expr(std::istream& in, + const parse_flags_t& tflags) const +{ + ptr_op_t node(parse_querycolon_expr(in, tflags)); + + if (node && ! tflags.has_flags(PARSE_SINGLE)) { + token_t& tok = next_token(in, tflags.plus_flags(PARSE_OP_CONTEXT)); + + if (tok.kind == token_t::COMMA || tok.kind == token_t::SEMI) { + bool comma_op = tok.kind == token_t::COMMA; + + ptr_op_t prev(node); + node = new op_t(comma_op ? op_t::O_CONS : op_t::O_SEQ); + node->set_left(prev); + node->set_right(parse_value_expr(in, tflags)); + if (! node->right()) + throw_(parse_error, + _("%1 operator not followed by argument") << tok.symbol); + + tok = next_token(in, tflags.plus_flags(PARSE_OP_CONTEXT)); + } + + if (tok.kind != token_t::TOK_EOF) { + if (tflags.has_flags(PARSE_PARTIAL)) + push_token(tok); + else + tok.unexpected(); + } + } + else if (! tflags.has_flags(PARSE_PARTIAL) && + ! tflags.has_flags(PARSE_SINGLE)) { + throw_(parse_error, _("Failed to parse value expression")); + } + + return node; +} + +expr_t::ptr_op_t +expr_t::parser_t::parse(std::istream& in, + const parse_flags_t& flags, + const optional<string>& original_string) +{ + try { + ptr_op_t top_node = parse_value_expr(in, flags); + + if (use_lookahead) { + use_lookahead = false; + lookahead.rewind(in); + } + lookahead.clear(); + + return top_node; + } + catch (const std::exception& err) { + if (original_string) { + add_error_context(_("While parsing value expression:")); + + std::streamoff end_pos = 0; + if (in.good()) + end_pos = in.tellg(); + std::streamoff pos = end_pos; + + if (pos > 0) + pos -= lookahead.length; + + DEBUG("parser.error", "original_string = '" << *original_string << "'"); + DEBUG("parser.error", " pos = " << pos); + DEBUG("parser.error", " end_pos = " << end_pos); + DEBUG("parser.error", " token kind = " << int(lookahead.kind)); + DEBUG("parser.error", " token length = " << lookahead.length); + + add_error_context(line_context(*original_string, + static_cast<string::size_type>(pos), + static_cast<string::size_type>(end_pos))); + } + throw; + } +} + +} // namespace ledger |