diff options
Diffstat (limited to 'src/token.cc')
-rw-r--r-- | src/token.cc | 485 |
1 files changed, 485 insertions, 0 deletions
diff --git a/src/token.cc b/src/token.cc new file mode 100644 index 00000000..81c54a82 --- /dev/null +++ b/src/token.cc @@ -0,0 +1,485 @@ +/* + * Copyright (c) 2003-2009, John Wiegley. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * - Neither the name of New Artisans LLC nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <system.hh> + +#include "token.h" +#include "parser.h" + +namespace ledger { + +int expr_t::token_t::parse_reserved_word(std::istream& in) +{ + char c = static_cast<char>(in.peek()); + + if (c == 'a' || c == 'd' || c == 'e' || c == 'f' || + c == 'i' || c == 'o' || c == 'n' || c == 't') { + length = 0; + + char buf[6]; + READ_INTO_(in, buf, 5, c, length, std::isalpha(c)); + + switch (buf[0]) { + case 'a': + if (std::strcmp(buf, "and") == 0) { + symbol[0] = '&'; + symbol[1] = '\0'; + kind = KW_AND; + return 1; + } + break; + + case 'd': + if (std::strcmp(buf, "div") == 0) { + symbol[0] = '/'; + symbol[1] = '/'; + symbol[2] = '\0'; + kind = KW_DIV; + return 1; + } + break; + + case 'e': + if (std::strcmp(buf, "else") == 0) { + symbol[0] = 'L'; + symbol[1] = 'S'; + symbol[2] = '\0'; + kind = KW_ELSE; + return 1; + } + break; + + case 'f': + if (std::strcmp(buf, "false") == 0) { + kind = VALUE; + value = false; + return 1; + } + break; + + case 'i': + if (std::strcmp(buf, "if") == 0) { + symbol[0] = 'i'; + symbol[1] = 'f'; + symbol[2] = '\0'; + kind = KW_IF; + return 1; + } + break; + + case 'o': + if (std::strcmp(buf, "or") == 0) { + symbol[0] = '|'; + symbol[1] = '\0'; + kind = KW_OR; + return 1; + } + break; + + case 'n': + if (std::strcmp(buf, "not") == 0) { + symbol[0] = '!'; + symbol[1] = '\0'; + kind = EXCLAM; + return 1; + } + break; + + case 't': + if (std::strcmp(buf, "true") == 0) { + kind = VALUE; + value = true; + return 1; + } + break; + } + + return 0; + } + return -1; +} + +void expr_t::token_t::parse_ident(std::istream& in) +{ + kind = IDENT; + length = 0; + + char c, buf[256]; + READ_INTO_(in, buf, 255, c, length, std::isalnum(c) || c == '_'); + + value.set_string(buf); +} + +void expr_t::token_t::next(std::istream& in, const parse_flags_t& pflags) +{ + if (in.eof()) { + kind = TOK_EOF; + return; + } + if (! in.good()) + throw_(parse_error, _("Input stream no longer valid")); + + char c = peek_next_nonws(in); + + if (in.eof()) { + kind = TOK_EOF; + return; + } + if (! in.good()) + throw_(parse_error, _("Input stream no longer valid")); + + symbol[0] = c; + symbol[1] = '\0'; + + length = 1; + + switch (c) { + case '&': + in.get(c); + c = static_cast<char>(in.peek()); + if (c == '&') { + in.get(c); + kind = KW_AND; + length = 2; + break; + } + kind = KW_AND; + break; + case '|': + in.get(c); + c = static_cast<char>(in.peek()); + if (c == '|') { + in.get(c); + kind = KW_OR; + length = 2; + break; + } + kind = KW_OR; + break; + + case '(': + in.get(c); + kind = LPAREN; + break; + case ')': + in.get(c); + kind = RPAREN; + break; + + case '[': { + in.get(c); + + char buf[256]; + READ_INTO_(in, buf, 255, c, length, c != ']'); + if (c != ']') + expected(']', c); + + in.get(c); + length++; + + date_interval_t timespan(buf); + optional<date_t> begin = timespan.begin(); + if (! begin) + throw_(parse_error, + _("Date specifier does not refer to a starting date")); + kind = VALUE; + value = *begin; + break; + } + + case '\'': + case '"': { + char delim; + in.get(delim); + char buf[4096]; + READ_INTO_(in, buf, 4095, c, length, c != delim); + if (c != delim) + expected(delim, c); + in.get(c); + length++; + kind = VALUE; + value.set_string(buf); + break; + } + + case '{': { + in.get(c); + amount_t temp; + temp.parse(in, PARSE_NO_MIGRATE); + in.get(c); + if (c != '}') + expected('}', c); + length++; + kind = VALUE; + value = temp; + break; + } + + case '!': + in.get(c); + c = static_cast<char>(in.peek()); + if (c == '=') { + in.get(c); + symbol[1] = c; + symbol[2] = '\0'; + kind = NEQUAL; + length = 2; + break; + } + else if (c == '~') { + in.get(c); + symbol[1] = c; + symbol[2] = '\0'; + kind = NMATCH; + length = 2; + break; + } + kind = EXCLAM; + break; + + case '-': + in.get(c); + kind = MINUS; + break; + case '+': + in.get(c); + kind = PLUS; + break; + + case '*': + in.get(c); + kind = STAR; + break; + + case '?': + in.get(c); + kind = QUERY; + break; + case ':': + in.get(c); + c = static_cast<char>(in.peek()); + if (c == '=') { + in.get(c); + symbol[1] = c; + symbol[2] = '\0'; + kind = DEFINE; + length = 2; + break; + } + kind = COLON; + break; + + case '/': { + in.get(c); + if (pflags.has_flags(PARSE_OP_CONTEXT)) { // operator context + kind = SLASH; + } else { // terminal context + // Read in the regexp + char buf[256]; + READ_INTO_(in, buf, 255, c, length, c != '/'); + if (c != '/') + expected('/', c); + in.get(c); + length++; + + kind = VALUE; + value.set_mask(buf); + } + break; + } + + case '=': + in.get(c); + c = static_cast<char>(in.peek()); + if (c == '~') { + in.get(c); + symbol[1] = c; + symbol[2] = '\0'; + kind = MATCH; + length = 2; + break; + } + else if (c == '=') { + in.get(c); + symbol[1] = c; + symbol[2] = '\0'; + kind = EQUAL; + length = 2; + break; + } + kind = EQUAL; + break; + + case '<': + in.get(c); + if (static_cast<char>(in.peek()) == '=') { + in.get(c); + symbol[1] = c; + symbol[2] = '\0'; + kind = LESSEQ; + length = 2; + break; + } + kind = LESS; + break; + + case '>': + in.get(c); + if (static_cast<char>(in.peek()) == '=') { + in.get(c); + symbol[1] = c; + symbol[2] = '\0'; + kind = GREATEREQ; + length = 2; + break; + } + kind = GREATER; + break; + + case '.': + in.get(c); + kind = DOT; + break; + + case ',': + in.get(c); + kind = COMMA; + break; + + case ';': + in.get(c); + kind = SEMI; + break; + + default: { + istream_pos_type pos = in.tellg(); + + // First, check to see if it's a reserved word, such as: and or not + int result = parse_reserved_word(in); + if (std::isalpha(c) && result == 1) + break; + + // If not, rewind back to the beginning of the word to scan it + // again. If the result was -1, it means no identifier was scanned + // so we don't have to rewind. + if (result == 0) { + in.clear(); + in.seekg(pos, std::ios::beg); + if (in.fail()) + throw_(parse_error, _("Failed to reset input stream")); + } + + // When in relaxed parsing mode, we want to migrate commodity flags + // so that any precision specified by the user updates the current + // maximum displayed precision. + parse_flags_t parse_flags; + + if (pflags.has_flags(PARSE_NO_MIGRATE)) + parse_flags.add_flags(PARSE_NO_MIGRATE); + if (pflags.has_flags(PARSE_NO_REDUCE)) + parse_flags.add_flags(PARSE_NO_REDUCE); + + try { + amount_t temp; + if (! temp.parse(in, parse_flags.plus_flags(PARSE_SOFT_FAIL))) { + // If the amount had no commodity, it must be an unambiguous + // variable reference + + in.clear(); + in.seekg(pos, std::ios::beg); + if (in.fail()) + throw_(parse_error, _("Failed to reset input stream")); + + c = static_cast<char>(in.peek()); + if (std::isdigit(c) || c == '.') + expected('\0', c); + + parse_ident(in); + } else { + kind = VALUE; + value = temp; + length = static_cast<std::size_t>(in.tellg() - pos); + } + } + catch (const std::exception& err) { + kind = ERROR; + length = static_cast<std::size_t>(in.tellg() - pos); + throw; + } + break; + } + } +} + +void expr_t::token_t::rewind(std::istream& in) +{ + in.seekg(- length, std::ios::cur); + if (in.fail()) + throw_(parse_error, _("Failed to rewind input stream")); +} + + +void expr_t::token_t::unexpected() +{ + kind_t prev_kind = kind; + + kind = ERROR; + + switch (prev_kind) { + case TOK_EOF: + throw_(parse_error, _("Unexpected end of expression")); + case IDENT: + throw_(parse_error, _("Unexpected symbol '%1'") << value); + case VALUE: + throw_(parse_error, _("Unexpected value '%1'") << value); + default: + throw_(parse_error, _("Unexpected token '%1'") << symbol); + } +} + +void expr_t::token_t::expected(char wanted, char c) +{ + kind = ERROR; + + if (c == '\0' || c == -1) { + if (wanted == '\0' || wanted == -1) + throw_(parse_error, _("Unexpected end")); + else + throw_(parse_error, _("Missing '%1'") << wanted); + } else { + if (wanted == '\0' || wanted == -1) + throw_(parse_error, _("Invalid char '%1'") << c); + else + throw_(parse_error, _("Invalid char '%1' (wanted '%2')") << c << wanted); + } +} + +} // namespace ledger |