diff options
Diffstat (limited to 'token.cc')
-rw-r--r-- | token.cc | 398 |
1 files changed, 398 insertions, 0 deletions
diff --git a/token.cc b/token.cc new file mode 100644 index 00000000..3d5eeb21 --- /dev/null +++ b/token.cc @@ -0,0 +1,398 @@ +/* + * Copyright (c) 2003-2008, John Wiegley. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * - Neither the name of New Artisans LLC nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "token.h" +#include "parser.h" + +namespace ledger { + +void expr_t::token_t::parse_ident(std::istream& in) +{ + if (in.eof()) { + kind = TOK_EOF; + return; + } + assert(in.good()); + + char c = peek_next_nonws(in); + + if (in.eof()) { + kind = TOK_EOF; + return; + } + assert(in.good()); + + kind = IDENT; + length = 0; + + clear_flags(); + + char buf[256]; + READ_INTO_(in, buf, 255, c, length, + std::isalnum(c) || c == '_' || c == '.' || c == '-'); + + switch (buf[0]) { +#if 0 + case 'a': + if (std::strcmp(buf, "and") == 0) + kind = KW_AND; + break; + case 'd': + if (std::strcmp(buf, "div") == 0) + kind = KW_DIV; + break; + case 'e': + if (std::strcmp(buf, "eq") == 0) + kind = EQUAL; + break; +#endif + case 'f': + if (std::strcmp(buf, "false") == 0) { + kind = VALUE; + value = false; + } + break; +#if 0 + case 'g': + if (std::strcmp(buf, "gt") == 0) + kind = GREATER; + else if (std::strcmp(buf, "ge") == 0) + kind = GREATEREQ; + break; + case 'i': + if (std::strcmp(buf, "is") == 0) + kind = EQUAL; + break; + case 'l': + if (std::strcmp(buf, "lt") == 0) + kind = LESS; + else if (std::strcmp(buf, "le") == 0) + kind = LESSEQ; + break; + case 'm': + if (std::strcmp(buf, "mod") == 0) + kind = KW_MOD; + break; + case 'n': + if (std::strcmp(buf, "ne") == 0) + kind = NEQUAL; + break; + case 'o': + if (std::strcmp(buf, "or") == 0) + kind = KW_OR; + break; +#endif + case 't': + if (std::strcmp(buf, "true") == 0) { + kind = VALUE; + value = true; + } + break; + } + + if (kind == IDENT) + value.set_string(buf); +} + +void expr_t::token_t::next(std::istream& in, const unsigned int pflags) +{ + if (in.eof()) { + kind = TOK_EOF; + return; + } + assert(in.good()); + + char c = peek_next_nonws(in); + + if (in.eof()) { + kind = TOK_EOF; + return; + } + assert(in.good()); + + symbol[0] = c; + symbol[1] = '\0'; + + length = 1; + + switch (c) { + case '&': + in.get(c); + kind = KW_AND; + break; + + case '(': + in.get(c); + kind = LPAREN; + break; + case ')': + in.get(c); + kind = RPAREN; + break; + + case '[': { + in.get(c); + + char buf[256]; + READ_INTO_(in, buf, 255, c, length, c != ']'); + if (c != ']') + unexpected(c, ']'); + + in.get(c); + length++; + + interval_t timespan(buf); + kind = VALUE; + value = timespan.first(); + break; + } + + case '\'': + case '"': { + char delim; + in.get(delim); + char buf[4096]; + READ_INTO_(in, buf, 4095, c, length, c != delim); + if (c != delim) + unexpected(c, delim); + in.get(c); + length++; + kind = VALUE; + value.set_string(buf); + break; + } + + case '{': { + in.get(c); + amount_t temp; + temp.parse(in, AMOUNT_PARSE_NO_MIGRATE); + in.get(c); + if (c != '}') + unexpected(c, '}'); + length++; + kind = VALUE; + value = temp; + break; + } + + case '!': + in.get(c); + c = in.peek(); + if (c == '=') { + in.get(c); + symbol[1] = c; + symbol[2] = '\0'; + kind = NEQUAL; + length = 2; + break; + } + kind = EXCLAM; + break; + + case '-': + in.get(c); + kind = MINUS; + break; + case '+': + in.get(c); + kind = PLUS; + break; + + case '*': + in.get(c); + kind = STAR; + break; + + case 'c': + case 'C': + case 'p': + case 'w': + case 'W': + case 'e': + case '/': { + bool code_mask = c == 'c'; + bool commodity_mask = c == 'C'; + bool payee_mask = c == 'p'; + bool note_mask = c == 'e'; + bool short_account_mask = c == 'w'; + + in.get(c); + if (c == '/') { + c = peek_next_nonws(in); + if (c == '/') { + in.get(c); + c = in.peek(); + if (c == '/') { + in.get(c); + c = in.peek(); + short_account_mask = true; + } else { + payee_mask = true; + } + } + } else { + in.get(c); + } + + // Read in the regexp + char buf[256]; + READ_INTO_(in, buf, 255, c, length, c != '/'); + if (c != '/') + unexpected(c, '/'); + in.get(c); + length++; + + if (short_account_mask) + set_flags(TOKEN_SHORT_ACCOUNT_MASK); + else if (code_mask) + set_flags(TOKEN_CODE_MASK); + else if (commodity_mask) + set_flags(TOKEN_COMMODITY_MASK); + else if (payee_mask) + set_flags(TOKEN_PAYEE_MASK); + else if (note_mask) + set_flags(TOKEN_NOTE_MASK); + else + set_flags(TOKEN_ACCOUNT_MASK); + + kind = MASK; + value.set_string(buf); + break; + } + + case '=': + in.get(c); + kind = EQUAL; + break; + + case '<': + in.get(c); + if (in.peek() == '=') { + in.get(c); + symbol[1] = c; + symbol[2] = '\0'; + kind = LESSEQ; + length = 2; + break; + } + kind = LESS; + break; + + case '>': + in.get(c); + if (in.peek() == '=') { + in.get(c); + symbol[1] = c; + symbol[2] = '\0'; + kind = GREATEREQ; + length = 2; + break; + } + kind = GREATER; + break; + + case ',': + in.get(c); + kind = COMMA; + break; + + default: { + amount_t temp; + unsigned long pos = 0; + + // When in relaxed parsing mode, we want to migrate commodity + // flags so that any precision specified by the user updates the + // current maximum displayed precision. + pos = (long)in.tellg(); + + amount_t::flags_t parse_flags = 0; + if (pflags & EXPR_PARSE_NO_MIGRATE) + parse_flags |= AMOUNT_PARSE_NO_MIGRATE; + if (pflags & EXPR_PARSE_NO_REDUCE) + parse_flags |= AMOUNT_PARSE_NO_REDUCE; + + if (! temp.parse(in, parse_flags | AMOUNT_PARSE_SOFT_FAIL)) { + // If the amount had no commodity, it must be an unambiguous + // variable reference + + in.clear(); + in.seekg(pos, std::ios::beg); + + c = in.peek(); + assert(! (std::isdigit(c) || c == '.')); + parse_ident(in); + } else { + kind = VALUE; + value = temp; + } + break; + } + } +} + +void expr_t::token_t::rewind(std::istream& in) +{ + for (unsigned int i = 0; i < length; i++) + in.unget(); +} + + +void expr_t::token_t::unexpected() +{ + switch (kind) { + case TOK_EOF: + throw_(parse_error, "Unexpected end of expression"); + case IDENT: + throw_(parse_error, "Unexpected symbol '" << value << "'"); + case VALUE: + throw_(parse_error, "Unexpected value '" << value << "'"); + default: + throw_(parse_error, "Unexpected operator '" << symbol << "'"); + } +} + +void expr_t::token_t::unexpected(char c, char wanted) +{ + if ((unsigned char) c == 0xff) { + if (wanted) + throw_(parse_error, "Missing '" << wanted << "'"); + else + throw_(parse_error, "Unexpected end"); + } else { + if (wanted) + throw_(parse_error, "Invalid char '" << c + << "' (wanted '" << wanted << "')"); + else + throw_(parse_error, "Invalid char '" << c << "'"); + } +} + +} // namespace ledger |