1 files changed, 398 insertions, 0 deletions
diff --git a/token.cc b/token.cc
new file mode 100644
index 00000000..3d5eeb21
--- /dev/null
+++ b/token.cc
@@ -0,0 +1,398 @@
+/*
+ * Copyright (c) 2003-2008, John Wiegley.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in the
+ *   documentation and/or other materials provided with the distribution.
+ *
+ * - Neither the name of New Artisans LLC nor the names of its
+ *   contributors may be used to endorse or promote products derived from
+ *   this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "token.h"
+#include "parser.h"
+
+namespace ledger {
+
+// Parse an identifier from `in` into this token.
+//
+// The token becomes TOK_EOF when the stream is already at end-of-file, or
+// reaches it while peek_next_nonws() looks for the next character
+// (presumably the next non-whitespace one; the helper is defined
+// elsewhere).  Otherwise up to 255 characters drawn from alphanumerics,
+// '_', '.' and '-' are collected through READ_INTO_, which is also handed
+// `length` to update.  The words "true" and "false" produce a VALUE token
+// holding the corresponding boolean; any other text produces an IDENT
+// token whose value is that text as a string.
+void expr_t::token_t::parse_ident(std::istream& in)
+{
+  if (in.eof()) {
+    kind = TOK_EOF;
+    return;
+  }
+  assert(in.good());
+
+  char c = peek_next_nonws(in);
+
+  // Looking ahead may itself have run into the end of the input.
+  if (in.eof()) {
+    kind = TOK_EOF;
+    return;
+  }
+  assert(in.good());
+
+  kind	 = IDENT;
+  length = 0;
+
+  clear_flags();
+
+  // The <cctype> classifiers are only defined for values representable
+  // as unsigned char (or EOF).  A plain `char` holding a byte >= 0x80 may
+  // be negative, so it is converted before being classified.
+  char buf[256];
+  READ_INTO_(in, buf, 255, c, length,
+	     std::isalnum(static_cast<unsigned char>(c)) ||
+	     c == '_' || c == '.' || c == '-');
+
+  // Only "true" and "false" are recognised as reserved words here; the
+  // word spellings of the operators below are compiled out with #if 0.
+  switch (buf[0]) {
+#if 0
+  case 'a':
+    if (std::strcmp(buf, "and") == 0)
+      kind = KW_AND;
+    break;
+  case 'd':
+    if (std::strcmp(buf, "div") == 0)
+      kind = KW_DIV;
+    break;
+  case 'e':
+    if (std::strcmp(buf, "eq") == 0)
+      kind = EQUAL;
+    break;
+#endif
+  case 'f':
+    if (std::strcmp(buf, "false") == 0) {
+      kind = VALUE;
+      value = false;
+    }
+    break;
+#if 0
+  case 'g':
+    if (std::strcmp(buf, "gt") == 0)
+      kind = GREATER;
+    else if (std::strcmp(buf, "ge") == 0)
+      kind = GREATEREQ;
+    break;
+  case 'i':
+    if (std::strcmp(buf, "is") == 0)
+      kind = EQUAL;
+    break;
+  case 'l':
+    if (std::strcmp(buf, "lt") == 0)
+      kind = LESS;
+    else if (std::strcmp(buf, "le") == 0)
+      kind = LESSEQ;
+    break;
+  case 'm':
+    if (std::strcmp(buf, "mod") == 0)
+      kind = KW_MOD;
+    break;
+  case 'n':
+    if (std::strcmp(buf, "ne") == 0)
+      kind = NEQUAL;
+    break;
+  case 'o':
+    if (std::strcmp(buf, "or") == 0)
+      kind = KW_OR;
+    break;
+#endif
+  case 't':
+    if (std::strcmp(buf, "true") == 0) {
+      kind = VALUE;
+      value = true;
+    }
+    break;
+  }
+
+  // Anything that was not turned into a VALUE above keeps its text.
+  if (kind == IDENT)
+    value.set_string(buf);
+}
+
+// Read the next token from `in`, dispatching on its first character.
+//
+// `pflags` is consulted only when the token is tried as a bare amount:
+// EXPR_PARSE_NO_MIGRATE and EXPR_PARSE_NO_REDUCE are translated into the
+// corresponding AMOUNT_PARSE_* flags.
+//
+// On return `kind` describes the token:
+//   - TOK_EOF when the stream is (or becomes) exhausted;
+//   - an operator/punctuation kind, with its spelling left in `symbol`;
+//   - VALUE for [...] text (handed to interval_t), quoted strings,
+//     {...} amounts and bare amounts;
+//   - MASK for /.../ patterns, with exactly one TOKEN_*_MASK flag set;
+//   - otherwise whatever parse_ident() decides.
+// A missing closing delimiter is reported via unexpected(c, wanted).
+void expr_t::token_t::next(std::istream& in, const unsigned int pflags)
+{
+  if (in.eof()) {
+    kind = TOK_EOF;
+    return;
+  }
+  assert(in.good());
+
+  char c = peek_next_nonws(in);
+
+  // Looking ahead may itself have run into the end of the input.
+  if (in.eof()) {
+    kind = TOK_EOF;
+    return;
+  }
+  assert(in.good());
+
+  // Start out assuming a one-character token; the multi-character cases
+  // below adjust `symbol` and `length` themselves.
+  symbol[0] = c;
+  symbol[1] = '\0';
+
+  length = 1;
+
+  switch (c) {
+  case '&':
+    in.get(c);
+    kind = KW_AND;
+    break;
+
+  case '(':
+    in.get(c);
+    kind = LPAREN;
+    break;
+  case ')':
+    in.get(c);
+    kind = RPAREN;
+    break;
+
+  case '[': {
+    // Bracketed text: everything up to ']' is given to interval_t and the
+    // result of its first() becomes the token's value.
+    in.get(c);
+
+    char buf[256];
+    READ_INTO_(in, buf, 255, c, length, c != ']');
+    if (c != ']')
+      unexpected(c, ']');
+
+    in.get(c);			// consume the closing ']'
+    length++;
+
+    interval_t timespan(buf);
+    kind  = VALUE;
+    value = timespan.first();
+    break;
+  }
+
+  case '\'':
+  case '"': {
+    // Quoted string: the opening quote character is also the terminator.
+    char delim;
+    in.get(delim);
+    char buf[4096];
+    READ_INTO_(in, buf, 4095, c, length, c != delim);
+    if (c != delim)
+      unexpected(c, delim);
+    in.get(c);			// consume the closing quote
+    length++;
+    kind = VALUE;
+    value.set_string(buf);
+    break;
+  }
+
+  case '{': {
+    // Braced amount, always parsed with AMOUNT_PARSE_NO_MIGRATE.
+    in.get(c);
+    amount_t temp;
+    temp.parse(in, AMOUNT_PARSE_NO_MIGRATE);
+    in.get(c);
+    if (c != '}')
+      unexpected(c, '}');
+    // NOTE(review): only the two braces are counted in `length`; the
+    // characters consumed by amount_t::parse() are not.  Confirm whether
+    // rewind() is expected to work for this token.
+    length++;
+    kind  = VALUE;
+    value = temp;
+    break;
+  }
+
+  case '!':
+    // Either "!=" or a lone '!'.
+    in.get(c);
+    c = in.peek();
+    if (c == '=') {
+      in.get(c);
+      symbol[1] = c;
+      symbol[2] = '\0';
+      kind = NEQUAL;
+      length = 2;
+      break;
+    }
+    kind = EXCLAM;
+    break;
+
+  case '-':
+    in.get(c);
+    kind = MINUS;
+    break;
+  case '+':
+    in.get(c);
+    kind = PLUS;
+    break;
+
+  case '*':
+    in.get(c);
+    kind = STAR;
+    break;
+
+  case 'c':
+  case 'C':
+  case 'p':
+  case 'w':
+  case 'W':
+  case 'e':
+  case '/': {
+    // Pattern token.  An optional leading letter picks what the pattern
+    // applies to; 'W' and a bare '/' select none of these and end up as
+    // TOKEN_ACCOUNT_MASK below.
+    bool code_mask	    = c == 'c';
+    bool commodity_mask	    = c == 'C';
+    bool payee_mask	    = c == 'p';
+    bool note_mask	    = c == 'e';
+    bool short_account_mask = c == 'w';
+
+    in.get(c);
+    if (c == '/') {
+      // No letter prefix: "//" selects the payee mask and "///" the
+      // short account mask.
+      c = peek_next_nonws(in);
+      if (c == '/') {
+	in.get(c);
+	c = in.peek();
+	if (c == '/') {
+	  in.get(c);
+	  c = in.peek();
+	  short_account_mask = true;
+	} else {
+	  payee_mask = true;
+	}
+      }
+    } else {
+      // A prefix letter was read; consume the character that follows it.
+      // NOTE(review): that character is not checked to be '/', so any
+      // word starting with one of the prefix letters takes this path.
+      in.get(c);
+    }
+
+    // Read in the regexp
+    // NOTE(review): the prefix letter and any extra slashes consumed
+    // above are not added to `length` -- confirm against rewind().
+    char buf[256];
+    READ_INTO_(in, buf, 255, c, length, c != '/');
+    if (c != '/')
+      unexpected(c, '/');
+    in.get(c);			// consume the closing '/'
+    length++;
+
+    // Exactly one flag is set; the order below decides which one wins
+    // when more than one of the booleans is true.
+    if (short_account_mask)
+      set_flags(TOKEN_SHORT_ACCOUNT_MASK);
+    else if (code_mask)
+      set_flags(TOKEN_CODE_MASK);
+    else if (commodity_mask)
+      set_flags(TOKEN_COMMODITY_MASK);
+    else if (payee_mask)
+      set_flags(TOKEN_PAYEE_MASK);
+    else if (note_mask)
+      set_flags(TOKEN_NOTE_MASK);
+    else
+      set_flags(TOKEN_ACCOUNT_MASK);
+
+    kind = MASK;
+    value.set_string(buf);
+    break;
+  }
+
+  case '=':
+    in.get(c);
+    kind = EQUAL;
+    break;
+
+  case '<':
+    // Either "<=" or a lone '<'.
+    in.get(c);
+    if (in.peek() == '=') {
+      in.get(c);
+      symbol[1] = c;
+      symbol[2] = '\0';
+      kind = LESSEQ;
+      length = 2;
+      break;
+    }
+    kind = LESS;
+    break;
+
+  case '>':
+    // Either ">=" or a lone '>'.
+    in.get(c);
+    if (in.peek() == '=') {
+      in.get(c);
+      symbol[1] = c;
+      symbol[2] = '\0';
+      kind = GREATEREQ;
+      length = 2;
+      break;
+    }
+    kind = GREATER;
+    break;
+
+  case ',':
+    in.get(c);
+    kind = COMMA;
+    break;
+
+  default: {
+    amount_t temp;
+    unsigned long pos = 0;
+
+    // When in relaxed parsing mode, we want to migrate commodity
+    // flags so that any precision specified by the user updates the
+    // current maximum displayed precision.
+    // Remember where the token starts so a failed amount parse can be
+    // undone by seeking back.
+    pos = (long)in.tellg();
+
+    amount_t::flags_t parse_flags = 0;
+    if (pflags & EXPR_PARSE_NO_MIGRATE)
+      parse_flags |= AMOUNT_PARSE_NO_MIGRATE;
+    if (pflags & EXPR_PARSE_NO_REDUCE)
+      parse_flags |= AMOUNT_PARSE_NO_REDUCE;
+
+    if (! temp.parse(in, parse_flags | AMOUNT_PARSE_SOFT_FAIL)) {
+      // If the amount had no commodity, it must be an unambiguous
+      // variable reference
+
+      in.clear();
+      in.seekg(pos, std::ios::beg);
+
+      c = in.peek();
+      // std::isdigit is only defined for values representable as unsigned
+      // char (or EOF), so convert the possibly negative `char` first.
+      assert(! (std::isdigit(static_cast<unsigned char>(c)) || c == '.'));
+      parse_ident(in);
+    } else {
+      kind = VALUE;
+      value = temp;
+    }
+    break;
+  }
+  }
+}
+
+// Push this token back onto `in` by calling unget() once for each of the
+// `length` characters recorded for it.
+void expr_t::token_t::rewind(std::istream& in)
+{
+  unsigned int undone = 0;
+  while (undone < length) {
+    in.unget();
+    ++undone;
+  }
+}
+
+
+// Report this token as unexpected through throw_(parse_error, ...).  The
+// wording depends on `kind`: end of expression, an identifier or a value
+// (both printed from `value`), or, for every other kind, an operator
+// printed from `symbol`.
+void expr_t::token_t::unexpected()
+{
+  if (kind == TOK_EOF) {
+    throw_(parse_error, "Unexpected end of expression");
+  }
+  else if (kind == IDENT) {
+    throw_(parse_error, "Unexpected symbol '" << value << "'");
+  }
+  else if (kind == VALUE) {
+    throw_(parse_error, "Unexpected value '" << value << "'");
+  }
+  else {
+    throw_(parse_error, "Unexpected operator '" << symbol << "'");
+  }
+}
+
+// Report a character-level problem through throw_(parse_error, ...).
+//
+// `c` is the offending character.  A value equal to 0xff when viewed as
+// unsigned char is treated as end of input (presumably the stream's EOF
+// value after being stored in a char).  `wanted`, when non-zero, is the
+// character that was expected and is included in the message.
+void expr_t::token_t::unexpected(char c, char wanted)
+{
+  const bool at_end	 = static_cast<unsigned char>(c) == 0xff;
+  const bool have_wanted = wanted != '\0';
+
+  if (at_end && have_wanted) {
+    throw_(parse_error, "Missing '" << wanted << "'");
+  }
+  else if (at_end) {
+    throw_(parse_error, "Unexpected end");
+  }
+  else if (have_wanted) {
+    throw_(parse_error, "Invalid char '" << c
+	     << "' (wanted '" << wanted << "')");
+  }
+  else {
+    throw_(parse_error, "Invalid char '" << c << "'");
+  }
+}
+
+} // namespace ledger