1 files changed, 485 insertions, 0 deletions
diff --git a/src/token.cc b/src/token.cc
new file mode 100644
index 00000000..81c54a82
--- /dev/null
+++ b/src/token.cc
@@ -0,0 +1,485 @@
+/*
+ * Copyright (c) 2003-2009, John Wiegley.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in the
+ *   documentation and/or other materials provided with the distribution.
+ *
+ * - Neither the name of New Artisans LLC nor the names of its
+ *   contributors may be used to endorse or promote products derived from
+ *   this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <system.hh>
+
+#include "token.h"
+#include "parser.h"
+
+namespace ledger {
+
+// Try to scan one of the reserved words `and`, `div`, `else`, `false`,
+// `if`, `or`, `not` or `true` from the front of `in`.
+//
+// Returns:
+//    1  a reserved word was recognized; `kind` and either `symbol` or
+//       `value` have been filled in.
+//    0  letters were consumed but they do not spell a reserved word; the
+//       caller is responsible for seeking back to where the word began.
+//   -1  the next character cannot start any reserved word; nothing was
+//       consumed (the stream was only peeked).
+int expr_t::token_t::parse_reserved_word(std::istream& in)
+{
+  char c = static_cast<char>(in.peek());
+
+  if (c == 'a' || c == 'd' || c == 'e' || c == 'f' ||
+      c == 'i' || c == 'o' || c == 'n' || c == 't') {
+    length = 0;
+
+    // The longest reserved word ("false") has five letters, hence the
+    // five-character read limit and the six-byte buffer.
+    //
+    // The cast to unsigned char is required: passing a negative plain
+    // char (e.g. EOF squeezed into a char, or a byte >= 0x80) to
+    // std::isalpha is undefined behaviour.
+    //
+    // NOTE(review): READ_INTO_ presumably stops after five characters, so
+    // a longer word that merely begins with "false" would still compare
+    // equal below -- confirm whether a word-boundary check is wanted.
+    char buf[6];
+    READ_INTO_(in, buf, 5, c, length,
+               std::isalpha(static_cast<unsigned char>(c)));
+
+    switch (buf[0]) {
+    case 'a':
+      if (std::strcmp(buf, "and") == 0) {
+        symbol[0] = '&';
+        symbol[1] = '\0';
+        kind = KW_AND;
+        return 1;
+      }
+      break;
+
+    case 'd':
+      if (std::strcmp(buf, "div") == 0) {
+        symbol[0] = '/';
+        symbol[1] = '/';
+        symbol[2] = '\0';
+        kind = KW_DIV;
+        return 1;
+      }
+      break;
+
+    case 'e':
+      if (std::strcmp(buf, "else") == 0) {
+        symbol[0] = 'L';
+        symbol[1] = 'S';
+        symbol[2] = '\0';
+        kind = KW_ELSE;
+        return 1;
+      }
+      break;
+
+    case 'f':
+      if (std::strcmp(buf, "false") == 0) {
+        kind = VALUE;
+        value = false;
+        return 1;
+      }
+      break;
+
+    case 'i':
+      if (std::strcmp(buf, "if") == 0) {
+        symbol[0] = 'i';
+        symbol[1] = 'f';
+        symbol[2] = '\0';
+        kind = KW_IF;
+        return 1;
+      }
+      break;
+
+    case 'o':
+      if (std::strcmp(buf, "or") == 0) {
+        symbol[0] = '|';
+        symbol[1] = '\0';
+        kind = KW_OR;
+        return 1;
+      }
+      break;
+
+    case 'n':
+      // "not" is tokenized exactly like the '!' operator.
+      if (std::strcmp(buf, "not") == 0) {
+        symbol[0] = '!';
+        symbol[1] = '\0';
+        kind = EXCLAM;
+        return 1;
+      }
+      break;
+
+    case 't':
+      if (std::strcmp(buf, "true") == 0) {
+        kind = VALUE;
+        value = true;
+        return 1;
+      }
+      break;
+    }
+
+    // Letters were read but they are not a reserved word.
+    return 0;
+  }
+  return -1;
+}
+
+// Scan an identifier (letters, digits and underscores) from `in`, mark
+// this token as IDENT and store the scanned text as a string in `value`.
+// `length` is reset and then advanced by READ_INTO_ as characters are read.
+void expr_t::token_t::parse_ident(std::istream& in)
+{
+  kind   = IDENT;
+  length = 0;
+
+  // The cast to unsigned char is required: std::isalnum has undefined
+  // behaviour when handed a negative plain char (any byte >= 0x80 on
+  // platforms where char is signed).
+  char c, buf[256];
+  READ_INTO_(in, buf, 255, c, length,
+             std::isalnum(static_cast<unsigned char>(c)) || c == '_');
+
+  value.set_string(buf);
+}
+
+// Scan the next token from `in` into this object.
+//
+// The first significant character is obtained from peek_next_nonws()
+// (presumably skipping leading whitespace).  On return `kind` identifies
+// the token, `symbol` holds the operator text for operator tokens, `value`
+// holds the payload of VALUE and IDENT tokens, and `length` records how
+// many characters the token occupied so that rewind() can seek back over
+// it.
+//
+// `pflags` decides whether '/' is the division operator (PARSE_OP_CONTEXT)
+// or opens a regular expression, and its PARSE_NO_MIGRATE / PARSE_NO_REDUCE
+// bits are forwarded to amount parsing.
+//
+// Raises parse_error when the stream is unusable, a closing delimiter is
+// missing, or the stream cannot be repositioned.  Any std::exception that
+// escapes amount parsing is rethrown after the token is marked as ERROR.
+void expr_t::token_t::next(std::istream& in, const parse_flags_t& pflags)
+{
+  if (in.eof()) {
+    kind = TOK_EOF;
+    return;
+  }
+  if (! in.good())
+    throw_(parse_error, _("Input stream no longer valid"));
+
+  char c = peek_next_nonws(in);
+
+  // Looking for the next character may itself have hit the end of input.
+  if (in.eof()) {
+    kind = TOK_EOF;
+    return;
+  }
+  if (! in.good())
+    throw_(parse_error, _("Input stream no longer valid"));
+
+  // Default assumption: a one-character operator.  Multi-character tokens
+  // overwrite these below.
+  symbol[0] = c;
+  symbol[1] = '\0';
+
+  length = 1;
+
+  switch (c) {
+  case '&':
+    // Both "&" and "&&" mean logical AND; only the length differs.
+    in.get(c);
+    kind = KW_AND;
+    if (static_cast<char>(in.peek()) == '&') {
+      in.get(c);
+      length = 2;
+    }
+    break;
+  case '|':
+    // Both "|" and "||" mean logical OR; only the length differs.
+    in.get(c);
+    kind = KW_OR;
+    if (static_cast<char>(in.peek()) == '|') {
+      in.get(c);
+      length = 2;
+    }
+    break;
+
+  case '(':
+    in.get(c);
+    kind = LPAREN;
+    break;
+  case ')':
+    in.get(c);
+    kind = RPAREN;
+    break;
+
+  case '[': {
+    // "[...]" is a date specifier; its starting date becomes the value.
+    in.get(c);
+
+    char buf[256];
+    READ_INTO_(in, buf, 255, c, length, c != ']');
+    if (c != ']')
+      expected(']', c);
+
+    in.get(c);                  // consume the closing ']'
+    length++;
+
+    date_interval_t timespan(buf);
+    optional<date_t> begin = timespan.begin();
+    if (! begin)
+      throw_(parse_error,
+             _("Date specifier does not refer to a starting date"));
+    kind  = VALUE;
+    value = *begin;
+    break;
+  }
+
+  case '\'':
+  case '"': {
+    // A quoted string, terminated by the same character that opened it.
+    char delim;
+    in.get(delim);
+    char buf[4096];
+    READ_INTO_(in, buf, 4095, c, length, c != delim);
+    if (c != delim)
+      expected(delim, c);
+    in.get(c);                  // consume the closing quote
+    length++;
+    kind = VALUE;
+    value.set_string(buf);
+    break;
+  }
+
+  case '{': {
+    // "{amount}" is a literal amount.  Its length is measured from stream
+    // positions because the amount parser consumes an unknown number of
+    // characters; counting only the two braces would make rewind() seek
+    // back too little.
+    istream_pos_type start = in.tellg();
+
+    in.get(c);
+    amount_t temp;
+    temp.parse(in, PARSE_NO_MIGRATE);
+
+    // A failed get() leaves `c` untouched (it would still hold '{'), so
+    // report the missing brace explicitly instead of a stale character.
+    if (! in.get(c))
+      expected('}', '\0');
+    if (c != '}')
+      expected('}', c);
+
+    length = static_cast<std::size_t>(in.tellg() - start);
+    kind   = VALUE;
+    value  = temp;
+    break;
+  }
+
+  case '!':
+    in.get(c);
+    c = static_cast<char>(in.peek());
+    if (c == '=') {
+      in.get(c);
+      symbol[1] = c;
+      symbol[2] = '\0';
+      kind = NEQUAL;
+      length = 2;
+      break;
+    }
+    else if (c == '~') {
+      in.get(c);
+      symbol[1] = c;
+      symbol[2] = '\0';
+      kind = NMATCH;
+      length = 2;
+      break;
+    }
+    kind = EXCLAM;
+    break;
+
+  case '-':
+    in.get(c);
+    kind = MINUS;
+    break;
+  case '+':
+    in.get(c);
+    kind = PLUS;
+    break;
+
+  case '*':
+    in.get(c);
+    kind = STAR;
+    break;
+
+  case '?':
+    in.get(c);
+    kind = QUERY;
+    break;
+  case ':':
+    in.get(c);
+    c = static_cast<char>(in.peek());
+    if (c == '=') {
+      in.get(c);
+      symbol[1] = c;
+      symbol[2] = '\0';
+      kind = DEFINE;
+      length = 2;
+      break;
+    }
+    kind = COLON;
+    break;
+
+  case '/': {
+    in.get(c);
+    if (pflags.has_flags(PARSE_OP_CONTEXT)) { // operator context
+      kind = SLASH;
+    } else {                    // terminal context
+      // Read in the regexp
+      char buf[256];
+      READ_INTO_(in, buf, 255, c, length, c != '/');
+      if (c != '/')
+        expected('/', c);
+      in.get(c);                // consume the closing '/'
+      length++;
+
+      kind = VALUE;
+      value.set_mask(buf);
+    }
+    break;
+  }
+
+  case '=':
+    // "=" and "==" are both equality; "=~" is a match.
+    in.get(c);
+    c = static_cast<char>(in.peek());
+    if (c == '~') {
+      in.get(c);
+      symbol[1] = c;
+      symbol[2] = '\0';
+      kind = MATCH;
+      length = 2;
+      break;
+    }
+    else if (c == '=') {
+      in.get(c);
+      symbol[1] = c;
+      symbol[2] = '\0';
+      kind = EQUAL;
+      length = 2;
+      break;
+    }
+    kind = EQUAL;
+    break;
+
+  case '<':
+    in.get(c);
+    if (static_cast<char>(in.peek()) == '=') {
+      in.get(c);
+      symbol[1] = c;
+      symbol[2] = '\0';
+      kind = LESSEQ;
+      length = 2;
+      break;
+    }
+    kind = LESS;
+    break;
+
+  case '>':
+    in.get(c);
+    if (static_cast<char>(in.peek()) == '=') {
+      in.get(c);
+      symbol[1] = c;
+      symbol[2] = '\0';
+      kind = GREATEREQ;
+      length = 2;
+      break;
+    }
+    kind = GREATER;
+    break;
+
+  case '.':
+    in.get(c);
+    kind = DOT;
+    break;
+
+  case ',':
+    in.get(c);
+    kind = COMMA;
+    break;
+
+  case ';':
+    in.get(c);
+    kind = SEMI;
+    break;
+
+  default: {
+    istream_pos_type pos = in.tellg();
+
+    // First, check to see if it's a reserved word, such as: and or not
+    // (the cast avoids undefined behaviour in std::isalpha for negative
+    // plain-char values).
+    int result = parse_reserved_word(in);
+    if (std::isalpha(static_cast<unsigned char>(c)) && result == 1)
+      break;
+
+    // If not, rewind back to the beginning of the word to scan it
+    // again.  If the result was -1, it means no identifier was scanned
+    // so we don't have to rewind.
+    if (result == 0) {
+      in.clear();
+      in.seekg(pos, std::ios::beg);
+      if (in.fail())
+        throw_(parse_error, _("Failed to reset input stream"));
+    }
+
+    // When in relaxed parsing mode, we want to migrate commodity flags
+    // so that any precision specified by the user updates the current
+    // maximum displayed precision.
+    parse_flags_t parse_flags;
+
+    if (pflags.has_flags(PARSE_NO_MIGRATE))
+      parse_flags.add_flags(PARSE_NO_MIGRATE);
+    if (pflags.has_flags(PARSE_NO_REDUCE))
+      parse_flags.add_flags(PARSE_NO_REDUCE);
+
+    try {
+      amount_t temp;
+      if (! temp.parse(in, parse_flags.plus_flags(PARSE_SOFT_FAIL))) {
+        // If the amount had no commodity, it must be an unambiguous
+        // variable reference
+
+        in.clear();
+        in.seekg(pos, std::ios::beg);
+        if (in.fail())
+          throw_(parse_error, _("Failed to reset input stream"));
+
+        // Text starting with a digit or '.' that did not parse as an
+        // amount cannot be an identifier either.
+        c = static_cast<char>(in.peek());
+        if (std::isdigit(static_cast<unsigned char>(c)) || c == '.')
+          expected('\0', c);
+
+        parse_ident(in);
+      } else {
+        kind   = VALUE;
+        value  = temp;
+        length = static_cast<std::size_t>(in.tellg() - pos);
+      }
+    }
+    catch (const std::exception&) {
+      // Mark the token as bad and record how far we got before
+      // propagating the original exception unchanged.
+      kind   = ERROR;
+      length = static_cast<std::size_t>(in.tellg() - pos);
+      throw;
+    }
+    break;
+  }
+  }
+}
+
+// Move the read position of `in` back by this token's `length`, so that
+// the token can be scanned again.  Raises parse_error if the stream cannot
+// be repositioned.
+void expr_t::token_t::rewind(std::istream& in)
+{
+  // Drop any eof/fail state left by scanning up to the end of the input;
+  // otherwise the seek below can fail even though the position is valid.
+  in.clear();
+
+  // Convert to a signed offset *before* negating.  `length` appears to be
+  // an unsigned std::size_t; negating it directly yields a huge positive
+  // number, which turns into a forward seek wherever the stream offset
+  // type is wider than std::size_t.
+  in.seekg(- static_cast<std::streamoff>(length), std::ios::cur);
+  if (in.fail())
+    throw_(parse_error, _("Failed to rewind input stream"));
+}
+
+
+void expr_t::token_t::unexpected()
+{
+  kind_t prev_kind = kind;
+
+  kind = ERROR;
+
+  switch (prev_kind) {
+  case TOK_EOF:
+    throw_(parse_error, _("Unexpected end of expression"));
+  case IDENT:
+    throw_(parse_error, _("Unexpected symbol '%1'") << value);
+  case VALUE:
+    throw_(parse_error, _("Unexpected value '%1'") << value);
+  default:
+    throw_(parse_error, _("Unexpected token '%1'") << symbol);
+  }
+}
+
+void expr_t::token_t::expected(char wanted, char c)
+{
+  kind = ERROR;
+
+  if (c == '\0' || c == -1) {
+    if (wanted == '\0' || wanted == -1)
+      throw_(parse_error, _("Unexpected end"));
+    else
+      throw_(parse_error, _("Missing '%1'") << wanted);
+  } else {
+    if (wanted == '\0' || wanted == -1)
+      throw_(parse_error, _("Invalid char '%1'") << c);
+    else
+      throw_(parse_error, _("Invalid char '%1' (wanted '%2')") << c << wanted);
+  }
+}
+
+} // namespace ledger