From 86a23cd263a2e67351a3d748ffc69d65f4746184 Mon Sep 17 00:00:00 2001
From: Christoph Dittmann <github@christoph-d.de>
Date: Sun, 10 Jun 2018 13:03:53 +0100
Subject: Remove TOK_A_YEAR token

This fixes #1626.

The tokenizer eagerly classifies 4-digit integers as TOK_A_YEAR
tokens.  In some contexts, such as "every 1000 years", the number
is a count rather than a year, and this misclassification causes
errors.

I think the tokenizer does not have enough information available to
distinguish between integers and years.

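After this patch, the tokenizer always classifies integers as
TOK_INT tokens.  The "has 4 digits" heuristic for deciding whether
an integer is a year is replaced by a check in determine_when(),
where the distinction is actually needed: an integer greater than
31 cannot be a day of the month, so it is treated as a year.  This
is also slightly more general than requiring exactly 4 digits.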
---
 src/times.cc | 31 ++++++++-----------------------
 1 file changed, 8 insertions(+), 23 deletions(-)

(limited to 'src/times.cc')
diff --git a/src/times.cc b/src/times.cc
index 8e4df020..eda71ae7 100644
--- a/src/times.cc
+++ b/src/times.cc
@@ -420,7 +420,6 @@ class date_parser_t
         TOK_DASH,
         TOK_DOT,
 
-        TOK_A_YEAR,
         TOK_A_MONTH,
         TOK_A_WDAY,
 
@@ -512,9 +511,6 @@ class date_parser_t
         case TOK_SLASH:     return "/";
         case TOK_DASH:      return "-";
         case TOK_DOT:       return ".";
-        case TOK_A_YEAR:
-          out << boost::get<date_specifier_t::year_type>(*value);
-          break;
         case TOK_A_MONTH:
           out << date_specifier_t::month_type
             (boost::get<date_time::months_of_year>(*value));
@@ -566,7 +562,6 @@ class date_parser_t
         case TOK_SLASH:     out << "TOK_SLASH"; break;
         case TOK_DASH:      out << "TOK_DASH"; break;
         case TOK_DOT:       out << "TOK_DOT"; break;
-        case TOK_A_YEAR:    out << "TOK_A_YEAR"; break;
         case TOK_A_MONTH:   out << "TOK_A_MONTH"; break;
         case TOK_A_WDAY:    out << "TOK_A_WDAY"; break;
         case TOK_AGO:       out << "TOK_AGO"; break;
@@ -727,7 +722,11 @@ void date_parser_t::determine_when(date_parser_t::lexer_t::token_t& tok,
       when += gregorian::days(amount * adjust);
       break;
     default:
-      specifier.day = date_specifier_t::day_type(amount);
+      if (amount > 31) {
+        specifier.year = date_specifier_t::year_type(amount);
+      } else {
+        specifier.day = date_specifier_t::day_type(amount);
+      }
       break;
     }
 
@@ -832,16 +831,13 @@ void date_parser_t::determine_when(date_parser_t::lexer_t::token_t& tok,
     break;
   }
 
-  case lexer_t::token_t::TOK_A_YEAR:
-    specifier.year = boost::get<date_specifier_t::year_type>(*tok.value);
-    break;
   case lexer_t::token_t::TOK_A_MONTH:
     specifier.month =
       date_specifier_t::month_type
         (boost::get<date_time::months_of_year>(*tok.value));
     tok = lexer.peek_token();
     switch (tok.kind) {
-    case lexer_t::token_t::TOK_A_YEAR:
+    case lexer_t::token_t::TOK_INT:
       specifier.year = boost::get<date_specifier_t::year_type>(*tok.value);
       break;
     case lexer_t::token_t::END_REACHED:
@@ -898,12 +894,6 @@ date_interval_t date_parser_t::parse()
       determine_when(tok, *inclusion_specifier);
       break;
 
-    case lexer_t::token_t::TOK_A_YEAR:
-      if (! inclusion_specifier)
-        inclusion_specifier = date_specifier_t();
-      determine_when(tok, *inclusion_specifier);
-      break;
-
     case lexer_t::token_t::TOK_A_MONTH:
       if (! inclusion_specifier)
         inclusion_specifier = date_specifier_t();
@@ -1612,13 +1602,8 @@ date_parser_t::lexer_t::token_t date_parser_t::lexer_t::next_token()
 
   if (! term.empty()) {
     if (std::isdigit(term[0])) {
-      if (term.length() == 4)
-        return token_t(token_t::TOK_A_YEAR,
-                       token_t::content_t
-                       (lexical_cast<date_specifier_t::year_type>(term)));
-      else
-        return token_t(token_t::TOK_INT,
-                       token_t::content_t(lexical_cast<unsigned short>(term)));
+      return token_t(token_t::TOK_INT,
+                     token_t::content_t(lexical_cast<unsigned short>(term)));
     }
     else if (std::isalpha(term[0])) {
       to_lower(term);
-- 
cgit v1.2.3