From 86a23cd263a2e67351a3d748ffc69d65f4746184 Mon Sep 17 00:00:00 2001 From: Christoph Dittmann Date: Sun, 10 Jun 2018 13:03:53 +0100 Subject: Remove TOK_A_YEAR token This fixes #1626. The tokenizer eagerly classifies 4-digit integers as TOK_A_YEAR tokens. In some contexts such as "every 1000 years", this causes errors. I think the tokenizer does not have enough information available to distinguish between integers and years. After this patch, the tokenizer will always classify integers as TOK_INT tokens. The "has 4 digits" heuristic to determine if an integer is a year is moved to the place where it's actually needed (and it can be slightly more generic there, too). --- src/times.cc | 31 ++++++++----------------------- 1 file changed, 8 insertions(+), 23 deletions(-) (limited to 'src/times.cc') diff --git a/src/times.cc b/src/times.cc index 8e4df020..eda71ae7 100644 --- a/src/times.cc +++ b/src/times.cc @@ -420,7 +420,6 @@ class date_parser_t TOK_DASH, TOK_DOT, - TOK_A_YEAR, TOK_A_MONTH, TOK_A_WDAY, @@ -512,9 +511,6 @@ class date_parser_t case TOK_SLASH: return "/"; case TOK_DASH: return "-"; case TOK_DOT: return "."; - case TOK_A_YEAR: - out << boost::get(*value); - break; case TOK_A_MONTH: out << date_specifier_t::month_type (boost::get(*value)); @@ -566,7 +562,6 @@ class date_parser_t case TOK_SLASH: out << "TOK_SLASH"; break; case TOK_DASH: out << "TOK_DASH"; break; case TOK_DOT: out << "TOK_DOT"; break; - case TOK_A_YEAR: out << "TOK_A_YEAR"; break; case TOK_A_MONTH: out << "TOK_A_MONTH"; break; case TOK_A_WDAY: out << "TOK_A_WDAY"; break; case TOK_AGO: out << "TOK_AGO"; break; @@ -727,7 +722,11 @@ void date_parser_t::determine_when(date_parser_t::lexer_t::token_t& tok, when += gregorian::days(amount * adjust); break; default: - specifier.day = date_specifier_t::day_type(amount); + if (amount > 31) { + specifier.year = date_specifier_t::year_type(amount); + } else { + specifier.day = date_specifier_t::day_type(amount); + } break; } @@ -832,16 +831,13 @@ void date_parser_t::determine_when(date_parser_t::lexer_t::token_t& tok, break; } - case lexer_t::token_t::TOK_A_YEAR: - specifier.year = boost::get(*tok.value); - break; case lexer_t::token_t::TOK_A_MONTH: specifier.month = date_specifier_t::month_type (boost::get(*tok.value)); tok = lexer.peek_token(); switch (tok.kind) { - case lexer_t::token_t::TOK_A_YEAR: + case lexer_t::token_t::TOK_INT: specifier.year = boost::get(*tok.value); break; case lexer_t::token_t::END_REACHED: @@ -898,12 +894,6 @@ date_interval_t date_parser_t::parse() determine_when(tok, *inclusion_specifier); break; - case lexer_t::token_t::TOK_A_YEAR: - if (! inclusion_specifier) - inclusion_specifier = date_specifier_t(); - determine_when(tok, *inclusion_specifier); - break; - case lexer_t::token_t::TOK_A_MONTH: if (! inclusion_specifier) inclusion_specifier = date_specifier_t(); @@ -1612,13 +1602,8 @@ date_parser_t::lexer_t::token_t date_parser_t::lexer_t::next_token() if (! term.empty()) { if (std::isdigit(term[0])) { - if (term.length() == 4) - return token_t(token_t::TOK_A_YEAR, - token_t::content_t - (lexical_cast(term))); - else - return token_t(token_t::TOK_INT, - token_t::content_t(lexical_cast(term))); + return token_t(token_t::TOK_INT, + token_t::content_t(lexical_cast(term))); } else if (std::isalpha(term[0])) { to_lower(term); -- cgit v1.2.3