From 3e91c3bf2c3662c40f0fe7c9cf197f6b0c725269 Mon Sep 17 00:00:00 2001 From: John Wiegley Date: Tue, 17 Nov 2009 22:23:46 -0500 Subject: Added several new types for working with dates and ranges date_specifier_t :: This is like a plain date_t, except it knows what wasn't specified. For example, if 2008/06 is parsed, it becomes a date_specifier_t which knows that no day was given. If you ask for the begin() date of the specifier, it will be 2008/06/01; the end() date (which is exclusive) will be 2008/07/01. date_range_t :: A date range is a range of two specifiers, either of which (but not both) may be omitted. This makes it possible to represent expressions like "from june to july", where no day or year is given. The exact dates will be inferred by using the current year, and fixing the range from YEAR/06/01 to YEAR/07/01. That is, the range goes from the begin() of one date specifier to the begin() of the other. date_specifier_or_range_t :: A variant type that can be either a date_specifier_t or a date_range_t. It's just a wrapper to represent the fact that ranges can be implicit via specifiers (such as, "in june"), or explicit via ranges ("since 2008"). --- src/times.cc | 288 ++++++++++++++++++++++++++++++++++++++++++++++++++++ src/times.h | 327 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 615 insertions(+) diff --git a/src/times.cc b/src/times.cc index 2fb8479b..3070249c 100644 --- a/src/times.cc +++ b/src/times.cc @@ -318,6 +318,43 @@ date_t parse_date(const char * str, optional current_year) return parse_date_mask(str, current_year); } +date_t +date_specifier_t::begin(const optional& current_year) const +{ + assert(year || current_year); + + year_type the_year = year ? *year : static_cast(*current_year); + month_type the_month = month ? *month : date_t::month_type(1); + day_type the_day = day ? *day : date_t::day_type(1); + + if (day) + assert(! wday); + else if (wday) + assert(! day); + + // jww (2009-11-16): Handle wday. 
If a month is set, find the most recent + // wday in that month; if the year is set, then in that year. + + return gregorian::date(static_cast(the_year), + static_cast(the_month), + static_cast(the_day)); +} + +date_t +date_specifier_t::end(const optional& current_year) const +{ + if (day || wday) + return begin(current_year) + gregorian::days(1); + else if (month) + return begin(current_year) + gregorian::months(1); + else if (year) + return begin(current_year) + gregorian::years(1); + else { + assert(false); + return date_t(); + } +} + std::ostream& operator<<(std::ostream& out, const date_duration_t& duration) { @@ -758,6 +795,171 @@ void date_interval_t::parse(std::istream& in) } } +date_parser_t::lexer_t::token_t date_parser_t::lexer_t::next_token() /* Returns the next token of [begin, end). A pending token_cache entry is handed back first (and the cache cleared). Otherwise: whitespace is skipped; END_REACHED is returned at end of input; '/', '-' and '.' are single-character tokens; a digit-led word is first tried as a whole date through parse_date_mask (any exception from that attempt is discarded); failing that, a run of characters of the same class (alphanumeric vs. not) is classified as a number, month, weekday or keyword. An unrecognized word yields UNKNOWN; a non-alphanumeric run raises date_error through expected(). */ +{ + if (token_cache.kind != token_t::UNKNOWN) { + token_t tok = token_cache; + token_cache = token_t(); + return tok; + } + + while (begin != end && std::isspace(static_cast<unsigned char>(*begin))) + begin++; + + if (begin == end) + return token_t(token_t::END_REACHED); + + switch (*begin) { + case '/': ++begin; return token_t(token_t::TOK_SLASH); + case '-': ++begin; return token_t(token_t::TOK_DASH); + case '.': ++begin; return token_t(token_t::TOK_DOT); + default: break; + } + + string::const_iterator start = begin; + + // If the first character is a digit, try parsing the whole argument as a + // date using the typical date formats. This allows not only dates like + // "2009/08/01", but also dates that fit the user's --input-date-format, + // assuming their format fits in one argument and begins with a digit. + if (std::isdigit(static_cast<unsigned char>(*begin))) { + try { + string::const_iterator i = begin; + for (i = begin; i != end && ! std::isspace(static_cast<unsigned char>(*i)); i++) {} + assert(i != begin); + + string possible_date(start, i); + date_traits_t traits; + + date_t when = parse_date_mask(possible_date.c_str(), none, &traits); + if (! 
when.is_not_a_date()) { + begin = i; + return token_t(token_t::TOK_DATE, + token_t::content_t(date_specifier_t(when, traits))); + } + } + catch (...) {} + } + + string term; + bool alnum = std::isalnum(static_cast<unsigned char>(*begin)) != 0; /* the <cctype> predicates return "nonzero", not 1, so normalize to bool before comparing below; the unsigned char casts keep negative char values (non-ASCII bytes) from invoking undefined behavior */ + for (start = begin; (begin != end && ! std::isspace(static_cast<unsigned char>(*begin)) && + alnum == (std::isalnum(static_cast<unsigned char>(*begin)) != 0)); begin++) + term.push_back(*begin); + + if (! term.empty()) { + if (std::isdigit(static_cast<unsigned char>(term[0]))) { + return token_t(term.length() == 4 ? + token_t::TOK_A_YEAR : token_t::TOK_INT, + token_t::content_t(lexical_cast(term))); + } + else if (std::isalpha(static_cast<unsigned char>(term[0]))) { + if (optional month = + string_to_month_of_year(term)) { + date_specifier_t specifier; + specifier.month = static_cast(*month); + return token_t(token_t::TOK_A_MONTH, token_t::content_t(specifier)); + } + else if (optional wday = + string_to_day_of_week(term)) { + date_specifier_t specifier; + specifier.wday = static_cast(*wday); + return token_t(token_t::TOK_A_WDAY, token_t::content_t(specifier)); + } + else if (term == _("from") || term == _("since")) + return token_t(token_t::TOK_SINCE); + else if (term == _("to") || term == _("until")) + return token_t(token_t::TOK_UNTIL); + else if (term == _("in")) + return token_t(token_t::TOK_IN); + else if (term == _("this")) + return token_t(token_t::TOK_THIS); + else if (term == _("next")) + return token_t(token_t::TOK_NEXT); + else if (term == _("last")) + return token_t(token_t::TOK_LAST); + else if (term == _("year")) + return token_t(token_t::TOK_YEAR); + else if (term == _("quarter")) + return token_t(token_t::TOK_QUARTER); + else if (term == _("month")) + return token_t(token_t::TOK_MONTH); + else if (term == _("week")) + return token_t(token_t::TOK_WEEK); + else if (term == _("day")) + return token_t(token_t::TOK_DAY); + else if (term == _("yearly")) + return token_t(token_t::TOK_YEARLY); + else if (term == _("quarterly")) + return token_t(token_t::TOK_QUARTERLY); + else if (term == _("bimonthly")) + return token_t(token_t::TOK_BIMONTHLY); + else if (term == 
_("monthly")) + return token_t(token_t::TOK_MONTHLY); + else if (term == _("biweekly")) + return token_t(token_t::TOK_BIWEEKLY); + else if (term == _("weekly")) + return token_t(token_t::TOK_WEEKLY); + else if (term == _("daily")) + return token_t(token_t::TOK_DAILY); + else if (term == _("years")) + return token_t(token_t::TOK_YEARS); + else if (term == _("quarters")) + return token_t(token_t::TOK_QUARTERS); + else if (term == _("months")) + return token_t(token_t::TOK_MONTHS); + else if (term == _("weeks")) + return token_t(token_t::TOK_WEEKS); + else if (term == _("days")) + return token_t(token_t::TOK_DAYS); + } + else { + token_t::expected('\0', term[0]); + begin = ++start; + } + } else { + token_t::expected('\0', *begin); + } + + return token_t(token_t::UNKNOWN); +} + +void date_parser_t::lexer_t::token_t::unexpected() +{ + kind_t prev_kind = kind; + + kind = UNKNOWN; + + switch (prev_kind) { + case END_REACHED: + throw_(date_error, _("Unexpected end of expression")); + default: + throw_(date_error, _("Unexpected token '%1'") << to_string()); + } +} + +void date_parser_t::lexer_t::token_t::expected(char wanted, char c) +{ + if (c == '\0' || c == -1) { + if (wanted == '\0' || wanted == -1) + throw_(date_error, _("Unexpected end")); + else + throw_(date_error, _("Missing '%1'") << wanted); + } else { + if (wanted == '\0' || wanted == -1) + throw_(date_error, _("Invalid char '%1'") << c); + else + throw_(date_error, _("Invalid char '%1' (wanted '%2')") << c << wanted); + } +} + +date_interval_t date_parser_t::parse_date_expr() +{ + date_interval_t interval; + + return interval; +} + namespace { typedef std::map datetime_io_map; typedef std::map date_io_map; @@ -880,4 +1082,90 @@ void times_shutdown() is_initialized = false; } } + +void show_period_tokens(std::ostream& out, const string& arg) +{ + date_parser_t::lexer_t lexer(arg.begin(), arg.end()); + + date_parser_t::lexer_t::token_t token; + do { + token = lexer.next_token(); + out << _("token: ") << 
token.to_string() << std::endl; + } + while (token.kind != date_parser_t::lexer_t::token_t::END_REACHED); /* do/while: the END_REACHED token itself is printed before the loop stops */ +} + +void analyze_period(std::ostream& out, const string& arg) /* Diagnostic dump: builds a date_parser_t from arg, calls parse(), and writes the resulting interval's start, finish, skip, factor and duration to out. If find_period() then succeeds (seeded with the start date, or the current date when there is none), the same fields are written again, followed by up to 20 successive periods. */ +{ + date_parser_t date_parser(arg); + + date_interval_t interval = date_parser.parse(); + + out << _("global details => ") << std::endl << std::endl; + + if (interval.start) + out << _(" start: ") << format_date(*interval.start) << std::endl; + else /* no explicit start: show the current date in its place */ + out << _(" start: TODAY: ") << format_date(CURRENT_DATE()) << std::endl; + if (interval.finish) + out << _(" finish: ") << format_date(*interval.finish) << std::endl; + + if (interval.skip_duration) + out << _(" skip: ") << *interval.skip_duration << std::endl; + if (interval.factor) + out << _(" factor: ") << interval.factor << std::endl; + if (interval.duration) + out << _("duration: ") << *interval.duration << std::endl; + + if (interval.find_period(interval.start ? + *interval.start : CURRENT_DATE())) { + out << std::endl + << _("after finding first period => ") << std::endl + << std::endl; + + if (interval.start) + out << _(" start: ") << format_date(*interval.start) << std::endl; + if (interval.finish) + out << _(" finish: ") << format_date(*interval.finish) << std::endl; + + if (interval.skip_duration) + out << _(" skip: ") << *interval.skip_duration << std::endl; + if (interval.factor) + out << _(" factor: ") << interval.factor << std::endl; + if (interval.duration) + out << _("duration: ") << *interval.duration << std::endl; + + out << std::endl; + + for (int i = 0; i < 20 && interval; i++, ++interval) { /* list at most 20 periods, advancing the interval each round */ + out << std::right; + out.width(2); + + out << i << "): " << format_date(*interval.start); + if (interval.end_of_duration) + out << " -- " << format_date(*interval.inclusive_end()); + out << std::endl; + + if (! 
interval.skip_duration) + break; + } + } +} + } // namespace ledger + +#if defined(TIMES_HARNESS) + +int main(int argc, char *argv[]) +{ + if (argc > 1) { + ledger::times_initialize(); + ledger::analyze_period(std::cout, argv[1]); + ledger::times_shutdown(); + } else { + std::cerr << "Usage: times " << std::endl; + } + return 0; +} + +#endif // TIMES_HARNESS diff --git a/src/times.h b/src/times.h index c50b0366..b13f48b1 100644 --- a/src/times.h +++ b/src/times.h @@ -181,6 +181,105 @@ private: #endif // HAVE_BOOST_SERIALIZATION }; +class date_specifier_t +{ + friend class date_parser_t; + +#if 0 + typedef date_t::year_type year_type; +#else + typedef unsigned short year_type; +#endif + typedef date_t::month_type month_type; + typedef date_t::day_type day_type; + typedef date_t::day_of_week_type day_of_week_type; + + optional year; + optional month; + optional day; + optional wday; + +public: + date_specifier_t() {} + date_specifier_t(const date_t& date, const date_traits_t& traits) { + if (traits.has_year) + year = date.year(); + if (traits.has_month) + month = date.month(); + if (traits.has_day) + day = date.day(); + } + + date_t begin(const optional& current_year = none) const; + date_t end(const optional& current_year = none) const; + + bool is_within(const date_t& date, + const optional& current_year = none) const { + return date >= begin(current_year) && date < end(current_year); + } + +#if defined(HAVE_BOOST_SERIALIZATION) +private: + /** Serialization. 
*/ + + friend class boost::serialization::access; + + template + void serialize(Archive& ar, const unsigned int /* version */) { + ar & year; + ar & month; + ar & day; + ar & wday; + } +#endif // HAVE_BOOST_SERIALIZATION +}; + +class date_range_t /* A half-open date range built from two optional specifiers; a missing side leaves that side unbounded (begin()/end() return none and is_within() treats it as always satisfied). */ +{ + friend class date_parser_t; + + optional range_begin; + optional range_end; + +public: + optional + begin(const optional& current_year = none) const { + if (range_begin) + return range_begin->begin(current_year); + else + return none; + } + optional + end(const optional& current_year = none) const { + if (range_end) + return range_end->begin(current_year); /* exclusive upper bound: the range runs from the begin() of the first specifier to the begin() of the second, so "from june to july" stops at YEAR/07/01; using the specifier's end() here pushed the bound a whole period too far (YEAR/08/01) */ + else + return none; + } + + bool is_within(const date_t& date, + const optional& current_year = none) const { + optional b = begin(current_year); + optional e = end(current_year); + bool after_begin = b ? date >= *b : true; + bool before_end = e ? date < *e : true; + return after_begin && before_end; + } + +#if defined(HAVE_BOOST_SERIALIZATION) +private: + /** Serialization. */ + + friend class boost::serialization::access; + + template + void serialize(Archive& ar, const unsigned int /* version */) { + ar & range_begin; + ar & range_end; + } +#endif // HAVE_BOOST_SERIALIZATION +}; + struct date_duration_t { enum skip_quantum_t { @@ -247,6 +346,45 @@ private: #endif // HAVE_BOOST_SERIALIZATION }; +class date_specifier_or_range_t +{ + typedef variant value_type; + + value_type specifier_or_range; + +public: + optional + begin(const optional& current_year = none) const { + if (specifier_or_range.type() == typeid(date_specifier_t)) + return boost::get(specifier_or_range).begin(current_year); + else if (specifier_or_range.type() == typeid(date_range_t)) + return boost::get(specifier_or_range).begin(current_year); + else + return none; + } + optional + end(const optional& current_year = none) const { + if (specifier_or_range.type() == typeid(date_specifier_t)) + return boost::get(specifier_or_range).end(current_year); + else if (specifier_or_range.type() == 
typeid(date_range_t)) + return boost::get(specifier_or_range).end(current_year); + else + return none; + } + +#if defined(HAVE_BOOST_SERIALIZATION) +private: + /** Serialization. */ + + friend class boost::serialization::access; + + template + void serialize(Archive& ar, const unsigned int /* version */) { + ar & specifier_or_range; + } +#endif // HAVE_BOOST_SERIALIZATION +}; + class date_interval_t : public equality_comparable { public: @@ -343,9 +481,198 @@ private: #endif // HAVE_BOOST_SERIALIZATION }; +class date_parser_t /* Parser for date/period expressions: keeps its own copy of the expression text and a lexer_t that walks that copy. */ +{ + friend void show_period_tokens(std::ostream& out, const string& arg); + + class lexer_t /* Tokenizer over the character range [begin, end), with a one-token look-ahead slot (token_cache). */ + { + friend class date_parser_t; + + string::const_iterator begin; + string::const_iterator end; + + public: + struct token_t /* One lexed token: its kind plus an optional payload. */ + { + enum kind_t { + UNKNOWN, /* also serves as the "nothing cached" marker tested by push_token() and peek_token() */ + + TOK_DATE, + TOK_INT, + TOK_SLASH, + TOK_DASH, + TOK_DOT, + + TOK_A_YEAR, + TOK_A_MONTH, + TOK_A_DAY, + TOK_A_WDAY, + + TOK_SINCE, + TOK_UNTIL, + TOK_IN, + TOK_THIS, + TOK_NEXT, + TOK_LAST, + + TOK_YEAR, + TOK_QUARTER, + TOK_MONTH, + TOK_WEEK, + TOK_DAY, + + TOK_YEARLY, + TOK_QUARTERLY, + TOK_BIMONTHLY, + TOK_MONTHLY, + TOK_BIWEEKLY, + TOK_WEEKLY, + TOK_DAILY, + + TOK_YEARS, + TOK_QUARTERS, + TOK_MONTHS, + TOK_WEEKS, + TOK_DAYS, + + END_REACHED + + } kind; + + typedef variant content_t; + + optional value; + + explicit token_t(kind_t _kind = UNKNOWN, + const optional& _value = none) + : kind(_kind), value(_value) { + TRACE_CTOR(date_parser_t::lexer_t::token_t, ""); + } + token_t(const token_t& tok) + : kind(tok.kind), value(tok.value) { + TRACE_CTOR(date_parser_t::lexer_t::token_t, "copy"); + } + ~token_t() throw() { + TRACE_DTOR(date_parser_t::lexer_t::token_t); + } + + token_t& operator=(const token_t& tok) { + if (this != &tok) { + kind = tok.kind; + value = tok.value; + } + return *this; + } + + operator bool() const { /* true for every kind except END_REACHED, UNKNOWN included */ + return kind != END_REACHED; + } + + string to_string() const { /* returns the enumerator's own name as text */ + switch (kind) { + case UNKNOWN: return "UNKNOWN"; + case TOK_DATE: return "TOK_DATE"; + case TOK_INT: 
return "TOK_INT"; + case TOK_SLASH: return "TOK_SLASH"; + case TOK_DASH: return "TOK_DASH"; + case TOK_DOT: return "TOK_DOT"; + case TOK_A_YEAR: return "TOK_A_YEAR"; + case TOK_A_MONTH: return "TOK_A_MONTH"; + case TOK_A_DAY: return "TOK_A_DAY"; + case TOK_A_WDAY: return "TOK_A_WDAY"; + case TOK_SINCE: return "TOK_SINCE"; + case TOK_UNTIL: return "TOK_UNTIL"; + case TOK_IN: return "TOK_IN"; + case TOK_THIS: return "TOK_THIS"; + case TOK_NEXT: return "TOK_NEXT"; + case TOK_LAST: return "TOK_LAST"; + case TOK_YEAR: return "TOK_YEAR"; + case TOK_QUARTER: return "TOK_QUARTER"; + case TOK_MONTH: return "TOK_MONTH"; + case TOK_WEEK: return "TOK_WEEK"; + case TOK_DAY: return "TOK_DAY"; + case TOK_YEARLY: return "TOK_YEARLY"; + case TOK_QUARTERLY: return "TOK_QUARTERLY"; + case TOK_BIMONTHLY: return "TOK_BIMONTHLY"; + case TOK_MONTHLY: return "TOK_MONTHLY"; + case TOK_BIWEEKLY: return "TOK_BIWEEKLY"; + case TOK_WEEKLY: return "TOK_WEEKLY"; + case TOK_DAILY: return "TOK_DAILY"; + case TOK_YEARS: return "TOK_YEARS"; + case TOK_QUARTERS: return "TOK_QUARTERS"; + case TOK_MONTHS: return "TOK_MONTHS"; + case TOK_WEEKS: return "TOK_WEEKS"; + case TOK_DAYS: return "TOK_DAYS"; + case END_REACHED: return "END_REACHED"; + } + assert(false); + return empty_string; + } + + void unexpected(); /* resets kind to UNKNOWN, then throws date_error describing the token that was held */ + static void expected(char wanted, char c = '\0'); /* always throws date_error; the message depends on which of wanted and c is '\0' or -1 */ + }; + + token_t token_cache; + + lexer_t(string::const_iterator _begin, + string::const_iterator _end) + : begin(_begin), end(_end) + { + TRACE_CTOR(date_parser_t::lexer_t, ""); + } + lexer_t(const lexer_t& lexer) + : begin(lexer.begin), end(lexer.end), + token_cache(lexer.token_cache) + { + TRACE_CTOR(date_parser_t::lexer_t, "copy"); + } + ~lexer_t() throw() { + TRACE_DTOR(date_parser_t::lexer_t); + } + + token_t next_token(); + void push_token(token_t tok) { /* hands one token back to the lexer; only legal while the cache slot is empty */ + assert(token_cache.kind == token_t::UNKNOWN); + token_cache = tok; + } + token_t peek_token() { /* look-ahead: fills the cache from next_token() when it is empty and returns a copy, leaving the token cached */ + if (token_cache.kind == token_t::UNKNOWN) + token_cache = next_token(); + return 
token_cache; + } + }; + + string arg; /* must stay declared before lexer: members are initialized in declaration order and lexer is built from arg's iterators */ + lexer_t lexer; + + date_interval_t parse_date_expr(); + +public: + date_parser_t(const string& _arg) + : arg(_arg), lexer(arg.begin(), arg.end()) { + TRACE_CTOR(date_parser_t, ""); + } + date_parser_t(const date_parser_t& parser) /* Copies the text, then re-anchors the lexer at the same offset inside our own arg. Copying parser.lexer directly would keep iterators into parser.arg, which dangle as soon as the source parser is destroyed. */ + : arg(parser.arg), + lexer(arg.begin() + (parser.lexer.begin - parser.arg.begin()), + arg.end()) { + lexer.token_cache = parser.lexer.token_cache; + TRACE_CTOR(date_parser_t, "copy"); + } + ~date_parser_t() throw() { + TRACE_DTOR(date_parser_t); + } + + date_interval_t parse() { + return date_interval_t(); + } +}; + void times_initialize(); void times_shutdown(); +void show_period_tokens(std::ostream& out, const string& arg); +void analyze_period(std::ostream& out, const string& arg); + std::ostream& operator<<(std::ostream& out, const date_duration_t& duration); } // namespace ledger -- cgit v1.2.3