From aa9b07d79bff00506b913d1e56575c3859fc173f Mon Sep 17 00:00:00 2001 From: John Wiegley Date: Sun, 26 Feb 2012 15:45:15 -0600 Subject: Added --rich-data for 'convert', and SHA1 checksum checking --- doc/ledger.1 | 1 + src/convert.cc | 91 ++++++++-------------- src/csv.cc | 162 ++++++++++++++++++--------------------- src/csv.h | 37 +++++++-- src/item.cc | 2 +- src/journal.cc | 11 +++ src/journal.h | 3 + src/report.cc | 1 + src/report.h | 3 + src/textual.cc | 10 ++- src/utils.cc | 4 +- test/baseline/opt-rich-data.test | 0 12 files changed, 167 insertions(+), 158 deletions(-) create mode 100644 test/baseline/opt-rich-data.test diff --git a/doc/ledger.1 b/doc/ledger.1 index 9fe5c84c..21d43ead 100644 --- a/doc/ledger.1 +++ b/doc/ledger.1 @@ -397,6 +397,7 @@ appeared in the original journal file. .It Fl \-revalued .It Fl \-revalued-only .It Fl \-revalued-total Ar EXPR +.It Fl \-rich-data .It Fl \-seed Ar INT .It Fl \-script .It Fl \-sort Ar EXPR Pq Fl S diff --git a/src/convert.cc b/src/convert.cc index 1ef3a413..da4569cc 100644 --- a/src/convert.cc +++ b/src/convert.cc @@ -56,72 +56,41 @@ value_t convert_command(call_scope_t& args) account_t * bucket = journal.master->find_account(bucket_name); account_t * unknown = journal.master->find_account(_("Expenses:Unknown")); - // Make an amounts mapping for the account under consideration - - typedef std::map > post_map_t; - post_map_t post_map; - - xacts_iterator journal_iter(journal); - while (xact_t * xact = *journal_iter++) { - post_t * post = NULL; - xact_posts_iterator xact_iter(*xact); - while ((post = *xact_iter++) != NULL) { - if (post->account == bucket) - break; - } - if (post) { - post_map_t::iterator i = post_map.find(post->amount); - if (i == post_map.end()) { - std::list post_list; - post_list.push_back(post); - post_map.insert(post_map_t::value_type(post->amount, post_list)); - } else { - (*i).second.push_back(post); - } - } - } - // Create a flat list xacts_list current_xacts(journal.xacts_begin(), journal.xacts_end()); // Read in the series of transactions from the CSV file print_xacts formatter(report); - ifstream data(path(args.get(0))); - csv_reader reader(data); - - while (xact_t * xact = reader.read_xact(journal, bucket)) { - if (report.HANDLED(invert)) { - foreach (post_t * post, xact->posts) - post->amount.in_place_negate(); - } + path csv_file_path(args.get(0)); + ifstream data(csv_file_path); + csv_reader reader(data, csv_file_path); + + try { + while (xact_t * xact = reader.read_xact(journal, bucket, + report.HANDLED(rich_data))) { + if (report.HANDLED(invert)) { + foreach (post_t * post, xact->posts) + post->amount.in_place_negate(); + } - bool matched = false; - if (! xact->posts.front()->amount.is_null()) { - post_map_t::iterator i = post_map.find(- xact->posts.front()->amount); - if (i != post_map.end()) { - std::list& post_list((*i).second); - foreach (post_t * post, post_list) { - if (xact->code && post->xact->code && - *xact->code == *post->xact->code) { - matched = true; - break; - } - else if (xact->actual_date() == post->actual_date()) { - matched = true; - break; - } - } + string ref = (xact->has_tag(_("SHA1")) ? + xact->get_tag(_("SHA1"))->to_string() : + sha1sum(reader.get_last_line())); + + checksum_map_t::const_iterator entry = journal.checksum_map.find(ref); + if (entry != journal.checksum_map.end()) { + INFO(file_context(reader.get_pathname(), + reader.get_linenum()) + << "Ignoring known SHA1 " << ref); + checked_delete(xact); // ignore it + continue; } - } - if (matched) { - DEBUG("convert.csv", "Ignored xact with code: " << *xact->code); - checked_delete(xact); // ignore it - } - else { + if (report.HANDLED(rich_data) && ! xact->has_tag(_("SHA1"))) + xact->set_tag(_("SHA1"), string_value(ref)); + if (xact->posts.front()->account == NULL) { - // jww (2010-03-07): Bind this logic to an option: --auto-match if (account_t * acct = (report.HANDLED(auto_match) ? lookup_probable_account(xact->payee, current_xacts.rbegin(), @@ -143,8 +112,16 @@ value_t convert_command(call_scope_t& args) formatter(*post); } } + formatter.flush(); + } + catch (const std::exception&) { + add_error_context(_("While parsing file %1") + << file_context(reader.get_pathname(), + reader.get_linenum())); + add_error_context(_("While parsing CSV line:")); + add_error_context(line_context(reader.get_last_line())); + throw; } - formatter.flush(); // If not, transform the payee according to regexps diff --git a/src/csv.cc b/src/csv.cc index e2ba523d..c253f246 100644 --- a/src/csv.cc +++ b/src/csv.cc @@ -70,10 +70,12 @@ string csv_reader::read_field(std::istream& sin) else { while (sin.good() && ! sin.eof()) { sin.get(c); - if (c == ',') - break; - if (c != '\0') - field += c; + if (sin.good()) { + if (c == ',') + break; + if (c != '\0') + field += c; + } } } trim(field); @@ -82,8 +84,6 @@ string csv_reader::read_field(std::istream& sin) char * csv_reader::next_line(std::istream& sin) { - static char linebuf[MAX_LINE + 1]; - while (sin.good() && ! sin.eof() && sin.peek() == '#') sin.getline(linebuf, MAX_LINE); @@ -130,11 +130,13 @@ void csv_reader::read_index(std::istream& sin) } } -xact_t * csv_reader::read_xact(journal_t& journal, account_t * bucket) +xact_t * csv_reader::read_xact(journal_t& journal, account_t * bucket, + bool rich_data) { char * line = next_line(in); if (! line || index.empty()) return NULL; + linenum++; std::istringstream instr(line); @@ -144,20 +146,18 @@ xact_t * csv_reader::read_xact(journal_t& journal, account_t * bucket) xact->set_state(item_t::CLEARED); xact->pos = position_t(); - xact->pos->pathname = "jww (2010-03-05): unknown"; + xact->pos->pathname = pathname; xact->pos->beg_pos = in.tellg(); - xact->pos->beg_line = 0; - xact->pos->sequence = 0; + xact->pos->beg_line = linenum; + xact->pos->sequence = sequence++; post->xact = xact.get(); -#if 0 post->pos = position_t(); post->pos->pathname = pathname; - post->pos->beg_pos = line_beg_pos; + post->pos->beg_pos = in.tellg(); post->pos->beg_line = linenum; - post->pos->sequence = context.sequence++; -#endif + post->pos->sequence = sequence++; post->set_state(item_t::CLEARED); post->account = NULL; @@ -167,88 +167,80 @@ xact_t * csv_reader::read_xact(journal_t& journal, account_t * bucket) string total; string field; - try { - while (instr.good() && ! instr.eof()) { - field = read_field(instr); + while (instr.good() && ! instr.eof()) { + field = read_field(instr); - switch (index[n]) { - case FIELD_DATE: - xact->_date = parse_date(field); - break; + switch (index[n]) { + case FIELD_DATE: + xact->_date = parse_date(field); + break; - case FIELD_DATE_EFF: - xact->_date_eff = parse_date(field); - break; + case FIELD_DATE_EFF: + xact->_date_eff = parse_date(field); + break; - case FIELD_CODE: - if (! field.empty()) - xact->code = field; - break; + case FIELD_CODE: + if (! field.empty()) + xact->code = field; + break; - case FIELD_PAYEE: { - bool found = false; - foreach (payee_mapping_t& value, journal.payee_mappings) { - DEBUG("csv.mappings", "Looking for payee mapping: " << value.first); - if (value.first.match(field)) { - xact->payee = value.second; - found = true; - break; - } + case FIELD_PAYEE: { + bool found = false; + foreach (payee_mapping_t& value, journal.payee_mappings) { + DEBUG("csv.mappings", "Looking for payee mapping: " << value.first); + if (value.first.match(field)) { + xact->payee = value.second; + found = true; + break; } - if (! found) - xact->payee = field; - break; } + if (! found) + xact->payee = field; + break; + } - case FIELD_AMOUNT: { - std::istringstream amount_str(field); - amt.parse(amount_str, PARSE_NO_REDUCE); - if (! amt.has_commodity() && - commodity_pool_t::current_pool->default_commodity) - amt.set_commodity(*commodity_pool_t::current_pool->default_commodity); - post->amount = amt; - break; - } + case FIELD_AMOUNT: { + std::istringstream amount_str(field); + amt.parse(amount_str, PARSE_NO_REDUCE); + if (! amt.has_commodity() && + commodity_pool_t::current_pool->default_commodity) + amt.set_commodity(*commodity_pool_t::current_pool->default_commodity); + post->amount = amt; + break; + } - case FIELD_COST: { - std::istringstream amount_str(field); - amt.parse(amount_str, PARSE_NO_REDUCE); - if (! amt.has_commodity() && - commodity_pool_t::current_pool->default_commodity) - amt.set_commodity - (*commodity_pool_t::current_pool->default_commodity); - post->cost = amt; - break; - } + case FIELD_COST: { + std::istringstream amount_str(field); + amt.parse(amount_str, PARSE_NO_REDUCE); + if (! amt.has_commodity() && + commodity_pool_t::current_pool->default_commodity) + amt.set_commodity + (*commodity_pool_t::current_pool->default_commodity); + post->cost = amt; + break; + } - case FIELD_TOTAL: - total = field; - break; + case FIELD_TOTAL: + total = field; + break; - case FIELD_NOTE: - xact->note = field; - break; + case FIELD_NOTE: + xact->note = field; + break; - case FIELD_UNKNOWN: - if (! names[n].empty() && ! field.empty()) - xact->set_tag(names[n], string_value(field)); - break; - } - n++; + case FIELD_UNKNOWN: + if (! names[n].empty() && ! field.empty()) + xact->set_tag(names[n], string_value(field)); + break; } - } - catch (const std::exception&) { - add_error_context(_("While parsing CSV field:")); - add_error_context(line_context(field)); - throw; + n++; } -#if 0 - xact->set_tag(_("Imported"), - string(format_date(CURRENT_DATE(), FMT_WRITTEN))); - xact->set_tag(_("Original"), string(line)); - xact->set_tag(_("SHA1"), string(sha1sum(line))); -#endif + if (rich_data) { + xact->set_tag(_("Imported"), + string_value(format_date(CURRENT_DATE(), FMT_WRITTEN))); + xact->set_tag(_("CSV"), string_value(line)); + } // Translate the account name, if we have enough information to do so @@ -267,13 +259,11 @@ xact_t * csv_reader::read_xact(journal_t& journal, account_t * bucket) post->xact = xact.get(); -#if 0 post->pos = position_t(); post->pos->pathname = pathname; - post->pos->beg_pos = line_beg_pos; + post->pos->beg_pos = in.tellg(); post->pos->beg_line = linenum; - post->pos->sequence = context.sequence++; -#endif + post->pos->sequence = sequence++; post->set_state(item_t::CLEARED); post->account = bucket; diff --git a/src/csv.h b/src/csv.h index 5ff8b59e..cf350e9d 100644 --- a/src/csv.h +++ b/src/csv.h @@ -52,9 +52,13 @@ class account_t; class csv_reader { - static const std::size_t MAX_LINE = 1024; + static const std::size_t MAX_LINE = 4096; std::istream& in; + path pathname; + char linebuf[MAX_LINE]; + std::size_t linenum; + std::size_t sequence; enum headers_t { FIELD_DATE = 0, @@ -80,13 +84,11 @@ class csv_reader std::vector index; std::vector names; - std::vector fields; - - typedef std::map string_map; public: - csv_reader(std::istream& _in) - : in(_in), + csv_reader(std::istream& _in, const path& _pathname) + : in(_in), pathname(_pathname), + linenum(0), sequence(0), date_mask("date"), date_eff_mask("posted( ?date)?"), code_mask("code"), @@ -98,11 +100,30 @@ public: read_index(in); } + void read_index(std::istream& in); string read_field(std::istream& in); char * next_line(std::istream& in); - void read_index(std::istream& in); - xact_t * read_xact(journal_t& journal, account_t * bucket); + xact_t * read_xact(journal_t& journal, account_t * bucket, bool rich_data); + + const char * get_last_line() const { + return linebuf; + } + + path get_pathname() const { + return pathname; + } + std::size_t get_linenum() const { + return linenum; + } + + void reset() { + pathname.clear(); + index.clear(); + names.clear(); + linenum = 0; + sequence = 0; + } }; } // namespace ledger diff --git a/src/item.cc b/src/item.cc index 056aa04c..7184c0ef 100644 --- a/src/item.cc +++ b/src/item.cc @@ -72,7 +72,7 @@ bool item_t::has_tag(const mask_t& tag_mask, return false; } - optional item_t::get_tag(const string& tag, bool) const +optional item_t::get_tag(const string& tag, bool) const { DEBUG("item.meta", "Getting item tag: " << tag); if (metadata) { diff --git a/src/journal.cc b/src/journal.cc index 0691954f..bbfa205c 100644 --- a/src/journal.cc +++ b/src/journal.cc @@ -107,6 +107,17 @@ account_t * journal_t::find_account_re(const string& regexp) bool journal_t::add_xact(xact_t * xact) { + if (optional ref = xact->get_tag(_("SHA1"))) { + std::pair result + = checksum_map.insert(checksum_map_t::value_type(ref->to_string(), xact)); + if (! result.second) { + throw_(std::runtime_error, + _("Found duplicated transaction with SHA1: ") + << ref->to_string()); + return false; + } + } + xact->journal = this; if (! xact->finalize()) { diff --git a/src/journal.h b/src/journal.h index ca6b6e4f..49a6292b 100644 --- a/src/journal.h +++ b/src/journal.h @@ -63,6 +63,7 @@ typedef std::pair payee_mapping_t; typedef std::list payee_mappings_t; typedef std::pair account_mapping_t; typedef std::list account_mappings_t; +typedef std::map checksum_map_t; class journal_t : public noncopyable { @@ -117,6 +118,7 @@ public: std::list sources; payee_mappings_t payee_mappings; account_mappings_t account_mappings; + checksum_map_t checksum_map; bool was_loaded; journal_t(); @@ -198,6 +200,7 @@ private: ar & sources; ar & payee_mappings; ar & account_mappings; + ar & checksum_map; } #endif // HAVE_BOOST_SERIALIZATION }; diff --git a/src/report.cc b/src/report.cc index b3b7233f..c562ab38 100644 --- a/src/report.cc +++ b/src/report.cc @@ -1094,6 +1094,7 @@ option_t * report_t::lookup_option(const char * p) else OPT(revalued); else OPT(revalued_only); else OPT(revalued_total_); + else OPT(rich_data); break; case 's': OPT(sort_); diff --git a/src/report.h b/src/report.h index a001ffb1..565728df 100644 --- a/src/report.h +++ b/src/report.h @@ -313,6 +313,7 @@ public: HANDLER(revalued).report(out); HANDLER(revalued_only).report(out); HANDLER(revalued_total_).report(out); + HANDLER(rich_data).report(out); HANDLER(seed_).report(out); HANDLER(sort_).report(out); HANDLER(sort_all_).report(out); @@ -893,6 +894,8 @@ public: set_expr(args.get(0), args.get(1)); }); + OPTION(report_t, rich_data); + OPTION(report_t, seed_); OPTION_(report_t, sort_, DO_(args) { // -S diff --git a/src/textual.cc b/src/textual.cc index ddbd9943..13032236 100644 --- a/src/textual.cc +++ b/src/textual.cc @@ -1150,8 +1150,9 @@ post_t * instance_t::parse_post(char * line, if (context.strict && ! post->account->has_flags(ACCOUNT_KNOWN)) { if (post->_state == item_t::UNCLEARED) - warning_(_("\"%1\", line %2: Unknown account '%3'") - << pathname.string() << linenum << post->account->fullname()); + warning_(_("%1Unknown account '%2'") + << file_context(pathname, linenum) + << post->account->fullname()); post->account->add_flags(ACCOUNT_KNOWN); } @@ -1181,8 +1182,9 @@ post_t * instance_t::parse_post(char * line, if (context.strict && ! post->amount.commodity().has_flags(COMMODITY_KNOWN)) { if (post->_state == item_t::UNCLEARED) - warning_(_("\"%1\", line %2: Unknown commodity '%3'") - << pathname.string() << linenum << post->amount.commodity()); + warning_(_("%1Unknown commodity '%2'") + << file_context(pathname, linenum) + << post->amount.commodity()); post->amount.commodity().add_flags(COMMODITY_KNOWN); } diff --git a/src/utils.cc b/src/utils.cc index 42600db3..2f64bb0a 100644 --- a/src/utils.cc +++ b/src/utils.cc @@ -50,8 +50,8 @@ void debug_assert(const string& reason, std::size_t line) { std::ostringstream buf; - buf << "Assertion failed in \"" << file << "\", line " << line - << ": " << func << ": " << reason; + buf << "Assertion failed in " << file_context(file, line) + << func << ": " << reason; throw assertion_failed(buf.str()); } diff --git a/test/baseline/opt-rich-data.test b/test/baseline/opt-rich-data.test new file mode 100644 index 00000000..e69de29b -- cgit v1.2.3