summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: John Wiegley <johnw@newartisans.com>, 2012-02-26 15:45:15 -0600
committer: John Wiegley <johnw@newartisans.com>, 2012-02-26 15:45:15 -0600
commit: aa9b07d79bff00506b913d1e56575c3859fc173f (patch)
tree: 70bf76b03217eec5d09855f2a7bda9d5b9230fa0 /src
parent: 4cf95497f9afaead3d9d308dabe8e8b26949502f (diff)
download: fork-ledger-aa9b07d79bff00506b913d1e56575c3859fc173f.tar.gz
          fork-ledger-aa9b07d79bff00506b913d1e56575c3859fc173f.tar.bz2
          fork-ledger-aa9b07d79bff00506b913d1e56575c3859fc173f.zip
Added --rich-data for 'convert', and SHA1 checksum checking
Diffstat (limited to 'src')
-rw-r--r--  src/convert.cc   91
-rw-r--r--  src/csv.cc      162
-rw-r--r--  src/csv.h        37
-rw-r--r--  src/item.cc       2
-rw-r--r--  src/journal.cc   11
-rw-r--r--  src/journal.h     3
-rw-r--r--  src/report.cc     1
-rw-r--r--  src/report.h      3
-rw-r--r--  src/textual.cc   10
-rw-r--r--  src/utils.cc      4
10 files changed, 166 insertions, 158 deletions
diff --git a/src/convert.cc b/src/convert.cc
index 1ef3a413..da4569cc 100644
--- a/src/convert.cc
+++ b/src/convert.cc
@@ -56,72 +56,41 @@ value_t convert_command(call_scope_t& args)
account_t * bucket = journal.master->find_account(bucket_name);
account_t * unknown = journal.master->find_account(_("Expenses:Unknown"));
- // Make an amounts mapping for the account under consideration
-
- typedef std::map<value_t, std::list<post_t *> > post_map_t;
- post_map_t post_map;
-
- xacts_iterator journal_iter(journal);
- while (xact_t * xact = *journal_iter++) {
- post_t * post = NULL;
- xact_posts_iterator xact_iter(*xact);
- while ((post = *xact_iter++) != NULL) {
- if (post->account == bucket)
- break;
- }
- if (post) {
- post_map_t::iterator i = post_map.find(post->amount);
- if (i == post_map.end()) {
- std::list<post_t *> post_list;
- post_list.push_back(post);
- post_map.insert(post_map_t::value_type(post->amount, post_list));
- } else {
- (*i).second.push_back(post);
- }
- }
- }
-
// Create a flat list
xacts_list current_xacts(journal.xacts_begin(), journal.xacts_end());
// Read in the series of transactions from the CSV file
print_xacts formatter(report);
- ifstream data(path(args.get<string>(0)));
- csv_reader reader(data);
-
- while (xact_t * xact = reader.read_xact(journal, bucket)) {
- if (report.HANDLED(invert)) {
- foreach (post_t * post, xact->posts)
- post->amount.in_place_negate();
- }
+ path csv_file_path(args.get<string>(0));
+ ifstream data(csv_file_path);
+ csv_reader reader(data, csv_file_path);
+
+ try {
+ while (xact_t * xact = reader.read_xact(journal, bucket,
+ report.HANDLED(rich_data))) {
+ if (report.HANDLED(invert)) {
+ foreach (post_t * post, xact->posts)
+ post->amount.in_place_negate();
+ }
- bool matched = false;
- if (! xact->posts.front()->amount.is_null()) {
- post_map_t::iterator i = post_map.find(- xact->posts.front()->amount);
- if (i != post_map.end()) {
- std::list<post_t *>& post_list((*i).second);
- foreach (post_t * post, post_list) {
- if (xact->code && post->xact->code &&
- *xact->code == *post->xact->code) {
- matched = true;
- break;
- }
- else if (xact->actual_date() == post->actual_date()) {
- matched = true;
- break;
- }
- }
+ string ref = (xact->has_tag(_("SHA1")) ?
+ xact->get_tag(_("SHA1"))->to_string() :
+ sha1sum(reader.get_last_line()));
+
+ checksum_map_t::const_iterator entry = journal.checksum_map.find(ref);
+ if (entry != journal.checksum_map.end()) {
+ INFO(file_context(reader.get_pathname(),
+ reader.get_linenum())
+ << "Ignoring known SHA1 " << ref);
+ checked_delete(xact); // ignore it
+ continue;
}
- }
- if (matched) {
- DEBUG("convert.csv", "Ignored xact with code: " << *xact->code);
- checked_delete(xact); // ignore it
- }
- else {
+ if (report.HANDLED(rich_data) && ! xact->has_tag(_("SHA1")))
+ xact->set_tag(_("SHA1"), string_value(ref));
+
if (xact->posts.front()->account == NULL) {
- // jww (2010-03-07): Bind this logic to an option: --auto-match
if (account_t * acct =
(report.HANDLED(auto_match) ?
lookup_probable_account(xact->payee, current_xacts.rbegin(),
@@ -143,8 +112,16 @@ value_t convert_command(call_scope_t& args)
formatter(*post);
}
}
+ formatter.flush();
+ }
+ catch (const std::exception&) {
+ add_error_context(_("While parsing file %1")
+ << file_context(reader.get_pathname(),
+ reader.get_linenum()));
+ add_error_context(_("While parsing CSV line:"));
+ add_error_context(line_context(reader.get_last_line()));
+ throw;
}
- formatter.flush();
// If not, transform the payee according to regexps
diff --git a/src/csv.cc b/src/csv.cc
index e2ba523d..c253f246 100644
--- a/src/csv.cc
+++ b/src/csv.cc
@@ -70,10 +70,12 @@ string csv_reader::read_field(std::istream& sin)
else {
while (sin.good() && ! sin.eof()) {
sin.get(c);
- if (c == ',')
- break;
- if (c != '\0')
- field += c;
+ if (sin.good()) {
+ if (c == ',')
+ break;
+ if (c != '\0')
+ field += c;
+ }
}
}
trim(field);
@@ -82,8 +84,6 @@ string csv_reader::read_field(std::istream& sin)
char * csv_reader::next_line(std::istream& sin)
{
- static char linebuf[MAX_LINE + 1];
-
while (sin.good() && ! sin.eof() && sin.peek() == '#')
sin.getline(linebuf, MAX_LINE);
@@ -130,11 +130,13 @@ void csv_reader::read_index(std::istream& sin)
}
}
-xact_t * csv_reader::read_xact(journal_t& journal, account_t * bucket)
+xact_t * csv_reader::read_xact(journal_t& journal, account_t * bucket,
+ bool rich_data)
{
char * line = next_line(in);
if (! line || index.empty())
return NULL;
+ linenum++;
std::istringstream instr(line);
@@ -144,20 +146,18 @@ xact_t * csv_reader::read_xact(journal_t& journal, account_t * bucket)
xact->set_state(item_t::CLEARED);
xact->pos = position_t();
- xact->pos->pathname = "jww (2010-03-05): unknown";
+ xact->pos->pathname = pathname;
xact->pos->beg_pos = in.tellg();
- xact->pos->beg_line = 0;
- xact->pos->sequence = 0;
+ xact->pos->beg_line = linenum;
+ xact->pos->sequence = sequence++;
post->xact = xact.get();
-#if 0
post->pos = position_t();
post->pos->pathname = pathname;
- post->pos->beg_pos = line_beg_pos;
+ post->pos->beg_pos = in.tellg();
post->pos->beg_line = linenum;
- post->pos->sequence = context.sequence++;
-#endif
+ post->pos->sequence = sequence++;
post->set_state(item_t::CLEARED);
post->account = NULL;
@@ -167,88 +167,80 @@ xact_t * csv_reader::read_xact(journal_t& journal, account_t * bucket)
string total;
string field;
- try {
- while (instr.good() && ! instr.eof()) {
- field = read_field(instr);
+ while (instr.good() && ! instr.eof()) {
+ field = read_field(instr);
- switch (index[n]) {
- case FIELD_DATE:
- xact->_date = parse_date(field);
- break;
+ switch (index[n]) {
+ case FIELD_DATE:
+ xact->_date = parse_date(field);
+ break;
- case FIELD_DATE_EFF:
- xact->_date_eff = parse_date(field);
- break;
+ case FIELD_DATE_EFF:
+ xact->_date_eff = parse_date(field);
+ break;
- case FIELD_CODE:
- if (! field.empty())
- xact->code = field;
- break;
+ case FIELD_CODE:
+ if (! field.empty())
+ xact->code = field;
+ break;
- case FIELD_PAYEE: {
- bool found = false;
- foreach (payee_mapping_t& value, journal.payee_mappings) {
- DEBUG("csv.mappings", "Looking for payee mapping: " << value.first);
- if (value.first.match(field)) {
- xact->payee = value.second;
- found = true;
- break;
- }
+ case FIELD_PAYEE: {
+ bool found = false;
+ foreach (payee_mapping_t& value, journal.payee_mappings) {
+ DEBUG("csv.mappings", "Looking for payee mapping: " << value.first);
+ if (value.first.match(field)) {
+ xact->payee = value.second;
+ found = true;
+ break;
}
- if (! found)
- xact->payee = field;
- break;
}
+ if (! found)
+ xact->payee = field;
+ break;
+ }
- case FIELD_AMOUNT: {
- std::istringstream amount_str(field);
- amt.parse(amount_str, PARSE_NO_REDUCE);
- if (! amt.has_commodity() &&
- commodity_pool_t::current_pool->default_commodity)
- amt.set_commodity(*commodity_pool_t::current_pool->default_commodity);
- post->amount = amt;
- break;
- }
+ case FIELD_AMOUNT: {
+ std::istringstream amount_str(field);
+ amt.parse(amount_str, PARSE_NO_REDUCE);
+ if (! amt.has_commodity() &&
+ commodity_pool_t::current_pool->default_commodity)
+ amt.set_commodity(*commodity_pool_t::current_pool->default_commodity);
+ post->amount = amt;
+ break;
+ }
- case FIELD_COST: {
- std::istringstream amount_str(field);
- amt.parse(amount_str, PARSE_NO_REDUCE);
- if (! amt.has_commodity() &&
- commodity_pool_t::current_pool->default_commodity)
- amt.set_commodity
- (*commodity_pool_t::current_pool->default_commodity);
- post->cost = amt;
- break;
- }
+ case FIELD_COST: {
+ std::istringstream amount_str(field);
+ amt.parse(amount_str, PARSE_NO_REDUCE);
+ if (! amt.has_commodity() &&
+ commodity_pool_t::current_pool->default_commodity)
+ amt.set_commodity
+ (*commodity_pool_t::current_pool->default_commodity);
+ post->cost = amt;
+ break;
+ }
- case FIELD_TOTAL:
- total = field;
- break;
+ case FIELD_TOTAL:
+ total = field;
+ break;
- case FIELD_NOTE:
- xact->note = field;
- break;
+ case FIELD_NOTE:
+ xact->note = field;
+ break;
- case FIELD_UNKNOWN:
- if (! names[n].empty() && ! field.empty())
- xact->set_tag(names[n], string_value(field));
- break;
- }
- n++;
+ case FIELD_UNKNOWN:
+ if (! names[n].empty() && ! field.empty())
+ xact->set_tag(names[n], string_value(field));
+ break;
}
- }
- catch (const std::exception&) {
- add_error_context(_("While parsing CSV field:"));
- add_error_context(line_context(field));
- throw;
+ n++;
}
-#if 0
- xact->set_tag(_("Imported"),
- string(format_date(CURRENT_DATE(), FMT_WRITTEN)));
- xact->set_tag(_("Original"), string(line));
- xact->set_tag(_("SHA1"), string(sha1sum(line)));
-#endif
+ if (rich_data) {
+ xact->set_tag(_("Imported"),
+ string_value(format_date(CURRENT_DATE(), FMT_WRITTEN)));
+ xact->set_tag(_("CSV"), string_value(line));
+ }
// Translate the account name, if we have enough information to do so
@@ -267,13 +259,11 @@ xact_t * csv_reader::read_xact(journal_t& journal, account_t * bucket)
post->xact = xact.get();
-#if 0
post->pos = position_t();
post->pos->pathname = pathname;
- post->pos->beg_pos = line_beg_pos;
+ post->pos->beg_pos = in.tellg();
post->pos->beg_line = linenum;
- post->pos->sequence = context.sequence++;
-#endif
+ post->pos->sequence = sequence++;
post->set_state(item_t::CLEARED);
post->account = bucket;
diff --git a/src/csv.h b/src/csv.h
index 5ff8b59e..cf350e9d 100644
--- a/src/csv.h
+++ b/src/csv.h
@@ -52,9 +52,13 @@ class account_t;
class csv_reader
{
- static const std::size_t MAX_LINE = 1024;
+ static const std::size_t MAX_LINE = 4096;
std::istream& in;
+ path pathname;
+ char linebuf[MAX_LINE];
+ std::size_t linenum;
+ std::size_t sequence;
enum headers_t {
FIELD_DATE = 0,
@@ -80,13 +84,11 @@ class csv_reader
std::vector<int> index;
std::vector<string> names;
- std::vector<string> fields;
-
- typedef std::map<string, string> string_map;
public:
- csv_reader(std::istream& _in)
- : in(_in),
+ csv_reader(std::istream& _in, const path& _pathname)
+ : in(_in), pathname(_pathname),
+ linenum(0), sequence(0),
date_mask("date"),
date_eff_mask("posted( ?date)?"),
code_mask("code"),
@@ -98,11 +100,30 @@ public:
read_index(in);
}
+ void read_index(std::istream& in);
string read_field(std::istream& in);
char * next_line(std::istream& in);
- void read_index(std::istream& in);
- xact_t * read_xact(journal_t& journal, account_t * bucket);
+ xact_t * read_xact(journal_t& journal, account_t * bucket, bool rich_data);
+
+ const char * get_last_line() const {
+ return linebuf;
+ }
+
+ path get_pathname() const {
+ return pathname;
+ }
+ std::size_t get_linenum() const {
+ return linenum;
+ }
+
+ void reset() {
+ pathname.clear();
+ index.clear();
+ names.clear();
+ linenum = 0;
+ sequence = 0;
+ }
};
} // namespace ledger
diff --git a/src/item.cc b/src/item.cc
index 056aa04c..7184c0ef 100644
--- a/src/item.cc
+++ b/src/item.cc
@@ -72,7 +72,7 @@ bool item_t::has_tag(const mask_t& tag_mask,
return false;
}
- optional<value_t> item_t::get_tag(const string& tag, bool) const
+optional<value_t> item_t::get_tag(const string& tag, bool) const
{
DEBUG("item.meta", "Getting item tag: " << tag);
if (metadata) {
diff --git a/src/journal.cc b/src/journal.cc
index 0691954f..bbfa205c 100644
--- a/src/journal.cc
+++ b/src/journal.cc
@@ -107,6 +107,17 @@ account_t * journal_t::find_account_re(const string& regexp)
bool journal_t::add_xact(xact_t * xact)
{
+ if (optional<value_t> ref = xact->get_tag(_("SHA1"))) {
+ std::pair<checksum_map_t::iterator, bool> result
+ = checksum_map.insert(checksum_map_t::value_type(ref->to_string(), xact));
+ if (! result.second) {
+ throw_(std::runtime_error,
+ _("Found duplicated transaction with SHA1: ")
+ << ref->to_string());
+ return false;
+ }
+ }
+
xact->journal = this;
if (! xact->finalize()) {
diff --git a/src/journal.h b/src/journal.h
index ca6b6e4f..49a6292b 100644
--- a/src/journal.h
+++ b/src/journal.h
@@ -63,6 +63,7 @@ typedef std::pair<mask_t, string> payee_mapping_t;
typedef std::list<payee_mapping_t> payee_mappings_t;
typedef std::pair<mask_t, account_t *> account_mapping_t;
typedef std::list<account_mapping_t> account_mappings_t;
+typedef std::map<string, xact_t *> checksum_map_t;
class journal_t : public noncopyable
{
@@ -117,6 +118,7 @@ public:
std::list<fileinfo_t> sources;
payee_mappings_t payee_mappings;
account_mappings_t account_mappings;
+ checksum_map_t checksum_map;
bool was_loaded;
journal_t();
@@ -198,6 +200,7 @@ private:
ar & sources;
ar & payee_mappings;
ar & account_mappings;
+ ar & checksum_map;
}
#endif // HAVE_BOOST_SERIALIZATION
};
diff --git a/src/report.cc b/src/report.cc
index b3b7233f..c562ab38 100644
--- a/src/report.cc
+++ b/src/report.cc
@@ -1094,6 +1094,7 @@ option_t<report_t> * report_t::lookup_option(const char * p)
else OPT(revalued);
else OPT(revalued_only);
else OPT(revalued_total_);
+ else OPT(rich_data);
break;
case 's':
OPT(sort_);
diff --git a/src/report.h b/src/report.h
index a001ffb1..565728df 100644
--- a/src/report.h
+++ b/src/report.h
@@ -313,6 +313,7 @@ public:
HANDLER(revalued).report(out);
HANDLER(revalued_only).report(out);
HANDLER(revalued_total_).report(out);
+ HANDLER(rich_data).report(out);
HANDLER(seed_).report(out);
HANDLER(sort_).report(out);
HANDLER(sort_all_).report(out);
@@ -893,6 +894,8 @@ public:
set_expr(args.get<string>(0), args.get<string>(1));
});
+ OPTION(report_t, rich_data);
+
OPTION(report_t, seed_);
OPTION_(report_t, sort_, DO_(args) { // -S
diff --git a/src/textual.cc b/src/textual.cc
index ddbd9943..13032236 100644
--- a/src/textual.cc
+++ b/src/textual.cc
@@ -1150,8 +1150,9 @@ post_t * instance_t::parse_post(char * line,
if (context.strict && ! post->account->has_flags(ACCOUNT_KNOWN)) {
if (post->_state == item_t::UNCLEARED)
- warning_(_("\"%1\", line %2: Unknown account '%3'")
- << pathname.string() << linenum << post->account->fullname());
+ warning_(_("%1Unknown account '%2'")
+ << file_context(pathname, linenum)
+ << post->account->fullname());
post->account->add_flags(ACCOUNT_KNOWN);
}
@@ -1181,8 +1182,9 @@ post_t * instance_t::parse_post(char * line,
if (context.strict &&
! post->amount.commodity().has_flags(COMMODITY_KNOWN)) {
if (post->_state == item_t::UNCLEARED)
- warning_(_("\"%1\", line %2: Unknown commodity '%3'")
- << pathname.string() << linenum << post->amount.commodity());
+ warning_(_("%1Unknown commodity '%2'")
+ << file_context(pathname, linenum)
+ << post->amount.commodity());
post->amount.commodity().add_flags(COMMODITY_KNOWN);
}
diff --git a/src/utils.cc b/src/utils.cc
index 42600db3..2f64bb0a 100644
--- a/src/utils.cc
+++ b/src/utils.cc
@@ -50,8 +50,8 @@ void debug_assert(const string& reason,
std::size_t line)
{
std::ostringstream buf;
- buf << "Assertion failed in \"" << file << "\", line " << line
- << ": " << func << ": " << reason;
+ buf << "Assertion failed in " << file_context(file, line)
+ << func << ": " << reason;
throw assertion_failed(buf.str());
}