summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author John Wiegley <johnw@newartisans.com> 2010-03-07 22:53:03 -0500
committer John Wiegley <johnw@newartisans.com> 2010-03-08 01:11:55 -0500
commit e070cdfc8ddcf9d6a25b593502f1c5ade56c849c (patch)
tree 0ed450b5a7e326501dbda8119a90684d6d95df3b
parent 89992ad4b8b6e7d9ab7c051f50abe6fae7277f73 (diff)
download fork-ledger-e070cdfc8ddcf9d6a25b593502f1c5ade56c849c.tar.gz
fork-ledger-e070cdfc8ddcf9d6a25b593502f1c5ade56c849c.tar.bz2
fork-ledger-e070cdfc8ddcf9d6a25b593502f1c5ade56c849c.zip
The CSV reader now auto-correlates fields by regex
-rw-r--r-- src/csv.cc 217
-rw-r--r-- src/csv.h 45
2 files changed, 210 insertions, 52 deletions
diff --git a/src/csv.cc b/src/csv.cc
index c0f8cd0e..5a74232f 100644
--- a/src/csv.cc
+++ b/src/csv.cc
@@ -40,7 +40,7 @@
namespace ledger {
-string csv_reader::read_field()
+string csv_reader::read_field(std::istream& in)
{
string field;
@@ -53,6 +53,9 @@ string csv_reader::read_field()
if (x == '\\') {
in.get(x);
}
+ else if (x == '"' && in.peek() == '"') {
+ in.get(x);
+ }
else if (x == c) {
if (x == '|')
in.unget();
@@ -60,65 +63,93 @@ string csv_reader::read_field()
in.get(c);
break;
}
- field += x;
+ if (x != '\0')
+ field += x;
}
}
else {
-
+ while (in.good() && ! in.eof()) {
+ in.get(c);
+ if (c == ',')
+ break;
+ if (c != '\0')
+ field += c;
+ }
}
+ trim(field);
return field;
}
-xact_t * csv_reader::read_xact(journal_t& journal, account_t * bucket)
+char * csv_reader::next_line(std::istream& in)
{
static char linebuf[MAX_LINE + 1];
+ while (in.good() && ! in.eof() && in.peek() == '#')
+ in.getline(linebuf, MAX_LINE);
+
if (! in.good() || in.eof())
return NULL;
- std::auto_ptr<xact_t> xact;
+ in.getline(linebuf, MAX_LINE);
- while (in.good() && ! in.eof() && in.peek() == '#')
- in.getline(linebuf, MAX_LINE);
+ return linebuf;
+}
- xact.reset(new xact_t);
+void csv_reader::read_index(std::istream& in)
+{
+ char * line = next_line(in);
+ if (! line)
+ return;
- xact->pos = position_t();
- xact->pos->pathname = "jww (2010-03-05): unknown";
- xact->pos->beg_pos = in.tellg();
- xact->pos->beg_line = 0;
- xact->pos->sequence = 0;
+ std::istringstream instr(line);
- string date = read_field(); trim(date);
- string code = read_field(); trim(code);
- string payee = read_field(); trim(payee);
+ while (instr.good() && ! instr.eof()) {
+ string field = read_field(instr);
+ names.push_back(field);
- if (date.empty())
- return NULL;
+ if (date_mask.match(field))
+ index.push_back(FIELD_DATE);
+ else if (date_eff_mask.match(field))
+ index.push_back(FIELD_DATE_EFF);
+ else if (code_mask.match(field))
+ index.push_back(FIELD_CODE);
+ else if (payee_mask.match(field))
+ index.push_back(FIELD_PAYEE);
+ else if (amount_mask.match(field))
+ index.push_back(FIELD_AMOUNT);
+ else if (cost_mask.match(field))
+ index.push_back(FIELD_COST);
+ else if (total_mask.match(field))
+ index.push_back(FIELD_TOTAL);
+ else if (note_mask.match(field))
+ index.push_back(FIELD_NOTE);
+ else
+ index.push_back(FIELD_UNKNOWN);
- xact->set_state(item_t::CLEARED);
- xact->_date = parse_date(date);
- if (! code.empty())
- xact->code = code;
-
- bool found = false;
- foreach (payee_mapping_t& value, journal.payee_mappings) {
- DEBUG("csv.mappings", "Looking for payee mapping: " << value.first);
- if (value.first.match(payee)) {
- xact->payee = value.second;
- found = true;
- break;
- }
+ DEBUG("csv.parse", "Header field: " << field);
}
- if (! found)
- xact->payee = payee;
+}
+
+xact_t * csv_reader::read_xact(journal_t& journal, account_t * bucket)
+{
+ restart:
+ char * line = next_line(in);
+ if (! line || index.empty())
+ return NULL;
- string amount = read_field(); trim(amount);
- string total = read_field(); trim(total);
- in.getline(linebuf, MAX_LINE); // skip to the next line
+ std::istringstream instr(line);
+ std::auto_ptr<xact_t> xact(new xact_t);
std::auto_ptr<post_t> post(new post_t);
+ xact->set_state(item_t::CLEARED);
+
+ xact->pos = position_t();
+ xact->pos->pathname = "jww (2010-03-05): unknown";
+ xact->pos->beg_pos = in.tellg();
+ xact->pos->beg_line = 0;
+ xact->pos->sequence = 0;
+
post->xact = xact.get();
#if 0
@@ -130,7 +161,96 @@ xact_t * csv_reader::read_xact(journal_t& journal, account_t * bucket)
#endif
post->set_state(item_t::CLEARED);
- post->account = journal.master->find_account(_("Expenses:Unknown"));
+ post->account = NULL;
+
+ int n = 0;
+ amount_t amt;
+ string total;
+
+ while (instr.good() && ! instr.eof()) {
+ string field = read_field(instr);
+
+ switch (index[n]) {
+ case FIELD_DATE:
+ if (field.empty())
+ goto restart;
+ try {
+ xact->_date = parse_date(field);
+ }
+ catch (date_error&) {
+ goto restart;
+ }
+ break;
+
+ case FIELD_DATE_EFF:
+ xact->_date_eff = parse_date(field);
+ break;
+
+ case FIELD_CODE:
+ if (! field.empty())
+ xact->code = field;
+ break;
+
+ case FIELD_PAYEE: {
+ bool found = false;
+ foreach (payee_mapping_t& value, journal.payee_mappings) {
+ DEBUG("csv.mappings", "Looking for payee mapping: " << value.first);
+ if (value.first.match(field)) {
+ xact->payee = value.second;
+ found = true;
+ break;
+ }
+ }
+ if (! found)
+ xact->payee = field;
+ break;
+ }
+
+ case FIELD_AMOUNT: {
+ std::istringstream amount_str(field);
+ amt.parse(amount_str, PARSE_NO_REDUCE);
+ if (! amt.has_commodity() &&
+ commodity_pool_t::current_pool->default_commodity)
+ amt.set_commodity(*commodity_pool_t::current_pool->default_commodity);
+ post->amount = amt;
+ break;
+ }
+
+ case FIELD_COST: {
+ std::istringstream amount_str(field);
+ amt.parse(amount_str, PARSE_NO_REDUCE);
+ if (! amt.has_commodity() &&
+ commodity_pool_t::current_pool->default_commodity)
+ amt.set_commodity
+ (*commodity_pool_t::current_pool->default_commodity);
+ post->cost = amt;
+ break;
+ }
+
+ case FIELD_TOTAL:
+ total = field;
+ break;
+
+ case FIELD_NOTE:
+ xact->note = field;
+ break;
+
+ case FIELD_UNKNOWN:
+ if (! names[n].empty() && ! field.empty())
+ xact->set_tag(names[n], field);
+ break;
+ }
+ n++;
+ }
+
+#if 0
+ xact->set_tag(_("Imported"),
+ string(format_date(CURRENT_DATE(), FMT_WRITTEN)));
+ xact->set_tag(_("Original"), string(line));
+ xact->set_tag(_("SHA1"), string(sha1sum(line)));
+#endif
+
+ // Translate the account name, if we have enough information to do so
foreach (account_mapping_t& value, journal.account_mappings) {
if (value.first.match(xact->payee)) {
@@ -139,17 +259,10 @@ xact_t * csv_reader::read_xact(journal_t& journal, account_t * bucket)
}
}
- std::istringstream amount_str(amount);
- amount_t amt;
- amt.parse(amount_str, PARSE_NO_REDUCE);
- if (! amt.has_commodity() &&
- commodity_pool_t::current_pool->default_commodity)
- amt.set_commodity
- (*commodity_pool_t::current_pool->default_commodity);
- post->amount = amt;
-
xact->add_post(post.release());
+ // Create the "balancing post", which refers to the account for this data
+
post.reset(new post_t);
post->xact = xact.get();
@@ -164,13 +277,17 @@ xact_t * csv_reader::read_xact(journal_t& journal, account_t * bucket)
post->set_state(item_t::CLEARED);
post->account = bucket;
- post->amount = - amt;
+
+ if (! amt.is_null())
+ post->amount = - amt;
if (! total.empty()) {
std::istringstream assigned_amount_str(total);
- amount_t assigned_amount;
- assigned_amount.parse(assigned_amount_str, PARSE_NO_REDUCE);
- post->assigned_amount = assigned_amount;
+ amt.parse(assigned_amount_str, PARSE_NO_REDUCE);
+ if (! amt.has_commodity() &&
+ commodity_pool_t::current_pool->default_commodity)
+ amt.set_commodity(*commodity_pool_t::current_pool->default_commodity);
+ post->assigned_amount = amt;
}
xact->add_post(post.release());
diff --git a/src/csv.h b/src/csv.h
index 7029d482..5ff8b59e 100644
--- a/src/csv.h
+++ b/src/csv.h
@@ -56,10 +56,51 @@ class csv_reader
std::istream& in;
+ enum headers_t {
+ FIELD_DATE = 0,
+ FIELD_DATE_EFF,
+ FIELD_CODE,
+ FIELD_PAYEE,
+ FIELD_AMOUNT,
+ FIELD_COST,
+ FIELD_TOTAL,
+ FIELD_NOTE,
+
+ FIELD_UNKNOWN
+ };
+
+ mask_t date_mask;
+ mask_t date_eff_mask;
+ mask_t code_mask;
+ mask_t payee_mask;
+ mask_t amount_mask;
+ mask_t cost_mask;
+ mask_t total_mask;
+ mask_t note_mask;
+
+ std::vector<int> index;
+ std::vector<string> names;
+ std::vector<string> fields;
+
+ typedef std::map<string, string> string_map;
+
public:
- csv_reader(std::istream& _in) : in(_in) {}
+ csv_reader(std::istream& _in)
+ : in(_in),
+ date_mask("date"),
+ date_eff_mask("posted( ?date)?"),
+ code_mask("code"),
+ payee_mask("(payee|desc(ription)?|title)"),
+ amount_mask("amount"),
+ cost_mask("cost"),
+ total_mask("total"),
+ note_mask("note") {
+ read_index(in);
+ }
- string read_field();
+ string read_field(std::istream& in);
+ char * next_line(std::istream& in);
+ void read_index(std::istream& in);
xact_t * read_xact(journal_t& journal, account_t * bucket);
};