summary | refs | log | tree | commit | diff
path: root/src/csv.cc
diff options
context:
space:
mode:
author: John Wiegley <johnw@newartisans.com> 2010-03-07 22:53:03 -0500
committer: John Wiegley <johnw@newartisans.com> 2010-03-08 01:11:55 -0500
commit: e070cdfc8ddcf9d6a25b593502f1c5ade56c849c (patch)
tree: 0ed450b5a7e326501dbda8119a90684d6d95df3b /src/csv.cc
parent: 89992ad4b8b6e7d9ab7c051f50abe6fae7277f73 (diff)
download: fork-ledger-e070cdfc8ddcf9d6a25b593502f1c5ade56c849c.tar.gz
fork-ledger-e070cdfc8ddcf9d6a25b593502f1c5ade56c849c.tar.bz2
fork-ledger-e070cdfc8ddcf9d6a25b593502f1c5ade56c849c.zip
The CSV reader now auto-correlates fields by regex
Diffstat (limited to 'src/csv.cc')
-rw-r--r-- src/csv.cc 217
1 file changed, 167 insertions, 50 deletions
diff --git a/src/csv.cc b/src/csv.cc
index c0f8cd0e..5a74232f 100644
--- a/src/csv.cc
+++ b/src/csv.cc
@@ -40,7 +40,7 @@
namespace ledger {
-string csv_reader::read_field()
+string csv_reader::read_field(std::istream& in)
{
string field;
@@ -53,6 +53,9 @@ string csv_reader::read_field()
if (x == '\\') {
in.get(x);
}
+ else if (x == '"' && in.peek() == '"') {
+ in.get(x);
+ }
else if (x == c) {
if (x == '|')
in.unget();
@@ -60,65 +63,93 @@ string csv_reader::read_field()
in.get(c);
break;
}
- field += x;
+ if (x != '\0')
+ field += x;
}
}
else {
-
+ while (in.good() && ! in.eof()) {
+ in.get(c);
+ if (c == ',')
+ break;
+ if (c != '\0')
+ field += c;
+ }
}
+ trim(field);
return field;
}
-xact_t * csv_reader::read_xact(journal_t& journal, account_t * bucket)
+char * csv_reader::next_line(std::istream& in)
{
static char linebuf[MAX_LINE + 1];
+ while (in.good() && ! in.eof() && in.peek() == '#')
+ in.getline(linebuf, MAX_LINE);
+
if (! in.good() || in.eof())
return NULL;
- std::auto_ptr<xact_t> xact;
+ in.getline(linebuf, MAX_LINE);
- while (in.good() && ! in.eof() && in.peek() == '#')
- in.getline(linebuf, MAX_LINE);
+ return linebuf;
+}
- xact.reset(new xact_t);
+void csv_reader::read_index(std::istream& in)
+{
+ char * line = next_line(in);
+ if (! line)
+ return;
- xact->pos = position_t();
- xact->pos->pathname = "jww (2010-03-05): unknown";
- xact->pos->beg_pos = in.tellg();
- xact->pos->beg_line = 0;
- xact->pos->sequence = 0;
+ std::istringstream instr(line);
- string date = read_field(); trim(date);
- string code = read_field(); trim(code);
- string payee = read_field(); trim(payee);
+ while (instr.good() && ! instr.eof()) {
+ string field = read_field(instr);
+ names.push_back(field);
- if (date.empty())
- return NULL;
+ if (date_mask.match(field))
+ index.push_back(FIELD_DATE);
+ else if (date_eff_mask.match(field))
+ index.push_back(FIELD_DATE_EFF);
+ else if (code_mask.match(field))
+ index.push_back(FIELD_CODE);
+ else if (payee_mask.match(field))
+ index.push_back(FIELD_PAYEE);
+ else if (amount_mask.match(field))
+ index.push_back(FIELD_AMOUNT);
+ else if (cost_mask.match(field))
+ index.push_back(FIELD_COST);
+ else if (total_mask.match(field))
+ index.push_back(FIELD_TOTAL);
+ else if (note_mask.match(field))
+ index.push_back(FIELD_NOTE);
+ else
+ index.push_back(FIELD_UNKNOWN);
- xact->set_state(item_t::CLEARED);
- xact->_date = parse_date(date);
- if (! code.empty())
- xact->code = code;
-
- bool found = false;
- foreach (payee_mapping_t& value, journal.payee_mappings) {
- DEBUG("csv.mappings", "Looking for payee mapping: " << value.first);
- if (value.first.match(payee)) {
- xact->payee = value.second;
- found = true;
- break;
- }
+ DEBUG("csv.parse", "Header field: " << field);
}
- if (! found)
- xact->payee = payee;
+}
+
+xact_t * csv_reader::read_xact(journal_t& journal, account_t * bucket)
+{
+ restart:
+ char * line = next_line(in);
+ if (! line || index.empty())
+ return NULL;
- string amount = read_field(); trim(amount);
- string total = read_field(); trim(total);
- in.getline(linebuf, MAX_LINE); // skip to the next line
+ std::istringstream instr(line);
+ std::auto_ptr<xact_t> xact(new xact_t);
std::auto_ptr<post_t> post(new post_t);
+ xact->set_state(item_t::CLEARED);
+
+ xact->pos = position_t();
+ xact->pos->pathname = "jww (2010-03-05): unknown";
+ xact->pos->beg_pos = in.tellg();
+ xact->pos->beg_line = 0;
+ xact->pos->sequence = 0;
+
post->xact = xact.get();
#if 0
@@ -130,7 +161,96 @@ xact_t * csv_reader::read_xact(journal_t& journal, account_t * bucket)
#endif
post->set_state(item_t::CLEARED);
- post->account = journal.master->find_account(_("Expenses:Unknown"));
+ post->account = NULL;
+
+ int n = 0;
+ amount_t amt;
+ string total;
+
+ while (instr.good() && ! instr.eof()) {
+ string field = read_field(instr);
+
+ switch (index[n]) {
+ case FIELD_DATE:
+ if (field.empty())
+ goto restart;
+ try {
+ xact->_date = parse_date(field);
+ }
+ catch (date_error&) {
+ goto restart;
+ }
+ break;
+
+ case FIELD_DATE_EFF:
+ xact->_date_eff = parse_date(field);
+ break;
+
+ case FIELD_CODE:
+ if (! field.empty())
+ xact->code = field;
+ break;
+
+ case FIELD_PAYEE: {
+ bool found = false;
+ foreach (payee_mapping_t& value, journal.payee_mappings) {
+ DEBUG("csv.mappings", "Looking for payee mapping: " << value.first);
+ if (value.first.match(field)) {
+ xact->payee = value.second;
+ found = true;
+ break;
+ }
+ }
+ if (! found)
+ xact->payee = field;
+ break;
+ }
+
+ case FIELD_AMOUNT: {
+ std::istringstream amount_str(field);
+ amt.parse(amount_str, PARSE_NO_REDUCE);
+ if (! amt.has_commodity() &&
+ commodity_pool_t::current_pool->default_commodity)
+ amt.set_commodity(*commodity_pool_t::current_pool->default_commodity);
+ post->amount = amt;
+ break;
+ }
+
+ case FIELD_COST: {
+ std::istringstream amount_str(field);
+ amt.parse(amount_str, PARSE_NO_REDUCE);
+ if (! amt.has_commodity() &&
+ commodity_pool_t::current_pool->default_commodity)
+ amt.set_commodity
+ (*commodity_pool_t::current_pool->default_commodity);
+ post->cost = amt;
+ break;
+ }
+
+ case FIELD_TOTAL:
+ total = field;
+ break;
+
+ case FIELD_NOTE:
+ xact->note = field;
+ break;
+
+ case FIELD_UNKNOWN:
+ if (! names[n].empty() && ! field.empty())
+ xact->set_tag(names[n], field);
+ break;
+ }
+ n++;
+ }
+
+#if 0
+ xact->set_tag(_("Imported"),
+ string(format_date(CURRENT_DATE(), FMT_WRITTEN)));
+ xact->set_tag(_("Original"), string(line));
+ xact->set_tag(_("SHA1"), string(sha1sum(line)));
+#endif
+
+ // Translate the account name, if we have enough information to do so
foreach (account_mapping_t& value, journal.account_mappings) {
if (value.first.match(xact->payee)) {
@@ -139,17 +259,10 @@ xact_t * csv_reader::read_xact(journal_t& journal, account_t * bucket)
}
}
- std::istringstream amount_str(amount);
- amount_t amt;
- amt.parse(amount_str, PARSE_NO_REDUCE);
- if (! amt.has_commodity() &&
- commodity_pool_t::current_pool->default_commodity)
- amt.set_commodity
- (*commodity_pool_t::current_pool->default_commodity);
- post->amount = amt;
-
xact->add_post(post.release());
+ // Create the "balancing post", which refers to the account for this data
+
post.reset(new post_t);
post->xact = xact.get();
@@ -164,13 +277,17 @@ xact_t * csv_reader::read_xact(journal_t& journal, account_t * bucket)
post->set_state(item_t::CLEARED);
post->account = bucket;
- post->amount = - amt;
+
+ if (! amt.is_null())
+ post->amount = - amt;
if (! total.empty()) {
std::istringstream assigned_amount_str(total);
- amount_t assigned_amount;
- assigned_amount.parse(assigned_amount_str, PARSE_NO_REDUCE);
- post->assigned_amount = assigned_amount;
+ amt.parse(assigned_amount_str, PARSE_NO_REDUCE);
+ if (! amt.has_commodity() &&
+ commodity_pool_t::current_pool->default_commodity)
+ amt.set_commodity(*commodity_pool_t::current_pool->default_commodity);
+ post->assigned_amount = amt;
}
xact->add_post(post.release());