summaryrefslogtreecommitdiff
path: root/doc/grammar.y
diff options
context:
space:
mode:
Diffstat (limited to 'doc/grammar.y')
-rw-r--r--doc/grammar.y221
1 files changed, 221 insertions, 0 deletions
diff --git a/doc/grammar.y b/doc/grammar.y
new file mode 100644
index 00000000..018e7391
--- /dev/null
+++ b/doc/grammar.y
@@ -0,0 +1,221 @@
+/**
+ * @file grammar.y
+ * @version 3.0
+ * @author John Wiegley
+ *
+ * @brief Canonical BNF grammar for Ledger data files
+ *
+ * Extensions are permitted if: they are not required, and they are
+ * backwards-compatible with this grammar.
+ */
+
+/*
+ * There are three special terminals in this grammar, which violate its
+ * context free nature:
+ *
+ * TEXT -- consumes all characters until the next terminal
+ * or EOL (end of line)
+ * WHITESPACE -- any amount of whitespace, not including EOL
+ * STRING -- characters up to the next WHITESPACE or EOL
+ *
+ * BIGINT -- a number of any width, matching [0-9]+
+ * INT4 -- a four digit wide number
+ * INT2 -- a two digit wide number
+ * INT1 -- a one digit wide number
+ *
+ * Except for 1) the 'spacer' production (see below), 2) EOL, and 3) the
+ * WHITESPACE required to begin a posting, whitespace is otherwise
+ * ignored.
+ *
+ * Yes, this grammar is confusing and not so happy for machine readers,
+ * but it was designed for the human author and reader. Once parsed,
+ * the contents must be unambiguous, which means they can be output to
+ * more rigorous formats for other programs to consume.
+ */
+
+/*
+ * Journals
+ *
+ * A journal is a file which primarily contains xacts, among other elements.
+ */
+
+journal:
+ journal_item journal |
+ /* epsilon */
+ ;
+
+journal_item:
+ whitespace
+ directive |
+ xact |
+ ;
+
+whitespace:
+ EOL |
+ WHITESPACE EOL |
+ ';' TEXT EOL | /* these next four are all ignored */
+ '*' TEXT EOL |
+ ;
+
+directive:
+ '@' word_directive EOL |
+ '!' word_directive EOL |
+ word_directive EOL |
+ char_directive EOL
+ ;
+
+word_directive:
+ "include" TEXT |
+ "account" TEXT |
+ "end" |
+ "alias" STRING '=' TEXT |
+ "def" TEXT |
+ TEXT WHITESPACE TEXT /* looked up in session (aka maybe Python) */
+ ;
+
+char_directive:
+ 'i' date time TEXT | /* a timeclock.el "check in" */
+ 'I' date time TEXT |
+ 'o' date time TEXT | /* a timeclock.el "check out" */
+ 'O' date time TEXT |
+ 'h' TEXT EOL |
+ 'b' TEXT EOL |
+ 'D' amount | /* sets display parameters for a commodity */
+ 'A' TEXT | /* sets the "default balancing account" */
+ 'C' commodity '=' amount | /* specifies a commodity conversion */
+ 'P' date time commodity amount | /* a pricing history xact */
+ 'N' commodity | /* commodity's price is never downloaded */
+ 'Y' INT4 | /* sets the default year for date parsing */
+ '-' '-' STRING TEXT | /* specify command-line options in the file */
+ ;
+
+date: INT4 date_sep INT2 date_sep INT2 ;
+date_opt: '=' date | /* epsilon */ ;
+date_sep: '/' | '-' | '.' ;
+
+time: INT2 ':' INT2 ':' INT2 ;
+
+commodity:
+ '"' TEXT '"' |
+ STRING ;
+
+/*
+ * Xacts
+ *
+ * Xacts are the atomic units of accounting, which are composed of
+ * multiple postings between accounts, so long as it all balances in
+ * the end.
+ */
+
+xact: plain_xact |
+ periodic_xact |
+ automated_xact ;
+
+plain_xact:
+ date date_opt status_opt code_opt FULLSTRING note_opt EOL
+ postings ;
+
+status_opt: status | /* epsilon */ ;
+status: '*' | '!' | /* epsilon */ ;
+
+code_opt: code | /* epsilon */ ;
+code: '(' TEXT ')' ;
+
+spacer: ' ' ' ' | '\t' | ' ' '\t' ;
+
+note_opt: spacer note | /* epsilon */ ;
+note: ';' TEXT ;
+
+/* ---------------------------------------------------------------------- */
+
+periodic_xact:
+ '~' period_expr note_opt EOL
+ posting postings ;
+
+/*
+ * A period expression has its own sub-grammar, which I don't quite have
+ * the time to exhaustively describe now. See datetime.cc. It allows
+ * for lots and lots of things, and is probably horribly ambiguous.
+ */
+
+period_expr: FULLSTRING ;
+
+/* ---------------------------------------------------------------------- */
+
+automated_xact:
+ '=' value_expr note_opt EOL
+ posting postings ;
+
+/*
+ * Value expressions are a algebraic math expressions very similar to
+ * XPath (minus the path traversal items). This grammar needs fleshing
+ * out also, since it's allowed in many places.
+ */
+
+value_expr: FULLSTRING ;
+
+/*
+ * There is a serious ambiguity here which the parser resolves as
+ * follows: if an amount_expr can be parsed as an amount, it's an
+ * amount; otherwise, it's a value expression.
+ */
+
+quantity: neg_opt BIGINT decimal_opt ;
+
+neg_opt: '-' | /* epsilon */ ;
+decimal_opt: '.' BIGINT | /* epsilon */ ;
+
+annotation: lot_price_opt lot_date_opt lot_note_opt ;
+
+lot_date_opt: date | /* epsilon */ ;
+lot_date: '[' date ']' ;
+
+lot_price_opt: price | /* epsilon */ ;
+lot_price: '{' amount '}' ;
+
+lot_note_opt: note | /* epsilon */ ;
+lot_note: '(' string ')' ;
+
+amount:
+ neg_opt commodity quantity annotation |
+ quantity commodity annotation ;
+
+amount_expr: amount | value_expr ;
+
+/*
+ * Postings
+ *
+ * Postings are the fundamental unit of accounting, and represent
+ * the movement of commodities to or from an account. Thus, paying off
+ * your credit card consists of two balancing postings: one that
+ * withdraws money from your checking account, and another which pays
+ * money to your credit institution.
+ */
+
+postings:
+ posting postings |
+ /* epsilon */
+ ;
+
+posting:
+ WHITESPACE status_opt account values_opt note_opt EOL;
+
+account_name: FULLSTRING ;
+
+values_opt:
+ spacer amount_expr price_opt |
+ /* epsilon */
+ ;
+
+price_opt: price | /* epsilon */ ;
+price:
+ '@' amount_expr |
+ '@@' amount_expr /* in this case, it's the whole price */
+ ;
+
+account:
+ account_name |
+ '(' account_name ')' |
+ '[' account_name ']' ;
+
+/* grammar.y ends here */