summaryrefslogtreecommitdiff
path: root/doc/grammar.y
blob: 9a5f740b3fa1eb3d1f1b129320e07395b1a49fdc (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
/**
 * @file grammar.y
 * @version 3.0
 * @author John Wiegley
 *
 * @brief Canonical BNF grammar for Ledger data files
 *
 * Extensions are permitted if: they are not required, and they are
 * backwards-compatible with this grammar.
 */

/*
 * There are three special terminals in this grammar, which violate its
 * context free nature:
 *
 * TEXT       -- consumes all characters until the next terminal
 *               or EOL (end of line)
 * WHITESPACE -- any amount of whitespace, not including EOL
 * STRING     -- characters up to the next WHITESPACE or EOL
 *
 * In addition, the following numeric terminals are used:
 *
 * BIGINT     -- a number of any width, matching [0-9]+
 * INT4       -- a four digit wide number
 * INT2       -- a two digit wide number
 * INT1       -- a one digit wide number
 *
 * Except for 1) the 'spacer' production (see below), 2) EOL, and 3) the
 * WHITESPACE required to begin a posting, whitespace is otherwise
 * ignored.
 *
 * Yes, this grammar is confusing and not so happy for machine readers,
 * but it was designed for the human author and reader.  Once parsed,
 * the contents must be unambiguous, which means they can be output to
 * more rigorous formats for other programs to consume.
 */

/*
 * Journals
 *
 * A journal is a file which primarily contains xacts, among other elements.
 */

/* A journal is a possibly empty sequence of journal items. */
journal
    : /* epsilon */
    | journal_item journal
    ;

/*
 * A journal item is exactly one of: an ignorable line, a directive, or
 * an xact.  There is deliberately no empty alternative: the empty case
 * is already covered by the epsilon alternative of 'journal', and an
 * empty journal_item would make that recursion ambiguous.
 */
journal_item:
    whitespace |
    directive |
    xact
    ;

/*
 * Lines that carry no journal data: blank lines, whitespace-only lines,
 * and comment lines.  Every alternative consumes an EOL, so this rule
 * never matches the empty string.
 */
whitespace:
    EOL |
    WHITESPACE EOL |
    ';' TEXT EOL |              /* these next two are comment lines; TEXT is ignored */
    '*' TEXT EOL
    ;

/*
 * A directive occupies one line, terminated by EOL.  A word directive
 * may be written bare or prefixed with '@' or '!'; a character
 * directive takes no prefix.
 */
directive
    : '@' word_directive EOL
    | '!' word_directive EOL
    | word_directive EOL
    | char_directive EOL
    ;

/* Directives introduced by a keyword, plus a generic two-part fallback. */
word_directive
    : "include" TEXT
    | "account" TEXT
    | "end"
    | "alias" STRING '=' TEXT
    | "def" TEXT
    | TEXT WHITESPACE TEXT      /* looked up in session (aka maybe Python) */
    ;

/*
 * Single-character directives.  The terminating EOL is supplied by the
 * 'directive' production, so no alternative here consumes an EOL of its
 * own, and there is no empty alternative (a bare EOL is already matched
 * by 'whitespace').
 */
char_directive:
    'i' date time TEXT |        /* a timeclock.el "check in" */
    'I' date time TEXT |
    'o' date time TEXT |        /* a timeclock.el "check out" */
    'O' date time TEXT |
    'h' TEXT |
    'b' TEXT |
    'D' amount |                /* sets display parameters for a commodity */
    'A' TEXT |                  /* sets the "default balancing account" */
    'C' commodity '=' amount |  /* specifies a commodity conversion */
    'P' date time commodity amount | /* a pricing history xact */
    'N' commodity |             /* commodity's price is never downloaded */
    'Y' INT4 |                  /* sets the default year for date parsing */
    '-' '-' STRING TEXT         /* specify command-line options in the file */
    ;

/*
 * A date is a four-digit field followed by two two-digit fields, each
 * pair joined by a separator character.
 */
date
    : INT4 date_sep INT2 date_sep INT2
    ;

/* An optional additional date, introduced by '='. */
date_opt
    : '=' date
    | /* epsilon */
    ;

date_sep
    : '/'
    | '-'
    | '.'
    ;

/* A time is three two-digit fields separated by colons. */
time
    : INT2 ':' INT2 ':' INT2
    ;

/* A commodity is either TEXT enclosed in double quotes or a bare STRING. */
commodity
    : '"' TEXT '"'
    | STRING
    ;

/*
 * Xacts
 *
 * Xacts are the atomic units of accounting, which are composed of
 * multiple postings between accounts, so long as it all balances in
 * the end.
 */

/* An xact is one of three kinds: plain, periodic, or automated. */
xact
    : plain_xact
    | periodic_xact
    | automated_xact
    ;

/*
 * A header line (date, optional second date, optional status, optional
 * code, a FULLSTRING, optional note) followed by its postings.
 */
plain_xact
    : date date_opt status_opt code_opt FULLSTRING note_opt EOL
      postings
    ;

/*
 * An optional status mark.  Optionality lives only in status_opt;
 * 'status' itself must match a mark, otherwise the empty string could
 * be derived in two different ways.
 */
status_opt: status | /* epsilon */ ;
status: '*' | '!' ;

/* An optional code: TEXT enclosed in parentheses. */
code_opt
    : code
    | /* epsilon */
    ;

code
    : '(' TEXT ')'
    ;

/* A spacer is two spaces, a single tab, or a space followed by a tab. */
spacer
    : ' ' ' '
    | '\t'
    | ' ' '\t'
    ;

/* An optional note: a spacer, then ';' followed by TEXT. */
note_opt
    : spacer note
    | /* epsilon */
    ;

note
    : ';' TEXT
    ;

/* ---------------------------------------------------------------------- */

/*
 * A '~' header line carrying a period expression and an optional note,
 * followed by at least one posting.
 */
periodic_xact
    : '~' period_expr note_opt EOL
      posting postings
    ;

/*
 * A period expression has its own sub-grammar, which I don't quite have
 * the time to exhaustively describe now.  See datetime.cc.  It allows
 * for lots and lots of things, and is probably horribly ambiguous.
 */

/* Represented here only as an opaque FULLSTRING. */
period_expr
    : FULLSTRING
    ;

/* ---------------------------------------------------------------------- */

/*
 * An '=' header line carrying a value expression and an optional note,
 * followed by at least one posting.
 */
automated_xact
    : '=' value_expr note_opt EOL
      posting postings
    ;

/*
 * Value expressions are algebraic math expressions very similar to
 * XPath (minus the path traversal items).  This grammar needs fleshing
 * out also, since it's allowed in many places.
 */

/* Represented here only as an opaque FULLSTRING. */
value_expr
    : FULLSTRING
    ;

/*
 * There is a serious ambiguity here which the parser resolves as
 * follows: if an amount_expr can be parsed as an amount, it's an
 * amount; otherwise, it's a value expression.
 */

/*
 * A quantity is an optional minus sign, an integer part, and an
 * optional fractional part introduced by '.'.
 */
quantity
    : neg_opt BIGINT decimal_opt
    ;

neg_opt
    : '-'
    | /* epsilon */
    ;

decimal_opt
    : '.' BIGINT
    | /* epsilon */
    ;

/*
 * An annotation is up to three optional lot details, in this order: a
 * price in braces, a date in brackets, and a note in parentheses.
 *
 * Each *_opt rule refers to its own delimited lot_* form.  Referring to
 * the bare 'price', 'date' or 'note' productions instead would leave
 * the lot_* rules unreachable and would collide with the price_opt and
 * note_opt that may follow an amount in a posting.
 */
annotation: lot_price_opt lot_date_opt lot_note_opt ;

lot_date_opt: lot_date | /* epsilon */ ;
lot_date: '[' date ']' ;

lot_price_opt: lot_price | /* epsilon */ ;
lot_price: '{' amount '}' ;

lot_note_opt: lot_note | /* epsilon */ ;
lot_note: '(' TEXT ')' ;        /* TEXT stops at the closing ')' terminal */

/*
 * An amount pairs a quantity with a commodity, in either order, followed
 * by an annotation.  When the commodity comes first, a minus sign may
 * precede it.
 */
amount
    : neg_opt commodity quantity annotation
    | quantity commodity annotation
    ;

/* Either a literal amount or a value expression. */
amount_expr
    : amount
    | value_expr
    ;

/*
 * Postings
 *
 * Postings are the fundamental unit of accounting, and represent
 * the movement of commodities to or from an account.  Thus, paying off
 * your credit card consists of two balancing postings: one that
 * withdraws money from your checking account, and another which pays
 * money to your credit institution.
 */

/* Zero or more postings. */
postings
    : /* epsilon */
    | posting postings
    ;

/*
 * A posting is one line that begins with WHITESPACE, then an optional
 * status, an account, optional values, and an optional note.
 */
posting
    : WHITESPACE status_opt account values_opt note_opt EOL
    ;

/* Represented here only as a FULLSTRING. */
account_name
    : FULLSTRING
    ;

/*
 * The optional value part of a posting: a spacer, an amount expression,
 * and an optional price.
 */
values_opt
    : /* epsilon */
    | spacer amount_expr price_opt
    ;

/*
 * An optional price, introduced by '@' or "@@".  The two-character
 * introducer is written as a string literal, like the keywords in
 * word_directive, because a character literal can hold only a single
 * character.
 */
price_opt: price | /* epsilon */ ;
price:
    '@' amount_expr |
    "@@" amount_expr            /* in this case, it's the whole price */
    ;

/* An account name, written bare, in parentheses, or in brackets. */
account
    : account_name
    | '(' account_name ')'
    | '[' account_name ']'
    ;

/* grammar.y ends here */