diff options
Diffstat (limited to 'lisp/emacs-lisp/smie.el')
-rw-r--r-- | lisp/emacs-lisp/smie.el | 218 |
1 files changed, 157 insertions, 61 deletions
diff --git a/lisp/emacs-lisp/smie.el b/lisp/emacs-lisp/smie.el index e537de6f031..2a12f03e514 100644 --- a/lisp/emacs-lisp/smie.el +++ b/lisp/emacs-lisp/smie.el @@ -1,6 +1,6 @@ -;;; smie.el --- Simple Minded Indentation Engine +;;; smie.el --- Simple Minded Indentation Engine -*- lexical-binding: t -*- -;; Copyright (C) 2010, 2011, 2012 Free Software Foundation, Inc. +;; Copyright (C) 2010-2012 Free Software Foundation, Inc. ;; Author: Stefan Monnier <monnier@iro.umontreal.ca> ;; Keywords: languages, lisp, internal, parsing, indentation @@ -56,7 +56,7 @@ ;; building the 2D precedence tables and then computing the precedence levels ;; from it) can be found in pages 187-194 of "Parsing techniques" by Dick Grune ;; and Ceriel Jacobs (BookBody.pdf available at -;; http://www.cs.vu.nl/~dick/PTAPG.html). +;; http://dickgrune.com/Books/PTAPG_1st_Edition/). ;; ;; OTOH we had to kill many chickens, read many coffee grounds, and practice ;; untold numbers of black magic spells, to come up with the indentation code. @@ -69,13 +69,23 @@ ;; (exp ("IF" exp "ELSE" exp "END") ("CASE" cases "END")) ;; (cases (cases "ELSE" insts) ...) ;; The IF-rule implies ELSE=END and the CASE-rule implies ELSE>END. -;; FIXME: we could try to resolve such conflicts automatically by changing -;; the way BNF rules such as the IF-rule is handled. I.e. rather than -;; IF=ELSE and ELSE=END, we could turn them into IF<ELSE and ELSE>END -;; and IF=END, +;; This can be resolved simply with: +;; (exp ("IF" expelseexp "END") ("CASE" cases "END")) +;; (expelseexp (exp) (exp "ELSE" exp)) +;; (cases (cases "ELSE" insts) ...) +;; - Another source of conflict is when a terminator/separator is used to +;; terminate elements at different levels, as in: +;; (decls ("VAR" vars) (decls "," decls)) +;; (vars (id) (vars "," vars)) +;; often these can be resolved by making the lexer distinguish the two +;; kinds of commas, e.g. based on the following token. ;; TODO & BUGS: ;; +;; - We could try to resolve conflicts such as the IFexpELSEexpEND -vs- +;; CASE(casesELSEexp)END automatically by changing the way BNF rules such as +;; the IF-rule is handled. I.e. rather than IF=ELSE and ELSE=END, we could +;; turn them into IF<ELSE and ELSE>END and IF=END. ;; - Using the structural information SMIE gives us, it should be possible to ;; implement a `smie-align' command that would automatically figure out what ;; there is to align and how to do it (something like: align the token of @@ -84,9 +94,33 @@ ;; - Maybe accept two juxtaposed non-terminals in the BNF under the condition ;; that the first always ends with a terminal, or that the second always ;; starts with a terminal. +;; - Permit EBNF-style notation. +;; - If the grammar has conflicts, the only way is to make the lexer return +;; different tokens for the different cases. This extra work performed by +;; the lexer can be costly and unnecessary: we perform this extra work every +;; time we find the conflicting token, regardless of whether or not the +;; difference between the various situations is relevant to the current +;; situation. E.g. we may try to determine whether a ";" is a ";-operator" +;; or a ";-separator" in a case where we're skipping over a "begin..end" pair +;; where the difference doesn't matter. For frequently occurring tokens and +;; rarely occurring conflicts, this can be a significant performance problem. +;; We could try and let the lexer return a "set of possible tokens +;; plus a refinement function" and then let parser call the refinement +;; function if needed. +;; - Make it possible to better specify the behavior in the face of +;; syntax errors. IOW provide some control over the choice of precedence +;; levels within the limits of the constraints. E.g. make it possible for +;; the grammar to specify that "begin..end" has lower precedence than +;; "Module..EndModule", so that if a "begin" is missing, scanning from the +;; "end" will stop at "Module" rather than going past it (and similarly, +;; scanning from "Module" should not stop at a spurious "end"). ;;; Code: +;; FIXME: +;; - smie-indent-comment doesn't interact well with mis-indented lines (where +;; the indent rules don't do what the user wants). Not sure what to do. + (eval-when-compile (require 'cl)) (defgroup smie nil @@ -110,7 +144,7 @@ ;; - a 2 dimensional precedence table (key word "prec2"), is a 2D ;; table recording the precedence relation (can be `<', `=', `>', or ;; nil) between each pair of tokens. -;; - a precedence-level table (key word "grammar"), which is a alist +;; - a precedence-level table (key word "grammar"), which is an alist ;; giving for each token its left and right precedence level (a ;; number or nil). This is used in `smie-grammar'. ;; The prec2 tables are only intermediate data structures: the source @@ -118,6 +152,8 @@ ;; turns them into a levels table, which is what's used by the rest of ;; the SMIE code. +(defvar smie-warning-count 0) + (defun smie-set-prec2tab (table x y val &optional override) (assert (and x y)) (let* ((key (cons x y)) @@ -129,7 +165,8 @@ ;; be able to distinguish the two cases so that overrides ;; don't hide real conflicts. (puthash key (gethash key override) table) - (display-warning 'smie (format "Conflict: %s %s/%s %s" x old val y))) + (display-warning 'smie (format "Conflict: %s %s/%s %s" x old val y)) + (incf smie-warning-count)) (puthash key val table)))) (put 'smie-precs->prec2 'pure t) @@ -173,21 +210,54 @@ one of those elements share the same precedence level and associativity." prec2))) (put 'smie-bnf->prec2 'pure t) -(defun smie-bnf->prec2 (bnf &rest precs) +(defun smie-bnf->prec2 (bnf &rest resolvers) + "Convert the BNF grammar into a prec2 table. +BNF is a list of nonterminal definitions of the form: + \(NONTERM RHS1 RHS2 ...) +where each RHS is a (non-empty) list of terminals (aka tokens) or non-terminals. +Not all grammars are accepted: +- an RHS cannot be an empty list (this is not needed, since SMIE allows all + non-terminals to match the empty string anyway). +- an RHS cannot have 2 consecutive non-terminals: between each non-terminal + needs to be a terminal (aka token). This is a fundamental limitation of + the parsing technology used (operator precedence grammar). +Additionally, conflicts can occur: +- The returned prec2 table holds constraints between pairs of + token, and for any given pair only one constraint can be + present, either: T1 < T2, T1 = T2, or T1 > T2. +- A token can either be an `opener' (something similar to an open-paren), + a `closer' (like a close-paren), or `neither' of the two (e.g. an infix + operator, or an inner token like \"else\"). +Conflicts can be resolved via RESOLVERS, which is a list of elements that can +be either: +- a precs table (see `smie-precs->prec2') to resolve conflicting constraints, +- a constraint (T1 REL T2) where REL is one of = < or >." ;; FIXME: Add repetition operator like (repeat <separator> <elems>). ;; Maybe also add (or <elem1> <elem2>...) for things like ;; (exp (exp (or "+" "*" "=" ..) exp)). ;; Basically, make it EBNF (except for the specification of a separator in - ;; the repetition). - (let ((nts (mapcar 'car bnf)) ;Non-terminals - (first-ops-table ()) - (last-ops-table ()) - (first-nts-table ()) - (last-nts-table ()) - (prec2 (make-hash-table :test 'equal)) - (override (apply 'smie-merge-prec2s - (mapcar 'smie-precs->prec2 precs))) - again) + ;; the repetition, maybe). + (let* ((nts (mapcar 'car bnf)) ;Non-terminals. + (first-ops-table ()) + (last-ops-table ()) + (first-nts-table ()) + (last-nts-table ()) + (smie-warning-count 0) + (prec2 (make-hash-table :test 'equal)) + (override + (let ((precs ()) + (over (make-hash-table :test 'equal))) + (dolist (resolver resolvers) + (cond + ((and (= 3 (length resolver)) (memq (nth 1 resolver) '(= < >))) + (smie-set-prec2tab + over (nth 0 resolver) (nth 2 resolver) (nth 1 resolver))) + ((memq (caar resolver) '(left right assoc nonassoc)) + (push resolver precs)) + (t (error "Unknown resolver %S" resolver)))) + (apply #'smie-merge-prec2s over + (mapcar 'smie-precs->prec2 precs)))) + again) (dolist (rules bnf) (let ((nt (car rules)) (last-ops ()) @@ -209,14 +279,18 @@ one of those elements share the same precedence level and associativity." ;; the trouble, and it lets the writer of the BNF ;; be a bit more sloppy by skipping uninteresting base ;; cases which are terminals but not OPs. - (assert (not (member (cadr rhs) nts))) + (when (member (cadr rhs) nts) + (error "Adjacent non-terminals: %s %s" + (car rhs) (cadr rhs))) (pushnew (cadr rhs) first-ops))) (let ((shr (reverse rhs))) (if (not (member (car shr) nts)) (pushnew (car shr) last-ops) (pushnew (car shr) last-nts) (when (consp (cdr shr)) - (assert (not (member (cadr shr) nts))) + (when (member (cadr shr) nts) + (error "Adjacent non-terminals: %s %s" + (cadr shr) (car shr))) (pushnew (cadr shr) last-ops))))) (push (cons nt first-ops) first-ops-table) (push (cons nt last-ops) last-ops-table) @@ -263,8 +337,11 @@ one of those elements share the same precedence level and associativity." (setq rhs (cdr rhs))))) ;; Keep track of which tokens are openers/closer, so they can get a nil ;; precedence in smie-prec2->grammar. - (puthash :smie-open/close-alist (smie-bnf-classify bnf) prec2) - (puthash :smie-closer-alist (smie-bnf-closer-alist bnf) prec2) + (puthash :smie-open/close-alist (smie-bnf--classify bnf) prec2) + (puthash :smie-closer-alist (smie-bnf--closer-alist bnf) prec2) + (if (> smie-warning-count 0) + (display-warning + 'smie (format "Total: %d warnings" smie-warning-count))) prec2)) ;; (defun smie-prec2-closer-alist (prec2 include-inners) @@ -319,7 +396,7 @@ one of those elements share the same precedence level and associativity." ;; openers) ;; alist))) -(defun smie-bnf-closer-alist (bnf &optional no-inners) +(defun smie-bnf--closer-alist (bnf &optional no-inners) ;; We can also build this closer-alist table from a prec2 table, ;; but it takes more work, and the order is unpredictable, which ;; is a problem for smie-close-block. @@ -347,37 +424,33 @@ from the table, e.g. the table will not include things like (\"if\" . \"else\"). (pushnew (cons (car rhs) term) alist :test #'equal))))))) (nreverse alist))) -(defun smie-bnf-classify (bnf) +(defun smie-bnf--set-class (table token class) + (let ((prev (gethash token table class))) + (puthash token + (cond + ((eq prev class) class) + ((eq prev t) t) ;Non-terminal. + (t (display-warning + 'smie + (format "token %s is both %s and %s" token class prev)) + 'neither)) + table))) + +(defun smie-bnf--classify (bnf) "Return a table classifying terminals. -Each terminal can either be an `opener', a `closer', or neither." +Each terminal can either be an `opener', a `closer', or `neither'." (let ((table (make-hash-table :test #'equal)) - (nts (mapcar #'car bnf)) (alist '())) (dolist (category bnf) - (puthash (car category) 'neither table) ;Remove non-terminals. + (puthash (car category) t table)) ;Mark non-terminals. + (dolist (category bnf) (dolist (rhs (cdr category)) (if (null (cdr rhs)) - (puthash (pop rhs) 'neither table) - (let ((first (pop rhs))) - (puthash first - (if (memq (gethash first table) '(nil opener)) - 'opener - (unless (member first nts) - (error "SMIE: token %s is both opener and non-opener" - first)) - 'neither) - table)) - (while (cdr rhs) - (puthash (pop rhs) 'neither table)) ;Remove internals. - (let ((last (pop rhs))) - (puthash last - (if (memq (gethash last table) '(nil closer)) - 'closer - (unless (member last nts) - (error "SMIE: token %s is both closer and non-closer" - last)) - 'neither) - table))))) + (smie-bnf--set-class table (pop rhs) 'neither) + (smie-bnf--set-class table (pop rhs) 'opener) + (while (cdr rhs) ;Remove internals. + (smie-bnf--set-class table (pop rhs) 'neither)) + (smie-bnf--set-class table (pop rhs) 'closer)))) (maphash (lambda (tok v) (when (memq v '(closer opener)) (push (cons tok v) alist))) @@ -506,7 +579,7 @@ PREC2 is a table as returned by `smie-precs->prec2' or (smie-debug--describe-cycle table (smie-debug--prec2-cycle csts))))) (incf i 10)) - ;; Propagate equalities back to their source. + ;; Propagate equality constraints back to their sources. (dolist (eq (nreverse eqs)) (when (null (cadr eq)) ;; There's an equality constraint, but we still haven't given @@ -668,8 +741,22 @@ Possible return values: ;; Keep looking as long as we haven't matched the ;; topmost operator. (levels - (if (numberp (funcall op-forw toklevels)) - (push toklevels levels))) + (cond + ((numberp (funcall op-forw toklevels)) + (push toklevels levels)) + ;; FIXME: For some languages, we can express the grammar + ;; OK, but next-sexp doesn't stop where we'd want it to. + ;; E.g. in SML, we'd want to stop right in front of + ;; "local" if we're scanning (both forward and backward) + ;; from a "val/fun/..." at the same level. + ;; Same for Pascal/Modula2's "procedure" w.r.t + ;; "type/var/const". + ;; + ;; ((and (functionp (cadr (funcall op-forw toklevels))) + ;; (funcall (cadr (funcall op-forw toklevels)) + ;; levels)) + ;; (setq levels nil)) + )) ;; We matched the topmost operator. If the new operator ;; is the last in the corresponding BNF rule, we're done. ((not (numberp (funcall op-forw toklevels))) @@ -735,7 +822,7 @@ Possible return values: (indirect-function 'smie-op-left) halfsexp)) -;;; Miscellanous commands using the precedence parser. +;;; Miscellaneous commands using the precedence parser. (defun smie-backward-sexp-command (&optional n) "Move backward through N logical elements." @@ -915,7 +1002,7 @@ This uses SMIE's tables and is expected to be placed on `post-self-insert-hook'. ;; anything else than this trigger char, lest we'd blink ;; both when inserting the trigger char and when ;; inserting a subsequent trigger char like SPC. - (or (eq (point) pos) + (or (eq (char-before) last-command-event) (not (memq (char-before) smie-blink-matching-triggers))) (or smie-blink-matching-inners @@ -956,7 +1043,7 @@ function should return nil for arguments it does not expect. OFFSET can be: nil use the default indentation rule. -`(column . COLUMN) indent to column COLUMN. +\(column . COLUMN) indent to column COLUMN. NUMBER offset by NUMBER, relative to a base token which is the current token for :after and its parent for :before. @@ -998,7 +1085,10 @@ the beginning of a line." (unless (numberp (cadr (assoc tok smie-grammar))) (goto-char pos)) (setq smie--parent - (smie-backward-sexp 'halfsexp)))))) + (or (smie-backward-sexp 'halfsexp) + (let (res) + (while (null (setq res (smie-backward-sexp)))) + (list nil (point) (nth 2 res))))))))) (defun smie-rule-parent-p (&rest parents) "Return non-nil if the current token's parent is among PARENTS. @@ -1403,6 +1493,10 @@ should not be computed on the basis of the following token." (and (nth 4 (syntax-ppss)) 'noindent)) +(defun smie-indent-inside-string () + (and (nth 3 (syntax-ppss)) + 'noindent)) + (defun smie-indent-after-keyword () ;; Indentation right after a special keyword. (save-excursion @@ -1476,8 +1570,9 @@ should not be computed on the basis of the following token." (defvar smie-indent-functions '(smie-indent-fixindent smie-indent-bob smie-indent-close - smie-indent-comment smie-indent-comment-continue smie-indent-comment-close - smie-indent-comment-inside smie-indent-keyword smie-indent-after-keyword + smie-indent-comment smie-indent-comment-continue smie-indent-comment-close + smie-indent-comment-inside smie-indent-inside-string + smie-indent-keyword smie-indent-after-keyword smie-indent-exps) "Functions to compute the indentation. Each function is called with no argument, shouldn't move point, and should @@ -1547,8 +1642,9 @@ KEYWORDS are additional arguments, which can use the following keywords: (while (setq closer (pop closers)) (unless (and closers ;; FIXME: this eliminates prefixes of other - ;; closers, but we should probably elimnate - ;; prefixes of other keywords as well. + ;; closers, but we should probably + ;; eliminate prefixes of other keywords + ;; as well. (string-prefix-p closer (car closers))) (push (aref closer (1- (length closer))) triggers))) (delete-dups triggers))))))) |