summaryrefslogtreecommitdiff
path: root/lisp/emacs-lisp/syntax.el
diff options
context:
space:
mode:
Diffstat (limited to 'lisp/emacs-lisp/syntax.el')
-rw-r--r--lisp/emacs-lisp/syntax.el775
1 files changed, 775 insertions, 0 deletions
diff --git a/lisp/emacs-lisp/syntax.el b/lisp/emacs-lisp/syntax.el
new file mode 100644
index 00000000000..e1be3015838
--- /dev/null
+++ b/lisp/emacs-lisp/syntax.el
@@ -0,0 +1,775 @@
+;;; syntax.el --- helper functions to find syntactic context -*- lexical-binding: t -*-
+
+;; Copyright (C) 2000-2022 Free Software Foundation, Inc.
+
+;; Maintainer: emacs-devel@gnu.org
+;; Keywords: internal
+
+;; This file is part of GNU Emacs.
+
+;; GNU Emacs is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+
+;; GNU Emacs is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>.
+
+;;; Commentary:
+
+;; The main exported function is `syntax-ppss'. You might also need
+;; to call `syntax-ppss-flush-cache' or to add it to
+;; before-change-functions'(although this is automatically done by
+;; syntax-ppss when needed, but that might fail if syntax-ppss is
+;; called in a context where before-change-functions is temporarily
+;; let-bound to nil).
+
+;;; Todo:
+
+;; - do something about the case where the syntax-table is changed.
+;; This typically happens with tex-mode and its `$' operator.
+;; - new functions `syntax-state', ... to replace uses of parse-partial-state
+;; with something higher-level (similar to syntax-ppss-context).
+;; - interaction with mmm-mode.
+
+;;; Code:
+
+;; Note: PPSS stands for `parse-partial-sexp state'
+
+(eval-when-compile (require 'cl-lib))
+
+;;; Applying syntax-table properties where needed.
+
+(defvar syntax-propertize-function nil
+ ;; Rather than a -functions hook, this is a -function because it's easier
+ ;; to do a single scan than several scans: with multiple scans, one cannot
+ ;; assume that the text before point has been propertized, so syntax-ppss
+ ;; gives unreliable results (and stores them in its cache to boot, so we'd
+ ;; have to flush that cache between each function, and we couldn't use
+ ;; syntax-ppss-flush-cache since that would not only flush the cache but also
+ ;; reset syntax-propertize--done which should not be done in this case).
+ "Mode-specific function to apply `syntax-table' text properties.
+It is the work horse of `syntax-propertize', which is called by things like
+Font-Lock and indentation.
+
+It is given two arguments, START and END: the start and end of the text to
+which `syntax-table' might need to be applied. Major modes can use this to
+override the buffer's syntax table for special syntactic constructs that
+cannot be handled just by the buffer's syntax-table.
+
+The specified function may call `syntax-ppss' on any position
+before END, but if it calls `syntax-ppss' on some
+position and later modifies the buffer on some earlier position,
+then it is its responsibility to call `syntax-ppss-flush-cache' to flush
+the now obsolete ppss info from the cache.
+
+Note: When this variable is a function, it must apply _all_ the
+`syntax-table' properties needed in the given text interval.
+Using both this function and other means to apply these
+properties won't work properly.")
+
+(defvar syntax-propertize-chunk-size 500)
+
+(defvar-local syntax-propertize-extend-region-functions
+ '(syntax-propertize-wholelines)
+ "Special hook run just before proceeding to propertize a region.
+This is used to allow major modes to help `syntax-propertize' find safe buffer
+positions as beginning and end of the propertized region. Its most common use
+is to solve the problem of /identification/ of multiline elements by providing
+a function that tries to find such elements and move the boundaries such that
+they do not fall in the middle of one.
+Each function is called with two arguments (START and END) and it should return
+either a cons (NEW-START . NEW-END) or nil if no adjustment should be made.
+These functions are run in turn repeatedly until they all return nil.
+Put first the functions more likely to cause a change and cheaper to compute.")
+;; Mark it as a special hook which doesn't use any global setting
+;; (i.e. doesn't obey the element t in the buffer-local value).
+
+(cl-defstruct (ppss
+ (:constructor make-ppss)
+ (:copier nil)
+ (:type list))
+ (depth nil :documentation "Depth in parens.")
+ (innermost-start
+ nil :documentation
+ "Character address of start of innermost containing list; nil if none.")
+ (last-complete-sexp-start
+ nil :documentation
+ "Character address of start of last complete sexp terminated.")
+ (string-terminator nil :documentation "\
+Non-nil if inside a string.
+\(it is the character that will terminate the string, or t if the
+string should be terminated by a generic string delimiter.)")
+ (comment-depth nil :documentation "\
+nil if outside a comment, t if inside a non-nestable comment,
+else an integer (the current comment nesting).")
+ (quoted-p nil :documentation "t if following a quote character.")
+ (min-depth
+ nil :documentation "The minimum depth in parens encountered during this scan.")
+ (comment-style nil :documentation "Style of comment, if any.")
+ (comment-or-string-start
+ nil :documentation
+ "Character address of start of comment or string; nil if not in one.")
+ (open-parens
+ nil :documentation
+ "List of positions of currently open parens, outermost first.")
+ (two-character-syntax nil :documentation "\
+When the last position scanned holds the first character of a
+\(potential) two character construct, the syntax of that position,
+otherwise nil. That construct can be a two character comment
+delimiter or an Escaped or Char-quoted character."))
+
+(defvar syntax-wholeline-max 10000
+ "Maximum line length for syntax operations.
+If lines are longer than that, syntax operations will treat them as chunks
+of this size. Misfontification may then occur.
+This is a tradeoff between correctly applying the syntax rules,
+and avoiding major slowdown on pathologically long lines.")
+
+(defun syntax--lbp (&optional arg)
+ "Like `line-beginning-position' but obeying `syntax-wholeline-max'."
+ (let ((pos (point))
+ (res (line-beginning-position arg)))
+ (cond
+ ((< (abs (- pos res)) syntax-wholeline-max) res)
+ ;; For lines that are too long, round to the nearest multiple of
+ ;; `syntax-wholeline-max'. We use rounding rather than just
+ ;; (min res (+ pos syntax-wholeline-max)) so that repeated calls
+ ;; to `syntax-propertize-wholelines' don't keep growing the bounds,
+ ;; i.e. it really behaves like additional line-breaks.
+ ((< res pos)
+ (let ((max syntax-wholeline-max))
+ (max (point-min) (* max (truncate pos max)))))
+ (t
+ (let ((max syntax-wholeline-max))
+ (min (point-max) (* max (ceiling pos max))))))))
+
+(defun syntax-propertize-wholelines (beg end)
+ "Extend the region delimited by BEG and END to whole lines.
+This function is useful for
+`syntax-propertize-extend-region-functions';
+see Info node `(elisp) Syntax Properties'."
+ ;; This let-binding was taken from
+ ;; `font-lock-extend-region-wholelines' where it was used to avoid
+ ;; inf-looping (Bug#21615) but for some reason it was not applied
+ ;; here in syntax.el and was used only for the "beg" side.
+ (let ((inhibit-field-text-motion t))
+ (let ((new-beg (progn (goto-char beg)
+ (if (bolp) beg
+ (syntax--lbp))))
+ (new-end (progn (goto-char end)
+ (if (bolp) end
+ (syntax--lbp 2)))))
+ (unless (and (eql beg new-beg) (eql end new-end))
+ (cons new-beg new-end)))))
+
+(defun syntax-propertize-multiline (beg end)
+ "Let `syntax-propertize' pay attention to the syntax-multiline property."
+ (when (and (> beg (point-min))
+ (get-text-property (1- beg) 'syntax-multiline))
+ (setq beg (or (previous-single-property-change beg 'syntax-multiline)
+ (point-min))))
+ ;;
+ (when (get-text-property end 'syntax-multiline)
+ (setq end (or (text-property-any end (point-max)
+ 'syntax-multiline nil)
+ (point-max))))
+ (cons beg end))
+
+(defun syntax-propertize--shift-groups-and-backrefs (re n)
+ (let ((new-re (replace-regexp-in-string
+ "\\\\(\\?\\([0-9]+\\):"
+ (lambda (s)
+ (replace-match
+ (number-to-string
+ (+ n (string-to-number (match-string 1 s))))
+ t t s 1))
+ re t t))
+ (pos 0))
+ (while (string-match "\\\\\\([0-9]+\\)" new-re pos)
+ (setq pos (+ 1 (match-beginning 1)))
+ (when (save-match-data
+ ;; With \N, the \ must be in a subregexp context, i.e.,
+ ;; not in a character class or in a \{\} repetition.
+ (subregexp-context-p new-re (match-beginning 0)))
+ (let ((shifted (+ n (string-to-number (match-string 1 new-re)))))
+ (when (> shifted 9)
+ (error "There may be at most nine back-references"))
+ (setq new-re (replace-match (number-to-string shifted)
+ t t new-re 1)))))
+ new-re))
+
+(defmacro syntax-propertize-precompile-rules (&rest rules)
+ "Return a precompiled form of RULES to pass to `syntax-propertize-rules'.
+The arg RULES can be of the same form as in `syntax-propertize-rules'.
+The return value is an object that can be passed as a rule to
+`syntax-propertize-rules'.
+I.e. this is useful only when you want to share rules among several
+`syntax-propertize-function's."
+ (declare (debug syntax-propertize-rules))
+ ;; Precompile? Yeah, right!
+ ;; Seriously, tho, this is a macro for 2 reasons:
+ ;; - we could indeed do some pre-compilation at some point in the future,
+ ;; e.g. fi/when we switch to a DFA-based implementation of
+ ;; syntax-propertize-rules.
+ ;; - this lets Edebug properly annotate the expressions inside RULES.
+ `',rules)
+
+(defmacro syntax-propertize-rules (&rest rules)
+ "Make a function that applies RULES for use in `syntax-propertize-function'.
+The function will scan the buffer, applying the rules where they match.
+The buffer is scanned a single time, like \"lex\" would, rather than once
+per rule.
+
+Each RULE can be a symbol, in which case that symbol's value should be,
+at macro-expansion time, a precompiled set of rules, as returned
+by `syntax-propertize-precompile-rules'.
+
+Otherwise, RULE should have the form (REGEXP HIGHLIGHT1 ... HIGHLIGHTn), where
+REGEXP is an expression (evaluated at time of macro-expansion) that returns
+a regexp, and where HIGHLIGHTs have the form (NUMBER SYNTAX) which means to
+apply the property SYNTAX to the chars matched by the subgroup NUMBER
+of the regular expression, if NUMBER did match.
+SYNTAX is an expression that returns a value to apply as `syntax-table'
+property. Some expressions are handled specially:
+- if SYNTAX is a string, then it is converted with `string-to-syntax';
+- if SYNTAX has the form (prog1 EXP . EXPS) then the value returned by EXP
+ will be applied to the buffer before running EXPS and if EXP is a string it
+ is also converted with `string-to-syntax'.
+The SYNTAX expression is responsible to save the `match-data' if needed
+for subsequent HIGHLIGHTs.
+Also SYNTAX is free to move point, in which case RULES may not be applied to
+some parts of the text or may be applied several times to other parts.
+
+Note: There may be at most nine back-references in the REGEXPs of
+all RULES in total."
+ (declare (debug (&rest &or symbolp ;FIXME: edebug this eval step.
+ (form &rest
+ (numberp
+ [&or stringp ;FIXME: Use &wrap
+ ("prog1" [&or stringp def-form] def-body)
+ def-form])))))
+ (let ((newrules nil))
+ (while rules
+ (if (symbolp (car rules))
+ (setq rules (append (symbol-value (pop rules)) rules))
+ (push (pop rules) newrules)))
+ (setq rules (nreverse newrules)))
+ (let* ((offset 0)
+ (branches '())
+ ;; We'd like to use a real DFA-based lexer, usually, but since Emacs
+ ;; doesn't have one yet, we fallback on building one large regexp
+ ;; and use groups to determine which branch of the regexp matched.
+ (re
+ (mapconcat
+ (lambda (rule)
+ (let* ((orig-re (eval (car rule) t))
+ (re orig-re))
+ (when (and (assq 0 rule) (cdr rules))
+ ;; If there's more than 1 rule, and the rule want to apply
+ ;; highlight to match 0, create an extra group to be able to
+ ;; tell when *this* match 0 has succeeded.
+ (cl-incf offset)
+ (setq re (concat "\\(" re "\\)")))
+ (setq re (syntax-propertize--shift-groups-and-backrefs re offset))
+ (let ((code '())
+ (condition
+ (cond
+ ((assq 0 rule) (if (zerop offset) t
+ `(match-beginning ,offset)))
+ ((and (cdr rule) (null (cddr rule)))
+ `(match-beginning ,(+ offset (car (cadr rule)))))
+ (t
+ `(or ,@(mapcar
+ (lambda (case)
+ `(match-beginning ,(+ offset (car case))))
+ (cdr rule))))))
+ (nocode t)
+ (offset offset))
+ ;; If some of the subgroup rules include Elisp code, then we
+ ;; need to set the match-data so it's consistent with what the
+ ;; code expects. If not, then we can simply use shifted
+ ;; offset in our own code.
+ (unless (zerop offset)
+ (dolist (case (cdr rule))
+ (unless (stringp (cadr case))
+ (setq nocode nil)))
+ (unless nocode
+ (push `(let ((md (match-data 'ints)))
+ ;; Keep match 0 as is, but shift everything else.
+ (setcdr (cdr md) (nthcdr ,(* (1+ offset) 2) md))
+ (set-match-data md))
+ code)
+ (setq offset 0)))
+ ;; Now construct the code for each subgroup rules.
+ (dolist (case (cdr rule))
+ (cl-assert (null (cddr case)))
+ (let* ((gn (+ offset (car case)))
+ (action (nth 1 case))
+ (thiscode
+ (cond
+ ((stringp action)
+ `((put-text-property
+ (match-beginning ,gn) (match-end ,gn)
+ 'syntax-table
+ ',(string-to-syntax action))))
+ ((eq (car-safe action) 'ignore)
+ (cdr action))
+ ((eq (car-safe action) 'prog1)
+ (if (stringp (nth 1 action))
+ `((put-text-property
+ (match-beginning ,gn) (match-end ,gn)
+ 'syntax-table
+ ',(string-to-syntax (nth 1 action)))
+ ,@(nthcdr 2 action))
+ `((let ((mb (match-beginning ,gn))
+ (me (match-end ,gn)))
+ ,(macroexp-let2 nil syntax (nth 1 action)
+ `(progn
+ (if ,syntax
+ (put-text-property
+ mb me 'syntax-table ,syntax))
+ ,@(nthcdr 2 action)))))))
+ (t
+ `((let ((mb (match-beginning ,gn))
+ (me (match-end ,gn))
+ (syntax ,action))
+ (if syntax
+ (put-text-property
+ mb me 'syntax-table syntax))))))))
+
+ (if (or (not (cddr rule)) (zerop gn))
+ (setq code (nconc (nreverse thiscode) code))
+ (push `(if (match-beginning ,gn)
+ ;; Try and generate clean code with no
+ ;; extraneous progn.
+ ,(if (null (cdr thiscode))
+ (car thiscode)
+ `(progn ,@thiscode)))
+ code))))
+ (push (cons condition (nreverse code))
+ branches))
+ (cl-incf offset (regexp-opt-depth orig-re))
+ re))
+ rules
+ "\\|")))
+ `(lambda (start end)
+ (goto-char start)
+ (while (and (< (point) end)
+ (re-search-forward ,re end t))
+ (cond ,@(nreverse branches))))))
+
+(defun syntax-propertize-via-font-lock (keywords)
+ "Propertize for syntax using font-lock syntax.
+KEYWORDS obeys the format used in `font-lock-syntactic-keywords'.
+The return value is a function (with two parameters, START and
+END) suitable for `syntax-propertize-function'."
+ (lambda (start end)
+ (with-no-warnings
+ (let ((font-lock-syntactic-keywords keywords))
+ (font-lock-fontify-syntactic-keywords-region start end)
+ ;; In case it was eval'd/compiled.
+ (setq keywords font-lock-syntactic-keywords)))))
+
+(defvar-local syntax-ppss-table nil
+ "Syntax-table to use during `syntax-ppss', if any.")
+
+(defun syntax-propertize--in-process-p ()
+ "Non-nil if we're inside `syntax-propertize'.
+This is used to avoid infinite recursion as well as to handle cases where
+`syntax-ppss' is called when the final `syntax-table' properties have not
+yet been setup, in which case we may end up putting invalid info into the cache.
+It's also used so that `syntax-ppss-flush-cache' can be used from within
+`syntax-propertize' without ruining the `syntax-table' already set."
+ (eq syntax-propertize--done most-positive-fixnum))
+
+(defvar-local syntax-ppss--updated-cache nil)
+
+(defun syntax-propertize (pos)
+ "Ensure that syntax-table properties are set until POS (a buffer point)."
+ (when (< syntax-propertize--done pos)
+ (if (memq syntax-propertize-function '(nil ignore))
+ (setq syntax-propertize--done (max (point-max) pos))
+ ;; (message "Needs to syntax-propertize from %s to %s"
+ ;; syntax-propertize--done pos)
+ (setq-local parse-sexp-lookup-properties t)
+ (when (< syntax-propertize--done (point-min))
+ ;; *Usually* syntax-propertize is called via syntax-ppss which
+ ;; takes care of adding syntax-ppss-flush-cache to b-c-f, but this
+ ;; is not *always* the case, so since we share a single "flush" function
+ ;; between syntax-ppss and syntax-propertize, we also have to make
+ ;; sure the flush function is installed here (bug#29767).
+ (add-hook 'before-change-functions
+ #'syntax-ppss-flush-cache 99 t))
+ (save-excursion
+ (with-silent-modifications
+ (with-syntax-table (or syntax-ppss-table (syntax-table))
+ (make-local-variable 'syntax-propertize--done) ;Just in case!
+ ;; Make sure we let-bind it only buffer-locally.
+ (make-local-variable 'syntax-ppss--updated-cache)
+ (let* ((start (max (min syntax-propertize--done (point-max))
+ (point-min)))
+ (end (max pos
+ (min (point-max)
+ (+ start syntax-propertize-chunk-size))))
+ (first t)
+ (repeat t)
+ (syntax-ppss--updated-cache nil))
+ (while repeat
+ (setq repeat nil)
+ (run-hook-wrapped
+ 'syntax-propertize-extend-region-functions
+ (lambda (f)
+ ;; Bind `syntax-propertize--done' to avoid recursion!
+ (let* ((syntax-propertize--done most-positive-fixnum)
+ (new (funcall f start end)))
+ (if (or (null new)
+ (and (>= (car new) start) (<= (cdr new) end)))
+ nil
+ (setq start (car new))
+ (setq end (cdr new))
+ ;; If there's been a change, we should go through the
+ ;; list again since this new position may
+ ;; warrant a different answer from one of the funs we've
+ ;; already seen.
+ (unless first (setq repeat t))))
+ (setq first nil))))
+ ;; Flush ppss cache between the original value of `start' and that
+ ;; set above by syntax-propertize-extend-region-functions.
+ (syntax-ppss-flush-cache start)
+ ;; Move the limit before calling the function, so it's
+ ;; done in case of errors.
+ (setq syntax-propertize--done end)
+ ;; (message "syntax-propertizing from %s to %s" start end)
+ (remove-text-properties start end
+ '(syntax-table nil syntax-multiline nil))
+ ;; Bind `syntax-propertize--done' to avoid recursion!
+ (let ((syntax-propertize--done most-positive-fixnum))
+ (funcall syntax-propertize-function start end)
+ (when syntax-ppss--updated-cache
+ ;; `syntax-ppss' was called and updated the cache while we
+ ;; were propertizing so we need to flush the part of the
+ ;; cache that may have been rendered out-of-date by the new
+ ;; properties.
+ ;; We used to require syntax-propertize-functions to do that
+ ;; manually when applicable, but nowadays the `syntax-ppss'
+ ;; cache can be updated by too many functions, so the author
+ ;; of the syntax-propertize-function may not be aware it
+ ;; can happen.
+ (syntax-ppss-flush-cache start))))))))))
+
+;;; Link syntax-propertize with syntax.c.
+
+(defvar syntax-propertize-chunks
+ ;; We're not sure how far we'll go. In my tests, using chunks of 2000
+ ;; brings the overhead to something negligible. Passing ‘charpos’ directly
+ ;; also works (basically works line-by-line) but results in an overhead which
+ ;; I thought was a bit too high (like around 50%).
+ 2000)
+
+(defun internal--syntax-propertize (charpos)
+ ;; FIXME: Called directly from C.
+ (save-match-data
+ (syntax-propertize (min (+ syntax-propertize-chunks charpos) (point-max)))))
+
+;;; Incrementally compute and memoize parser state.
+
+(defsubst syntax-ppss-depth (ppss)
+ (nth 0 ppss))
+
+(defun syntax-ppss-toplevel-pos (ppss)
+ "Get the latest syntactically outermost position found in a syntactic scan.
+PPSS is a scan state, as returned by `parse-partial-sexp' or `syntax-ppss'.
+An \"outermost position\" means one that it is outside of any syntactic entity:
+outside of any parentheses, comments, or strings encountered in the scan.
+If no such position is recorded in PPSS (because the end of the scan was
+itself at the outermost level), return nil."
+ (or (car (nth 9 ppss))
+ (nth 8 ppss)))
+
+(defsubst syntax-ppss-context (ppss)
+ "Say whether PPSS is a string, a comment, or something else.
+If PPSS is a string, the symbol `string' is returned. If it's a
+comment, the symbol `comment' is returned. If it's something
+else, nil is returned."
+ (cond
+ ((nth 3 ppss) 'string)
+ ((nth 4 ppss) 'comment)
+ (t nil)))
+
+(defvar syntax-ppss-max-span 20000
+ "Threshold below which cache info is deemed unnecessary.
+We try to make sure that cache entries are at least this far apart
+from each other, to avoid keeping too much useless info.")
+
+(defvar syntax-begin-function nil
+ "Function to move back outside of any comment/string/paren.
+This function should move the cursor back to some syntactically safe
+point (where the PPSS is equivalent to nil).")
+(make-obsolete-variable 'syntax-begin-function nil "25.1")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Several caches.
+;;
+;; Because `syntax-ppss' is equivalent to (parse-partial-sexp
+;; (POINT-MIN) x), we need either to empty the cache when we narrow
+;; the buffer, which is suboptimal, or we need to use several caches.
+;; We use two of them, one for widened buffer, and one for narrowing.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(defvar-local syntax-ppss-wide nil
+ "Cons of two elements (LAST . CACHE).
+Where LAST is a pair (LAST-POS . LAST-PPS) caching the last invocation
+and CACHE is a list of (POS . PPSS) pairs, in decreasing POS order.
+These are valid when the buffer has no restriction.")
+
+(defvar-local syntax-ppss-narrow nil
+ "Same as `syntax-ppss-wide' but for a narrowed buffer.")
+
+(defvar-local syntax-ppss-narrow-start nil
+ "Start position of the narrowing for `syntax-ppss-narrow'.")
+
+(define-obsolete-function-alias 'syntax-ppss-after-change-function
+ #'syntax-ppss-flush-cache "27.1")
+(defun syntax-ppss-flush-cache (beg &rest _ignored)
+ "Flush the cache of `syntax-ppss' starting at position BEG."
+ ;; Set syntax-propertize to refontify anything past beg.
+ (unless (syntax-propertize--in-process-p)
+ (setq syntax-propertize--done (min beg syntax-propertize--done)))
+ ;; Flush invalid cache entries.
+ (dolist (cell (list syntax-ppss-wide syntax-ppss-narrow))
+ (pcase cell
+ (`(,last . ,cache)
+ (while (and cache (> (caar cache) beg))
+ (setq cache (cdr cache)))
+ ;; Throw away `last' value if made invalid.
+ (when (< beg (or (car last) 0))
+ ;; If syntax-begin-function jumped to BEG, then the old state at BEG can
+ ;; depend on the text after BEG (which is presumably changed). So if
+ ;; BEG=(car (nth 10 syntax-ppss-last)) don't reuse that data because the
+ ;; assumed nil state at BEG may not be valid any more.
+ (if (<= beg (or (syntax-ppss-toplevel-pos (cdr last))
+ (nth 3 last)
+ 0))
+ (setq last nil)
+ (setcar last nil)))
+ ;; Unregister if there's no cache left. Sadly this doesn't work
+ ;; because `before-change-functions' is temporarily bound to nil here.
+ ;; (unless cache
+ ;; (remove-hook 'before-change-functions #'syntax-ppss-flush-cache t))
+ (setcar cell last)
+ (setcdr cell cache)))
+ ))
+
+;; FIXME: Explain this variable. Currently only its last (5th) slot is used.
+;; Perhaps the other slots should be removed?
+;; This variable is only used when `syntax-begin-function' is used and
+;; will hence be removed together with `syntax-begin-function'.
+(defvar syntax-ppss-stats
+ [(0 . 0) (0 . 0) (0 . 0) (0 . 0) (0 . 0) (2 . 2500)]
+ "Statistics about which case is more/less frequent in `syntax-ppss'.
+The 5th slot drives the heuristic to use `syntax-begin-function'.
+The rest is only useful if you're interested in tweaking the algorithm.")
+
+(defun syntax-ppss-stats ()
+ (mapcar (lambda (x)
+ (condition-case nil
+ (cons (car x) (/ (cdr x) (car x)))
+ (error nil)))
+ syntax-ppss-stats))
+(defun syntax-ppss--update-stats (i old new)
+ (let ((pair (aref syntax-ppss-stats i)))
+ (cl-incf (car pair))
+ (cl-incf (cdr pair) (- new old))))
+
+(defun syntax-ppss--data ()
+ (if (eq (point-min) 1)
+ (progn
+ (unless syntax-ppss-wide
+ (setq syntax-ppss-wide (cons nil nil)))
+ syntax-ppss-wide)
+ (unless (eq syntax-ppss-narrow-start (point-min))
+ (setq syntax-ppss-narrow-start (point-min))
+ (setq syntax-ppss-narrow (cons nil nil)))
+ syntax-ppss-narrow))
+
+(defun syntax-ppss (&optional pos)
+ "Parse-Partial-Sexp State at POS, defaulting to point.
+If POS is given, this function moves point to POS.
+
+The returned value is the same as that of `parse-partial-sexp'
+run from `point-min' to POS except that values at positions 2 and 6
+in the returned list (counting from 0) cannot be relied upon.
+
+It is necessary to call `syntax-ppss-flush-cache' explicitly if
+this function is called while `before-change-functions' is
+temporarily let-bound, or if the buffer is modified without
+running the hook."
+ ;; Default values.
+ (unless pos (setq pos (point)))
+ (syntax-propertize pos)
+ ;;
+ (with-syntax-table (or syntax-ppss-table (syntax-table))
+ (let* ((cell (syntax-ppss--data))
+ (ppss-last (car cell))
+ (ppss-cache (cdr cell))
+ (old-ppss (cdr ppss-last))
+ (old-pos (car ppss-last))
+ (ppss nil)
+ (pt-min (point-min)))
+ (if (and old-pos (> old-pos pos)) (setq old-pos nil))
+ ;; Use the OLD-POS if usable and close. Don't update the `last' cache.
+ (condition-case nil
+ (if (and old-pos (< (- pos old-pos)
+ ;; The time to use syntax-begin-function and
+ ;; find PPSS is assumed to be about 2 * distance.
+ (let ((pair (aref syntax-ppss-stats 5)))
+ (/ (* 2 (cdr pair)) (car pair)))))
+ (progn
+ (syntax-ppss--update-stats 0 old-pos pos)
+ (parse-partial-sexp old-pos pos nil nil old-ppss))
+
+ (cond
+ ;; Use OLD-PPSS if possible and close enough.
+ ((and (not old-pos) old-ppss
+ ;; If `pt-min' is too far from `pos', we could try to use
+ ;; other positions in (nth 9 old-ppss), but that doesn't
+ ;; seem to happen in practice and it would complicate this
+ ;; code (and the before-change-function code even more).
+ ;; But maybe it would be useful in "degenerate" cases such
+ ;; as when the whole file is wrapped in a set
+ ;; of parentheses.
+ (setq pt-min (or (syntax-ppss-toplevel-pos old-ppss)
+ (nth 2 old-ppss)))
+ (<= pt-min pos) (< (- pos pt-min) syntax-ppss-max-span))
+ (syntax-ppss--update-stats 1 pt-min pos)
+ (setq ppss (parse-partial-sexp pt-min pos)))
+ ;; The OLD-* data can't be used. Consult the cache.
+ (t
+ (let ((cache-pred nil)
+ (cache ppss-cache)
+ (pt-min (point-min))
+ ;; I differentiate between PT-MIN and PT-BEST because
+ ;; I feel like it might be important to ensure that the
+ ;; cache is only filled with 100% sure data (whereas
+ ;; syntax-begin-function might return incorrect data).
+ ;; Maybe that's just stupid.
+ (pt-best (point-min))
+ (ppss-best nil))
+ ;; look for a usable cache entry.
+ (while (and cache (< pos (caar cache)))
+ (setq cache-pred cache)
+ (setq cache (cdr cache)))
+ (if cache (setq pt-min (caar cache) ppss (cdar cache)))
+
+ ;; Setup the before-change function if necessary.
+ (unless (or ppss-cache ppss-last)
+ ;; Note: combine-change-calls-1 needs to be kept in sync
+ ;; with this!
+ (add-hook 'before-change-functions
+ #'syntax-ppss-flush-cache
+ ;; We should be either the very last function on
+ ;; before-change-functions or the very first on
+ ;; after-change-functions.
+ 99 t))
+
+ ;; Use the best of OLD-POS and CACHE.
+ (if (or (not old-pos) (< old-pos pt-min))
+ (setq pt-best pt-min ppss-best ppss)
+ (syntax-ppss--update-stats 4 old-pos pos)
+ (setq pt-best old-pos ppss-best old-ppss))
+
+ ;; Use the `syntax-begin-function' if available.
+ ;; We could try using that function earlier, but:
+ ;; - The result might not be 100% reliable, so it's better to use
+ ;; the cache if available.
+ ;; - The function might be slow.
+ ;; - If this function almost always finds a safe nearby spot,
+ ;; the cache won't be populated, so consulting it is cheap.
+ (when (and syntax-begin-function
+ (progn (goto-char pos)
+ (funcall syntax-begin-function)
+ ;; Make sure it's better.
+ (> (point) pt-best))
+ ;; Simple sanity checks.
+ (< (point) pos) ; backward-paragraph can fail here.
+ (not (memq (get-text-property (point) 'face)
+ '(font-lock-string-face font-lock-doc-face
+ font-lock-comment-face))))
+ (syntax-ppss--update-stats 5 (point) pos)
+ (setq pt-best (point) ppss-best nil))
+
+ (cond
+ ;; Quick case when we found a nearby pos.
+ ((< (- pos pt-best) syntax-ppss-max-span)
+ (syntax-ppss--update-stats 2 pt-best pos)
+ (setq ppss (parse-partial-sexp pt-best pos nil nil ppss-best)))
+ ;; Slow case: compute the state from some known position and
+ ;; populate the cache so we won't need to do it again soon.
+ (t
+ (syntax-ppss--update-stats 3 pt-min pos)
+ (setq syntax-ppss--updated-cache t)
+
+ ;; If `pt-min' is too far, add a few intermediate entries.
+ (while (> (- pos pt-min) (* 2 syntax-ppss-max-span))
+ (setq ppss (parse-partial-sexp
+ pt-min (setq pt-min (/ (+ pt-min pos) 2))
+ nil nil ppss))
+ (push (cons pt-min ppss)
+ (if cache-pred (cdr cache-pred) ppss-cache)))
+
+ ;; Compute the actual return value.
+ (setq ppss (parse-partial-sexp pt-min pos nil nil ppss))
+
+ ;; Debugging check.
+ ;; (let ((real-ppss (parse-partial-sexp (point-min) pos)))
+ ;; (setcar (last ppss 4) 0)
+ ;; (setcar (last real-ppss 4) 0)
+ ;; (setcar (last ppss 8) nil)
+ ;; (setcar (last real-ppss 8) nil)
+ ;; (unless (equal ppss real-ppss)
+ ;; (message "!!Syntax: %s != %s" ppss real-ppss)
+ ;; (setq ppss real-ppss)))
+
+ ;; Store it in the cache.
+ (let ((pair (cons pos ppss)))
+ (if cache-pred
+ (if (> (- (caar cache-pred) pos) syntax-ppss-max-span)
+ (push pair (cdr cache-pred))
+ (setcar cache-pred pair))
+ (if (or (null ppss-cache)
+ (> (- (caar ppss-cache) pos)
+ syntax-ppss-max-span))
+ (push pair ppss-cache)
+ (setcar ppss-cache pair)))))))))
+
+ (setq syntax-ppss--updated-cache t)
+ (setq ppss-last (cons pos ppss))
+ (setcar cell ppss-last)
+ (setcdr cell ppss-cache)
+ ppss)
+ (args-out-of-range
+ ;; If the buffer is more narrowed than when we built the cache,
+ ;; we may end up calling parse-partial-sexp with a position before
+ ;; point-min. In that case, just parse from point-min assuming
+ ;; a nil state.
+ (parse-partial-sexp (point-min) pos))))))
+
+;; Debugging functions
+
+(defun syntax-ppss-debug ()
+ (let ((pt nil)
+ (min-diffs nil))
+ (dolist (x (append (cdr (syntax-ppss--data)) (list (cons (point-min) nil))))
+ (when pt (push (- pt (car x)) min-diffs))
+ (setq pt (car x)))
+ min-diffs))
+
+(provide 'syntax)
+
+;;; syntax.el ends here