1 files changed, 775 insertions, 0 deletions
diff --git a/lisp/emacs-lisp/syntax.el b/lisp/emacs-lisp/syntax.el
new file mode 100644
index 00000000000..e1be3015838
--- /dev/null
+++ b/lisp/emacs-lisp/syntax.el
@@ -0,0 +1,775 @@
+;;; syntax.el --- helper functions to find syntactic context  -*- lexical-binding: t -*-
+
+;; Copyright (C) 2000-2022 Free Software Foundation, Inc.
+
+;; Maintainer: emacs-devel@gnu.org
+;; Keywords: internal
+
+;; This file is part of GNU Emacs.
+
+;; GNU Emacs is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+
+;; GNU Emacs is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.
+
+;;; Commentary:
+
+;; The main exported function is `syntax-ppss'.  You might also need
+;; to call `syntax-ppss-flush-cache' or to add it to
+;; before-change-functions'(although this is automatically done by
+;; syntax-ppss when needed, but that might fail if syntax-ppss is
+;; called in a context where before-change-functions is temporarily
+;; let-bound to nil).
+
+;;; Todo:
+
+;; - do something about the case where the syntax-table is changed.
+;;   This typically happens with tex-mode and its `$' operator.
+;; - new functions `syntax-state', ... to replace uses of parse-partial-state
+;;   with something higher-level (similar to syntax-ppss-context).
+;; - interaction with mmm-mode.
+
+;;; Code:
+
+;; Note: PPSS stands for `parse-partial-sexp state'
+
+(eval-when-compile (require 'cl-lib))
+
+;;; Applying syntax-table properties where needed.
+
+(defvar syntax-propertize-function nil
+  ;; Rather than a -functions hook, this is a -function because it's easier
+  ;; to do a single scan than several scans: with multiple scans, one cannot
+  ;; assume that the text before point has been propertized, so syntax-ppss
+  ;; gives unreliable results (and stores them in its cache to boot, so we'd
+  ;; have to flush that cache between each function, and we couldn't use
+  ;; syntax-ppss-flush-cache since that would not only flush the cache but also
+  ;; reset syntax-propertize--done which should not be done in this case).
+  "Mode-specific function to apply `syntax-table' text properties.
+It is the work horse of `syntax-propertize', which is called by things like
+Font-Lock and indentation.
+
+It is given two arguments, START and END: the start and end of the text to
+which `syntax-table' might need to be applied.  Major modes can use this to
+override the buffer's syntax table for special syntactic constructs that
+cannot be handled just by the buffer's syntax-table.
+
+The specified function may call `syntax-ppss' on any position
+before END, but if it calls `syntax-ppss' on some
+position and later modifies the buffer on some earlier position,
+then it is its responsibility to call `syntax-ppss-flush-cache' to flush
+the now obsolete ppss info from the cache.
+
+Note: When this variable is a function, it must apply _all_ the
+`syntax-table' properties needed in the given text interval.
+Using both this function and other means to apply these
+properties won't work properly.")
+
+(defvar syntax-propertize-chunk-size 500)
+
+(defvar-local syntax-propertize-extend-region-functions
+  '(syntax-propertize-wholelines)
+  "Special hook run just before proceeding to propertize a region.
+This is used to allow major modes to help `syntax-propertize' find safe buffer
+positions as beginning and end of the propertized region.  Its most common use
+is to solve the problem of /identification/ of multiline elements by providing
+a function that tries to find such elements and move the boundaries such that
+they do not fall in the middle of one.
+Each function is called with two arguments (START and END) and it should return
+either a cons (NEW-START . NEW-END) or nil if no adjustment should be made.
+These functions are run in turn repeatedly until they all return nil.
+Put first the functions more likely to cause a change and cheaper to compute.")
+;; Mark it as a special hook which doesn't use any global setting
+;; (i.e. doesn't obey the element t in the buffer-local value).
+
+(cl-defstruct (ppss
+               (:constructor make-ppss)
+               (:copier nil)
+               (:type list))
+  (depth nil :documentation "Depth in parens.")
+  (innermost-start
+   nil :documentation
+   "Character address of start of innermost containing list; nil if none.")
+  (last-complete-sexp-start
+   nil :documentation
+   "Character address of start of last complete sexp terminated.")
+  (string-terminator nil :documentation "\
+Non-nil if inside a string.
+\(it is the character that will terminate the string, or t if the
+string should be terminated by a generic string delimiter.)")
+  (comment-depth nil :documentation "\
+nil if outside a comment, t if inside a non-nestable comment,
+else an integer (the current comment nesting).")
+  (quoted-p nil :documentation "t if following a quote character.")
+  (min-depth
+   nil :documentation "The minimum depth in parens encountered during this scan.")
+  (comment-style nil :documentation "Style of comment, if any.")
+  (comment-or-string-start
+   nil :documentation
+   "Character address of start of comment or string; nil if not in one.")
+  (open-parens
+   nil :documentation
+   "List of positions of currently open parens, outermost first.")
+  (two-character-syntax nil :documentation "\
+When the last position scanned holds the first character of a
+\(potential) two character construct, the syntax of that position,
+otherwise nil.  That construct can be a two character comment
+delimiter or an Escaped or Char-quoted character."))
+
+(defvar syntax-wholeline-max 10000
+  "Maximum line length for syntax operations.
+If lines are longer than that, syntax operations will treat them as chunks
+of this size.  Misfontification may then occur.
+This is a tradeoff between correctly applying the syntax rules,
+and avoiding major slowdown on pathologically long lines.")
+
+(defun syntax--lbp (&optional arg)
+  "Like `line-beginning-position' but obeying `syntax-wholeline-max'."
+  (let ((pos (point))
+        (res (line-beginning-position arg)))
+    (cond
+     ((< (abs (- pos res)) syntax-wholeline-max) res)
+     ;; For lines that are too long, round to the nearest multiple of
+     ;; `syntax-wholeline-max'.  We use rounding rather than just
+     ;; (min res (+ pos syntax-wholeline-max)) so that repeated calls
+     ;; to `syntax-propertize-wholelines' don't keep growing the bounds,
+     ;; i.e. it really behaves like additional line-breaks.
+     ((< res pos)
+      (let ((max syntax-wholeline-max))
+        (max (point-min) (* max (truncate pos max)))))
+     (t
+      (let ((max syntax-wholeline-max))
+        (min (point-max) (* max (ceiling pos max))))))))
+
+(defun syntax-propertize-wholelines (beg end)
+  "Extend the region delimited by BEG and END to whole lines.
+This function is useful for
+`syntax-propertize-extend-region-functions';
+see Info node `(elisp) Syntax Properties'."
+  ;; This let-binding was taken from
+  ;; `font-lock-extend-region-wholelines' where it was used to avoid
+  ;; inf-looping (Bug#21615) but for some reason it was not applied
+  ;; here in syntax.el and was used only for the "beg" side.
+  (let ((inhibit-field-text-motion t))
+    (let ((new-beg (progn (goto-char beg)
+                          (if (bolp) beg
+                            (syntax--lbp))))
+          (new-end (progn (goto-char end)
+                          (if (bolp) end
+                            (syntax--lbp 2)))))
+      (unless (and (eql beg new-beg) (eql end new-end))
+        (cons new-beg new-end)))))
+
+(defun syntax-propertize-multiline (beg end)
+  "Let `syntax-propertize' pay attention to the syntax-multiline property."
+  (when (and (> beg (point-min))
+	     (get-text-property (1- beg) 'syntax-multiline))
+    (setq beg (or (previous-single-property-change beg 'syntax-multiline)
+		  (point-min))))
+  ;;
+  (when (get-text-property end 'syntax-multiline)
+    (setq end (or (text-property-any end (point-max)
+				     'syntax-multiline nil)
+		  (point-max))))
+  (cons beg end))
+
+(defun syntax-propertize--shift-groups-and-backrefs (re n)
+  (let ((new-re (replace-regexp-in-string
+                 "\\\\(\\?\\([0-9]+\\):"
+                 (lambda (s)
+                   (replace-match
+                    (number-to-string
+                     (+ n (string-to-number (match-string 1 s))))
+                    t t s 1))
+                 re t t))
+        (pos 0))
+    (while (string-match "\\\\\\([0-9]+\\)" new-re pos)
+      (setq pos (+ 1 (match-beginning 1)))
+      (when (save-match-data
+              ;; With \N, the \ must be in a subregexp context, i.e.,
+              ;; not in a character class or in a \{\} repetition.
+              (subregexp-context-p new-re (match-beginning 0)))
+        (let ((shifted (+ n (string-to-number (match-string 1 new-re)))))
+          (when (> shifted 9)
+            (error "There may be at most nine back-references"))
+          (setq new-re (replace-match (number-to-string shifted)
+                                      t t new-re 1)))))
+    new-re))
+
+(defmacro syntax-propertize-precompile-rules (&rest rules)
+  "Return a precompiled form of RULES to pass to `syntax-propertize-rules'.
+The arg RULES can be of the same form as in `syntax-propertize-rules'.
+The return value is an object that can be passed as a rule to
+`syntax-propertize-rules'.
+I.e. this is useful only when you want to share rules among several
+`syntax-propertize-function's."
+  (declare (debug syntax-propertize-rules))
+  ;; Precompile?  Yeah, right!
+  ;; Seriously, tho, this is a macro for 2 reasons:
+  ;; - we could indeed do some pre-compilation at some point in the future,
+  ;;   e.g. fi/when we switch to a DFA-based implementation of
+  ;;   syntax-propertize-rules.
+  ;; - this lets Edebug properly annotate the expressions inside RULES.
+  `',rules)
+
+(defmacro syntax-propertize-rules (&rest rules)
+  "Make a function that applies RULES for use in `syntax-propertize-function'.
+The function will scan the buffer, applying the rules where they match.
+The buffer is scanned a single time, like \"lex\" would, rather than once
+per rule.
+
+Each RULE can be a symbol, in which case that symbol's value should be,
+at macro-expansion time, a precompiled set of rules, as returned
+by `syntax-propertize-precompile-rules'.
+
+Otherwise, RULE should have the form (REGEXP HIGHLIGHT1 ... HIGHLIGHTn), where
+REGEXP is an expression (evaluated at time of macro-expansion) that returns
+a regexp, and where HIGHLIGHTs have the form (NUMBER SYNTAX) which means to
+apply the property SYNTAX to the chars matched by the subgroup NUMBER
+of the regular expression, if NUMBER did match.
+SYNTAX is an expression that returns a value to apply as `syntax-table'
+property.  Some expressions are handled specially:
+- if SYNTAX is a string, then it is converted with `string-to-syntax';
+- if SYNTAX has the form (prog1 EXP . EXPS) then the value returned by EXP
+  will be applied to the buffer before running EXPS and if EXP is a string it
+  is also converted with `string-to-syntax'.
+The SYNTAX expression is responsible to save the `match-data' if needed
+for subsequent HIGHLIGHTs.
+Also SYNTAX is free to move point, in which case RULES may not be applied to
+some parts of the text or may be applied several times to other parts.
+
+Note: There may be at most nine back-references in the REGEXPs of
+all RULES in total."
+  (declare (debug (&rest &or symbolp    ;FIXME: edebug this eval step.
+                         (form &rest
+                               (numberp
+                                [&or stringp ;FIXME: Use &wrap
+                                     ("prog1" [&or stringp def-form] def-body)
+                                     def-form])))))
+  (let ((newrules nil))
+    (while rules
+      (if (symbolp (car rules))
+          (setq rules (append (symbol-value (pop rules)) rules))
+        (push (pop rules) newrules)))
+    (setq rules (nreverse newrules)))
+  (let* ((offset 0)
+         (branches '())
+         ;; We'd like to use a real DFA-based lexer, usually, but since Emacs
+         ;; doesn't have one yet, we fallback on building one large regexp
+         ;; and use groups to determine which branch of the regexp matched.
+         (re
+          (mapconcat
+           (lambda (rule)
+             (let* ((orig-re (eval (car rule) t))
+                    (re orig-re))
+               (when (and (assq 0 rule) (cdr rules))
+                 ;; If there's more than 1 rule, and the rule want to apply
+                 ;; highlight to match 0, create an extra group to be able to
+                 ;; tell when *this* match 0 has succeeded.
+                 (cl-incf offset)
+                 (setq re (concat "\\(" re "\\)")))
+               (setq re (syntax-propertize--shift-groups-and-backrefs re offset))
+               (let ((code '())
+                     (condition
+                      (cond
+                       ((assq 0 rule) (if (zerop offset) t
+                                        `(match-beginning ,offset)))
+                       ((and (cdr rule) (null (cddr rule)))
+                        `(match-beginning ,(+ offset (car (cadr rule)))))
+                       (t
+                        `(or ,@(mapcar
+                                (lambda (case)
+                                  `(match-beginning ,(+ offset (car case))))
+                                (cdr rule))))))
+                     (nocode t)
+                     (offset offset))
+                 ;; If some of the subgroup rules include Elisp code, then we
+                 ;; need to set the match-data so it's consistent with what the
+                 ;; code expects.  If not, then we can simply use shifted
+                 ;; offset in our own code.
+                 (unless (zerop offset)
+                   (dolist (case (cdr rule))
+                     (unless (stringp (cadr case))
+                       (setq nocode nil)))
+                   (unless nocode
+                     (push `(let ((md (match-data 'ints)))
+                              ;; Keep match 0 as is, but shift everything else.
+                              (setcdr (cdr md) (nthcdr ,(* (1+ offset) 2) md))
+                              (set-match-data md))
+                           code)
+                     (setq offset 0)))
+                 ;; Now construct the code for each subgroup rules.
+                 (dolist (case (cdr rule))
+                   (cl-assert (null (cddr case)))
+                   (let* ((gn (+ offset (car case)))
+                          (action (nth 1 case))
+                          (thiscode
+                           (cond
+                            ((stringp action)
+                             `((put-text-property
+                                (match-beginning ,gn) (match-end ,gn)
+                                'syntax-table
+                                ',(string-to-syntax action))))
+                            ((eq (car-safe action) 'ignore)
+                             (cdr action))
+                            ((eq (car-safe action) 'prog1)
+                             (if (stringp (nth 1 action))
+                                 `((put-text-property
+                                    (match-beginning ,gn) (match-end ,gn)
+                                    'syntax-table
+                                    ',(string-to-syntax (nth 1 action)))
+                                   ,@(nthcdr 2 action))
+                               `((let ((mb (match-beginning ,gn))
+                                       (me (match-end ,gn)))
+                                   ,(macroexp-let2 nil syntax (nth 1 action)
+                                      `(progn
+                                         (if ,syntax
+                                             (put-text-property
+                                              mb me 'syntax-table ,syntax))
+                                         ,@(nthcdr 2 action)))))))
+                            (t
+                             `((let ((mb (match-beginning ,gn))
+                                     (me (match-end ,gn))
+                                     (syntax ,action))
+                                 (if syntax
+                                     (put-text-property
+                                      mb me 'syntax-table syntax))))))))
+
+                     (if (or (not (cddr rule)) (zerop gn))
+                         (setq code (nconc (nreverse thiscode) code))
+                       (push `(if (match-beginning ,gn)
+                                  ;; Try and generate clean code with no
+                                  ;; extraneous progn.
+                                  ,(if (null (cdr thiscode))
+                                       (car thiscode)
+                                     `(progn ,@thiscode)))
+                             code))))
+                 (push (cons condition (nreverse code))
+                       branches))
+               (cl-incf offset (regexp-opt-depth orig-re))
+               re))
+           rules
+           "\\|")))
+    `(lambda (start end)
+       (goto-char start)
+       (while (and (< (point) end)
+                   (re-search-forward ,re end t))
+         (cond ,@(nreverse branches))))))
+
+(defun syntax-propertize-via-font-lock (keywords)
+  "Propertize for syntax using font-lock syntax.
+KEYWORDS obeys the format used in `font-lock-syntactic-keywords'.
+The return value is a function (with two parameters, START and
+END) suitable for `syntax-propertize-function'."
+  (lambda (start end)
+    (with-no-warnings
+      (let ((font-lock-syntactic-keywords keywords))
+        (font-lock-fontify-syntactic-keywords-region start end)
+        ;; In case it was eval'd/compiled.
+        (setq keywords font-lock-syntactic-keywords)))))
+
+(defvar-local syntax-ppss-table nil
+  "Syntax-table to use during `syntax-ppss', if any.")
+
+(defun syntax-propertize--in-process-p ()
+  "Non-nil if we're inside `syntax-propertize'.
+This is used to avoid infinite recursion as well as to handle cases where
+`syntax-ppss' is called when the final `syntax-table' properties have not
+yet been setup, in which case we may end up putting invalid info into the cache.
+It's also used so that `syntax-ppss-flush-cache' can be used from within
+`syntax-propertize' without ruining the `syntax-table' already set."
+  (eq syntax-propertize--done most-positive-fixnum))
+
+(defvar-local syntax-ppss--updated-cache nil)
+
+(defun syntax-propertize (pos)
+  "Ensure that syntax-table properties are set until POS (a buffer point)."
+  (when (< syntax-propertize--done pos)
+    (if (memq syntax-propertize-function '(nil ignore))
+        (setq syntax-propertize--done (max (point-max) pos))
+      ;; (message "Needs to syntax-propertize from %s to %s"
+      ;;          syntax-propertize--done pos)
+      (setq-local parse-sexp-lookup-properties t)
+      (when (< syntax-propertize--done (point-min))
+        ;; *Usually* syntax-propertize is called via syntax-ppss which
+        ;; takes care of adding syntax-ppss-flush-cache to b-c-f, but this
+        ;; is not *always* the case, so since we share a single "flush" function
+        ;; between syntax-ppss and syntax-propertize, we also have to make
+        ;; sure the flush function is installed here (bug#29767).
+        (add-hook 'before-change-functions
+	          #'syntax-ppss-flush-cache 99 t))
+      (save-excursion
+        (with-silent-modifications
+          (with-syntax-table (or syntax-ppss-table (syntax-table))
+            (make-local-variable 'syntax-propertize--done) ;Just in case!
+            ;; Make sure we let-bind it only buffer-locally.
+            (make-local-variable 'syntax-ppss--updated-cache)
+            (let* ((start (max (min syntax-propertize--done (point-max))
+                               (point-min)))
+                   (end (max pos
+                             (min (point-max)
+                                  (+ start syntax-propertize-chunk-size))))
+                   (first t)
+                   (repeat t)
+                   (syntax-ppss--updated-cache nil))
+              (while repeat
+                (setq repeat nil)
+                (run-hook-wrapped
+                 'syntax-propertize-extend-region-functions
+                 (lambda (f)
+                   ;; Bind `syntax-propertize--done' to avoid recursion!
+                   (let* ((syntax-propertize--done most-positive-fixnum)
+                          (new (funcall f start end)))
+                     (if (or (null new)
+                             (and (>= (car new) start) (<= (cdr new) end)))
+                         nil
+                       (setq start (car new))
+                       (setq end (cdr new))
+                       ;; If there's been a change, we should go through the
+                       ;; list again since this new position may
+                       ;; warrant a different answer from one of the funs we've
+                       ;; already seen.
+                       (unless first (setq repeat t))))
+                   (setq first nil))))
+              ;; Flush ppss cache between the original value of `start' and that
+              ;; set above by syntax-propertize-extend-region-functions.
+              (syntax-ppss-flush-cache start)
+              ;; Move the limit before calling the function, so it's
+              ;; done in case of errors.
+              (setq syntax-propertize--done end)
+              ;; (message "syntax-propertizing from %s to %s" start end)
+              (remove-text-properties start end
+                                      '(syntax-table nil syntax-multiline nil))
+              ;; Bind `syntax-propertize--done' to avoid recursion!
+              (let ((syntax-propertize--done most-positive-fixnum))
+                (funcall syntax-propertize-function start end)
+                (when syntax-ppss--updated-cache
+                  ;; `syntax-ppss' was called and updated the cache while we
+                  ;; were propertizing so we need to flush the part of the
+                  ;; cache that may have been rendered out-of-date by the new
+                  ;; properties.
+                  ;; We used to require syntax-propertize-functions to do that
+                  ;; manually when applicable, but nowadays the `syntax-ppss'
+                  ;; cache can be updated by too many functions, so the author
+                  ;; of the syntax-propertize-function may not be aware it
+                  ;; can happen.
+                  (syntax-ppss-flush-cache start))))))))))
+
+;;; Link syntax-propertize with syntax.c.
+
+(defvar syntax-propertize-chunks
+  ;; We're not sure how far we'll go.  In my tests, using chunks of 2000
+  ;; brings the overhead to something negligible.  Passing ‘charpos’ directly
+  ;; also works (basically works line-by-line) but results in an overhead which
+  ;; I thought was a bit too high (like around 50%).
+  2000)
+
+(defun internal--syntax-propertize (charpos)
+  ;; FIXME: Called directly from C.
+  (save-match-data
+    (syntax-propertize (min (+ syntax-propertize-chunks charpos) (point-max)))))
+
+;;; Incrementally compute and memoize parser state.
+
+(defsubst syntax-ppss-depth (ppss)
+  (nth 0 ppss))
+
+(defun syntax-ppss-toplevel-pos (ppss)
+  "Get the latest syntactically outermost position found in a syntactic scan.
+PPSS is a scan state, as returned by `parse-partial-sexp' or `syntax-ppss'.
+An \"outermost position\" means one that it is outside of any syntactic entity:
+outside of any parentheses, comments, or strings encountered in the scan.
+If no such position is recorded in PPSS (because the end of the scan was
+itself at the outermost level), return nil."
+  (or (car (nth 9 ppss))
+      (nth 8 ppss)))
+
+(defsubst syntax-ppss-context (ppss)
+  "Say whether PPSS is a string, a comment, or something else.
+If PPSS is a string, the symbol `string' is returned.  If it's a
+comment, the symbol `comment' is returned.  If it's something
+else, nil is returned."
+  (cond
+   ((nth 3 ppss) 'string)
+   ((nth 4 ppss) 'comment)
+   (t nil)))
+
+(defvar syntax-ppss-max-span 20000
+  "Threshold below which cache info is deemed unnecessary.
+We try to make sure that cache entries are at least this far apart
+from each other, to avoid keeping too much useless info.")
+
+(defvar syntax-begin-function nil
+  "Function to move back outside of any comment/string/paren.
+This function should move the cursor back to some syntactically safe
+point (where the PPSS is equivalent to nil).")
+(make-obsolete-variable 'syntax-begin-function nil "25.1")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Several caches.
+;;
+;; Because `syntax-ppss' is equivalent to (parse-partial-sexp
+;; (POINT-MIN) x), we need either to empty the cache when we narrow
+;; the buffer, which is suboptimal, or we need to use several caches.
+;; We use two of them, one for widened buffer, and one for narrowing.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(defvar-local syntax-ppss-wide nil
+  "Cons of two elements (LAST . CACHE).
+Where LAST is a pair (LAST-POS . LAST-PPS) caching the last invocation
+and CACHE is a list of (POS . PPSS) pairs, in decreasing POS order.
+These are valid when the buffer has no restriction.")
+
+(defvar-local syntax-ppss-narrow nil
+  "Same as `syntax-ppss-wide' but for a narrowed buffer.")
+
+(defvar-local syntax-ppss-narrow-start nil
+  "Start position of the narrowing for `syntax-ppss-narrow'.")
+
+(define-obsolete-function-alias 'syntax-ppss-after-change-function
+  #'syntax-ppss-flush-cache "27.1")
+(defun syntax-ppss-flush-cache (beg &rest _ignored)
+  "Flush the cache of `syntax-ppss' starting at position BEG."
+  ;; Set syntax-propertize to refontify anything past beg.
+  (unless (syntax-propertize--in-process-p)
+    (setq syntax-propertize--done (min beg syntax-propertize--done)))
+  ;; Flush invalid cache entries.
+  (dolist (cell (list syntax-ppss-wide syntax-ppss-narrow))
+    (pcase cell
+      (`(,last . ,cache)
+       (while (and cache (> (caar cache) beg))
+         (setq cache (cdr cache)))
+       ;; Throw away `last' value if made invalid.
+       (when (< beg (or (car last) 0))
+         ;; If syntax-begin-function jumped to BEG, then the old state at BEG can
+         ;; depend on the text after BEG (which is presumably changed).  So if
+         ;; BEG=(car (nth 10 syntax-ppss-last)) don't reuse that data because the
+         ;; assumed nil state at BEG may not be valid any more.
+         (if (<= beg (or (syntax-ppss-toplevel-pos (cdr last))
+                         (nth 3 last)
+                         0))
+	     (setq last nil)
+           (setcar last nil)))
+       ;; Unregister if there's no cache left.  Sadly this doesn't work
+       ;; because `before-change-functions' is temporarily bound to nil here.
+       ;; (unless cache
+       ;;   (remove-hook 'before-change-functions #'syntax-ppss-flush-cache t))
+       (setcar cell last)
+       (setcdr cell cache)))
+    ))
+
+;; FIXME: Explain this variable.  Currently only its last (5th) slot is used.
+;; Perhaps the other slots should be removed?
+;; This variable is only used when `syntax-begin-function' is used and
+;; will hence be removed together with `syntax-begin-function'.
+(defvar syntax-ppss-stats
+  [(0 . 0) (0 . 0) (0 . 0) (0 . 0) (0 . 0) (2 . 2500)]
+  "Statistics about which case is more/less frequent in `syntax-ppss'.
+The 5th slot drives the heuristic to use `syntax-begin-function'.
+The rest is only useful if you're interested in tweaking the algorithm.")
+
+(defun syntax-ppss-stats ()
+  (mapcar (lambda (x)
+	    (condition-case nil
+		(cons (car x) (/ (cdr x) (car x)))
+	      (error nil)))
+	  syntax-ppss-stats))
+(defun syntax-ppss--update-stats (i old new)
+  (let ((pair (aref syntax-ppss-stats i)))
+    (cl-incf (car pair))
+    (cl-incf (cdr pair) (- new old))))
+
+(defun syntax-ppss--data ()
+  (if (eq (point-min) 1)
+      (progn
+        (unless syntax-ppss-wide
+          (setq syntax-ppss-wide (cons nil nil)))
+        syntax-ppss-wide)
+    (unless (eq syntax-ppss-narrow-start (point-min))
+      (setq syntax-ppss-narrow-start (point-min))
+      (setq syntax-ppss-narrow (cons nil nil)))
+    syntax-ppss-narrow))
+
+(defun syntax-ppss (&optional pos)
+  "Parse-Partial-Sexp State at POS, defaulting to point.
+If POS is given, this function moves point to POS.
+
+The returned value is the same as that of `parse-partial-sexp'
+run from `point-min' to POS except that values at positions 2 and 6
+in the returned list (counting from 0) cannot be relied upon.
+
+It is necessary to call `syntax-ppss-flush-cache' explicitly if
+this function is called while `before-change-functions' is
+temporarily let-bound, or if the buffer is modified without
+running the hook."
+  ;; Default values.
+  (unless pos (setq pos (point)))
+  (syntax-propertize pos)
+  ;;
+  (with-syntax-table (or syntax-ppss-table (syntax-table))
+  (let* ((cell (syntax-ppss--data))
+         (ppss-last (car cell))
+         (ppss-cache (cdr cell))
+         (old-ppss (cdr ppss-last))
+         (old-pos (car ppss-last))
+         (ppss nil)
+         (pt-min (point-min)))
+    (if (and old-pos (> old-pos pos)) (setq old-pos nil))
+    ;; Use the OLD-POS if usable and close.  Don't update the `last' cache.
+    (condition-case nil
+	(if (and old-pos (< (- pos old-pos)
+			    ;; The time to use syntax-begin-function and
+			    ;; find PPSS is assumed to be about 2 * distance.
+			    (let ((pair (aref syntax-ppss-stats 5)))
+			      (/ (* 2 (cdr pair)) (car pair)))))
+	    (progn
+	      (syntax-ppss--update-stats 0 old-pos pos)
+	      (parse-partial-sexp old-pos pos nil nil old-ppss))
+
+	  (cond
+	   ;; Use OLD-PPSS if possible and close enough.
+	   ((and (not old-pos) old-ppss
+                 ;; If `pt-min' is too far from `pos', we could try to use
+		 ;; other positions in (nth 9 old-ppss), but that doesn't
+		 ;; seem to happen in practice and it would complicate this
+		 ;; code (and the before-change-function code even more).
+		 ;; But maybe it would be useful in "degenerate" cases such
+		 ;; as when the whole file is wrapped in a set
+		 ;; of parentheses.
+		 (setq pt-min (or (syntax-ppss-toplevel-pos old-ppss)
+				  (nth 2 old-ppss)))
+		 (<= pt-min pos) (< (- pos pt-min) syntax-ppss-max-span))
+	    (syntax-ppss--update-stats 1 pt-min pos)
+	    (setq ppss (parse-partial-sexp pt-min pos)))
+	   ;; The OLD-* data can't be used.  Consult the cache.
+	   (t
+	    (let ((cache-pred nil)
+		  (cache ppss-cache)
+		  (pt-min (point-min))
+		  ;; I differentiate between PT-MIN and PT-BEST because
+		  ;; I feel like it might be important to ensure that the
+		  ;; cache is only filled with 100% sure data (whereas
+		  ;; syntax-begin-function might return incorrect data).
+		  ;; Maybe that's just stupid.
+		  (pt-best (point-min))
+		  (ppss-best nil))
+	      ;; look for a usable cache entry.
+	      (while (and cache (< pos (caar cache)))
+		(setq cache-pred cache)
+		(setq cache (cdr cache)))
+	      (if cache (setq pt-min (caar cache) ppss (cdar cache)))
+
+	      ;; Setup the before-change function if necessary.
+	      (unless (or ppss-cache ppss-last)
+                ;; Note: combine-change-calls-1 needs to be kept in sync
+                ;; with this!
+		(add-hook 'before-change-functions
+			  #'syntax-ppss-flush-cache
+                          ;; We should be either the very last function on
+                          ;; before-change-functions or the very first on
+                          ;; after-change-functions.
+                          99 t))
+
+	      ;; Use the best of OLD-POS and CACHE.
+	      (if (or (not old-pos) (< old-pos pt-min))
+		  (setq pt-best pt-min ppss-best ppss)
+		(syntax-ppss--update-stats 4 old-pos pos)
+		(setq pt-best old-pos ppss-best old-ppss))
+
+	      ;; Use the `syntax-begin-function' if available.
+	      ;; We could try using that function earlier, but:
+	      ;; - The result might not be 100% reliable, so it's better to use
+	      ;;   the cache if available.
+	      ;; - The function might be slow.
+	      ;; - If this function almost always finds a safe nearby spot,
+	      ;;   the cache won't be populated, so consulting it is cheap.
+	      (when (and syntax-begin-function
+			 (progn (goto-char pos)
+				(funcall syntax-begin-function)
+				;; Make sure it's better.
+				(> (point) pt-best))
+			 ;; Simple sanity checks.
+                         (< (point) pos) ; backward-paragraph can fail here.
+			 (not (memq (get-text-property (point) 'face)
+				    '(font-lock-string-face font-lock-doc-face
+				      font-lock-comment-face))))
+		(syntax-ppss--update-stats 5 (point) pos)
+		(setq pt-best (point) ppss-best nil))
+
+	      (cond
+	       ;; Quick case when we found a nearby pos.
+	       ((< (- pos pt-best) syntax-ppss-max-span)
+		(syntax-ppss--update-stats 2 pt-best pos)
+		(setq ppss (parse-partial-sexp pt-best pos nil nil ppss-best)))
+	       ;; Slow case: compute the state from some known position and
+	       ;; populate the cache so we won't need to do it again soon.
+	       (t
+		(syntax-ppss--update-stats 3 pt-min pos)
+                (setq syntax-ppss--updated-cache t)
+
+		;; If `pt-min' is too far, add a few intermediate entries.
+		(while (> (- pos pt-min) (* 2 syntax-ppss-max-span))
+		  (setq ppss (parse-partial-sexp
+			      pt-min (setq pt-min (/ (+ pt-min pos) 2))
+			      nil nil ppss))
+                  (push (cons pt-min ppss)
+                        (if cache-pred (cdr cache-pred) ppss-cache)))
+
+		;; Compute the actual return value.
+		(setq ppss (parse-partial-sexp pt-min pos nil nil ppss))
+
+		;; Debugging check.
+		;; (let ((real-ppss (parse-partial-sexp (point-min) pos)))
+		;;   (setcar (last ppss 4) 0)
+		;;   (setcar (last real-ppss 4) 0)
+		;;   (setcar (last ppss 8) nil)
+		;;   (setcar (last real-ppss 8) nil)
+		;;   (unless (equal ppss real-ppss)
+		;;     (message "!!Syntax: %s != %s" ppss real-ppss)
+		;;     (setq ppss real-ppss)))
+
+		;; Store it in the cache.
+		(let ((pair (cons pos ppss)))
+		  (if cache-pred
+		      (if (> (- (caar cache-pred) pos) syntax-ppss-max-span)
+			  (push pair (cdr cache-pred))
+			(setcar cache-pred pair))
+		    (if (or (null ppss-cache)
+			    (> (- (caar ppss-cache) pos)
+			       syntax-ppss-max-span))
+			(push pair ppss-cache)
+		      (setcar ppss-cache pair)))))))))
+
+          (setq syntax-ppss--updated-cache t)
+	  (setq ppss-last (cons pos ppss))
+          (setcar cell ppss-last)
+          (setcdr cell ppss-cache)
+	  ppss)
+      (args-out-of-range
+       ;; If the buffer is more narrowed than when we built the cache,
+       ;; we may end up calling parse-partial-sexp with a position before
+       ;; point-min.  In that case, just parse from point-min assuming
+       ;; a nil state.
+       (parse-partial-sexp (point-min) pos))))))
+
+;; Debugging functions
+
+(defun syntax-ppss-debug ()
+  (let ((pt nil)
+	(min-diffs nil))
+    (dolist (x (append (cdr (syntax-ppss--data)) (list (cons (point-min) nil))))
+      (when pt (push (- pt (car x)) min-diffs))
+      (setq pt (car x)))
+    min-diffs))
+
+(provide 'syntax)
+
+;;; syntax.el ends here