1 files changed, 1499 insertions, 0 deletions
diff --git a/lisp/emacs-lisp/rx.el b/lisp/emacs-lisp/rx.el
new file mode 100644
index 00000000000..ec51146484a
--- /dev/null
+++ b/lisp/emacs-lisp/rx.el
@@ -0,0 +1,1499 @@
+;;; rx.el --- S-exp notation for regexps           --*- lexical-binding: t -*-
+
+;; Copyright (C) 2001-2022 Free Software Foundation, Inc.
+
+;; This file is part of GNU Emacs.
+
+;; GNU Emacs is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+
+;; GNU Emacs is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.
+
+;;; Commentary:
+
+;; This facility allows writing regexps in a sexp-based language
+;; instead of strings.  Regexps in the `rx' notation are easier to
+;; read, write and maintain; they can be indented and commented in a
+;; natural way, and are easily composed by program code.
+;; The translation to string regexp is done by a macro and does not
+;; incur any extra processing during run time.  Example:
+;;
+;;  (rx bos (or (not (any "^"))
+;;              (seq "^" (or " *" "["))))
+;;
+;; => "\\`\\(?:[^^]\\|\\^\\(?: \\*\\|\\[\\)\\)"
+;;
+;; The notation is much influenced by and retains some compatibility with
+;; Olin Shivers's SRE, with concessions to Emacs regexp peculiarities,
+;; and the older Emacs package Sregex.
+
+;;; Code:
+
+;; The `rx--translate...' functions below return (REGEXP . PRECEDENCE),
+;; where REGEXP is a list of string expressions that will be
+;; concatenated into a regexp, and PRECEDENCE is one of
+;;
+;;  t    -- can be used as argument to postfix operators (eg. "a")
+;;  seq  -- can be concatenated in sequence with other seq or higher (eg. "ab")
+;;  lseq -- can be concatenated to the left of rseq or higher (eg. "^a")
+;;  rseq -- can be concatenated to the right of lseq or higher (eg. "a$")
+;;  nil  -- can only be used in alternatives (eg. "a\\|b")
+;;
+;; They form a lattice:
+;;
+;;           t          highest precedence
+;;           |
+;;          seq
+;;         /   \
+;;      lseq   rseq
+;;         \   /
+;;          nil         lowest precedence
+
+
+(defconst rx--char-classes
+  '((digit         . digit)
+    (numeric       . digit)
+    (num           . digit)
+    (control       . cntrl)
+    (cntrl         . cntrl)
+    (hex-digit     . xdigit)
+    (hex           . xdigit)
+    (xdigit        . xdigit)
+    (blank         . blank)
+    (graphic       . graph)
+    (graph         . graph)
+    (printing      . print)
+    (print         . print)
+    (alphanumeric  . alnum)
+    (alnum         . alnum)
+    (letter        . alpha)
+    (alphabetic    . alpha)
+    (alpha         . alpha)
+    (ascii         . ascii)
+    (nonascii      . nonascii)
+    (lower         . lower)
+    (lower-case    . lower)
+    (punctuation   . punct)
+    (punct         . punct)
+    (space         . space)
+    (whitespace    . space)
+    (white         . space)
+    (upper         . upper)
+    (upper-case    . upper)
+    (word          . word)
+    (wordchar      . word)
+    (unibyte       . unibyte)
+    (multibyte     . multibyte))
+  "Alist mapping rx symbols to character classes.
+Most of the names are from SRE.")
+
+(defvar rx-constituents nil
+  "Alist of old-style rx extensions, for compatibility.
+For new code, use `rx-define', `rx-let' or `rx-let-eval'.
+
+Each element is (SYMBOL . DEF).
+
+If DEF is a symbol, then SYMBOL is an alias of DEF.
+
+If DEF is a string, then SYMBOL is a plain rx symbol defined as the
+   regexp string DEF.
+
+If DEF is a list on the form (FUN MIN-ARGS MAX-ARGS PRED), then
+   SYMBOL is an rx form with at least MIN-ARGS and at most
+   MAX-ARGS arguments.  If MAX-ARGS is nil, then there is no upper limit.
+   FUN is a function taking the entire rx form as single argument
+   and returning the translated regexp string.
+   If PRED is non-nil, it is a predicate that all actual arguments must
+   satisfy.")
+
+(defvar rx--local-definitions nil
+  "Alist of dynamic local rx definitions.
+Each entry is:
+ (NAME DEF)      -- NAME is an rx symbol defined as the rx form DEF.
+ (NAME ARGS DEF) -- NAME is an rx form with arglist ARGS, defined
+                    as the rx form DEF (which can contain members of ARGS).")
+
+(defsubst rx--lookup-def (name)
+  "Current definition of NAME: (DEF) or (ARGS DEF), or nil if none."
+  (or (cdr (assq name rx--local-definitions))
+      (get name 'rx-definition)))
+
+(defun rx--expand-def (form)
+  "FORM expanded (once) if a user-defined construct; otherwise nil."
+  (cond ((symbolp form)
+         (let ((def (rx--lookup-def form)))
+           (and def
+                (if (cdr def)
+                    (error "Not an `rx' symbol definition: %s" form)
+                  (car def)))))
+        ((and (consp form) (symbolp (car form)))
+         (let* ((op (car form))
+                (def (rx--lookup-def op)))
+           (and def
+                (if (cdr def)
+                    (rx--expand-template
+                     op (cdr form) (nth 0 def) (nth 1 def))
+                  (error "Not an `rx' form definition: %s" op)))))))
+
+;; TODO: Additions to consider:
+;; - A construct like `or' but without the match order guarantee,
+;;   maybe `unordered-or'.  Useful for composition or generation of
+;;   alternatives; permits more effective use of regexp-opt.
+
+(defun rx--translate-symbol (sym)
+  "Translate an rx symbol.  Return (REGEXP . PRECEDENCE)."
+  (pcase sym
+    ;; Use `list' instead of a quoted list to wrap the strings here,
+    ;; since the return value may be mutated.
+    ((or 'nonl 'not-newline 'any) (cons (list ".") t))
+    ((or 'anychar 'anything)      (cons (list "[^z-a]") t))
+    ('unmatchable                 (rx--empty))
+    ((or 'bol 'line-start)        (cons (list "^") 'lseq))
+    ((or 'eol 'line-end)          (cons (list "$") 'rseq))
+    ((or 'bos 'string-start 'bot 'buffer-start) (cons (list "\\`") t))
+    ((or 'eos 'string-end   'eot 'buffer-end)   (cons (list "\\'") t))
+    ('point                       (cons (list "\\=") t))
+    ((or 'bow 'word-start)        (cons (list "\\<") t))
+    ((or 'eow 'word-end)          (cons (list "\\>") t))
+    ('word-boundary               (cons (list "\\b") t))
+    ('not-word-boundary           (cons (list "\\B") t))
+    ('symbol-start                (cons (list "\\_<") t))
+    ('symbol-end                  (cons (list "\\_>") t))
+    ('not-wordchar                (cons (list "\\W") t))
+    (_
+     (cond
+      ((let ((class (cdr (assq sym rx--char-classes))))
+         (and class (cons (list (concat "[[:" (symbol-name class) ":]]")) t))))
+
+      ((let ((expanded (rx--expand-def sym)))
+         (and expanded (rx--translate expanded))))
+
+      ;; For compatibility with old rx.
+      ((let ((entry (assq sym rx-constituents)))
+         (and (progn
+                (while (and entry (not (stringp (cdr entry))))
+                  (setq entry
+                        (if (symbolp (cdr entry))
+                            ;; Alias for another entry.
+                            (assq (cdr entry) rx-constituents)
+                          ;; Wrong type, try further down the list.
+                          (assq (car entry)
+                                (cdr (memq entry rx-constituents))))))
+                entry)
+              (cons (list (cdr entry)) nil))))
+      (t (error "Unknown rx symbol `%s'" sym))))))
+
+(defun rx--enclose (left-str rexp right-str)
+  "Bracket REXP by LEFT-STR and RIGHT-STR."
+  (append (list left-str) rexp (list right-str)))
+
+(defun rx--bracket (rexp)
+  (rx--enclose "\\(?:" rexp "\\)"))
+
+(defun rx--sequence (left right)
+  "Return the sequence (concatenation) of two translated items,
+each on the form (REGEXP . PRECEDENCE), returning (REGEXP . PRECEDENCE)."
+  ;; Concatenation rules:
+  ;;  seq  ++ seq  -> seq
+  ;;  lseq ++ seq  -> lseq
+  ;;  seq  ++ rseq -> rseq
+  ;;  lseq ++ rseq -> nil
+  (cond ((not (car left)) right)
+        ((not (car right)) left)
+        (t
+         (let ((l (if (memq (cdr left) '(nil rseq))
+                      (cons (rx--bracket (car left)) t)
+                    left))
+               (r (if (memq (cdr right) '(nil lseq))
+                      (cons (rx--bracket (car right)) t)
+                    right)))
+           (cons (append (car l) (car r))
+                 (if (eq (cdr l) 'lseq)
+                     (if (eq (cdr r) 'rseq)
+                         nil                   ; lseq ++ rseq
+                       'lseq)                  ; lseq ++ seq
+                   (if (eq (cdr r) 'rseq)
+                       'rseq                   ; seq ++ rseq
+                     'seq)))))))               ; seq ++ seq
+
+(defun rx--translate-seq (body)
+  "Translate a sequence of zero or more rx items.
+Return (REGEXP . PRECEDENCE)."
+  (if body
+      (let* ((items (mapcar #'rx--translate body))
+             (result (car items)))
+        (dolist (item (cdr items))
+          (setq result (rx--sequence result item)))
+        result)
+    (cons nil 'seq)))
+
+(defun rx--empty ()
+  "Regexp that never matches anything."
+  (cons (list regexp-unmatchable) 'seq))
+
+;; `cl-every' replacement to avoid bootstrapping problems.
+(defun rx--every (pred list)
+  "Whether PRED is true for every element of LIST."
+  (while (and list (funcall pred (car list)))
+    (setq list (cdr list)))
+  (null list))
+
+(defun rx--foldl (f x l)
+  "(F (F (F X L0) L1) L2) ...
+Left-fold the list L, starting with X, by the binary function F."
+  (while l
+    (setq x (funcall f x (car l)))
+    (setq l (cdr l)))
+  x)
+
+(defun rx--normalise-or-arg (form)
+  "Normalize the `or' argument FORM.
+Characters become strings, user-definitions and `eval' forms are expanded,
+and `or' forms are normalized recursively."
+  (cond ((characterp form)
+         (char-to-string form))
+        ((and (consp form) (memq (car form) '(or |)))
+         (cons (car form) (mapcar #'rx--normalise-or-arg (cdr form))))
+        ((and (consp form) (eq (car form) 'eval))
+         (rx--normalise-or-arg (rx--expand-eval (cdr form))))
+        (t
+         (let ((expanded (rx--expand-def form)))
+           (if expanded
+               (rx--normalise-or-arg expanded)
+             form)))))
+
+(defun rx--all-string-or-args (body)
+  "If BODY only consists of strings or such `or' forms, return all the strings.
+Otherwise throw `rx--nonstring'."
+  (mapcan (lambda (form)
+            (cond ((stringp form) (list form))
+                  ((and (consp form) (memq (car form) '(or |)))
+                   (rx--all-string-or-args (cdr form)))
+                  (t (throw 'rx--nonstring nil))))
+          body))
+
+(defun rx--translate-or (body)
+  "Translate an or-pattern of zero or more rx items.
+Return (REGEXP . PRECEDENCE)."
+  ;; FIXME: Possible improvements:
+  ;;
+  ;; - Flatten sub-patterns first: (or (or A B) (or C D)) -> (or A B C D)
+  ;;   Then call regexp-opt on runs of string arguments. Example:
+  ;;   (or (+ digit) "CHARLIE" "CHAN" (+ blank))
+  ;;   -> (or (+ digit) (or "CHARLIE" "CHAN") (+ blank))
+  ;;
+  ;; - Optimize single-character alternatives better:
+  ;;     * classes: space, alpha, ...
+  ;;     * (syntax S), for some S (whitespace, word)
+  ;;   so that (or "@" "%" digit (any "A-Z" space) (syntax word))
+  ;;        -> (any "@" "%" digit "A-Z" space word)
+  ;;        -> "[A-Z@%[:digit:][:space:][:word:]]"
+  (cond
+   ((null body)                    ; No items: a never-matching regexp.
+    (rx--empty))
+   ((null (cdr body))              ; Single item.
+    (rx--translate (car body)))
+   (t
+    (let* ((args (mapcar #'rx--normalise-or-arg body))
+           (all-strings (catch 'rx--nonstring (rx--all-string-or-args args))))
+      (cond
+       (all-strings                       ; Only strings.
+        (cons (list (regexp-opt all-strings nil))
+              t))
+       ((rx--every #'rx--charset-p args)  ; All charsets.
+        (rx--translate-union nil args))
+       (t
+        (cons (append (car (rx--translate (car args)))
+                      (mapcan (lambda (item)
+                                (cons "\\|" (car (rx--translate item))))
+                              (cdr args)))
+              nil)))))))
+
+(defun rx--charset-p (form)
+  "Whether FORM looks like a charset, only consisting of character intervals
+and set operations."
+  (or (and (consp form)
+           (or (and (memq (car form) '(any in char))
+                    (rx--every (lambda (x) (not (symbolp x))) (cdr form)))
+               (and (memq (car form) '(not or | intersection))
+                    (rx--every #'rx--charset-p (cdr form)))))
+      (characterp form)
+      (and (stringp form) (= (length form) 1))
+      (and (or (symbolp form) (consp form))
+           (let ((expanded (rx--expand-def form)))
+             (and expanded
+                  (rx--charset-p expanded))))))
+
+(defun rx--string-to-intervals (str)
+  "Decode STR as intervals: A-Z becomes (?A . ?Z), and the single
+character X becomes (?X . ?X).  Return the intervals in a list."
+  ;; We could just do string-to-multibyte on the string and work with
+  ;; that instead of this `decode-char' workaround.
+  (let ((decode-char
+         (if (multibyte-string-p str)
+             #'identity
+           #'unibyte-char-to-multibyte))
+        (len (length str))
+        (i 0)
+        (intervals nil))
+    (while (< i len)
+      (cond ((and (< i (- len 2))
+                  (= (aref str (1+ i)) ?-))
+             ;; Range.
+             (let ((start (funcall decode-char (aref str i)))
+                   (end   (funcall decode-char (aref str (+ i 2)))))
+               (cond ((and (<= start #x7f) (>= end #x3fff80))
+                      ;; Ranges between ASCII and raw bytes are split to
+                      ;; avoid having them absorb Unicode characters
+                      ;; caught in-between.
+                      (push (cons start #x7f) intervals)
+                      (push (cons #x3fff80 end) intervals))
+                     ((<= start end)
+                      (push (cons start end) intervals))
+                     (t
+                      (error "Invalid rx `any' range: %s"
+                             (substring str i (+ i 3)))))
+               (setq i (+ i 3))))
+            (t
+             ;; Single character.
+             (let ((char (funcall decode-char (aref str i))))
+               (push (cons char char) intervals))
+             (setq i (+ i 1)))))
+    intervals))
+
+(defun rx--condense-intervals (intervals)
+  "Merge adjacent and overlapping intervals by mutation, preserving the order.
+INTERVALS is a list of (START . END) with START ≤ END, sorted by START."
+  (let ((tail intervals)
+        d)
+    (while (setq d (cdr tail))
+      (if (>= (cdar tail) (1- (caar d)))
+          (progn
+            (setcdr (car tail) (max (cdar tail) (cdar d)))
+            (setcdr tail (cdr d)))
+        (setq tail d)))
+    intervals))
+
+(defun rx--parse-any (body)
+  "Parse arguments of an (any ...) construct.
+Return (INTERVALS . CLASSES), where INTERVALS is a sorted list of
+disjoint intervals (each a cons of chars), and CLASSES
+a list of named character classes in the order they occur in BODY."
+  (let ((classes nil)
+        (strings nil)
+        (conses nil))
+    ;; Collect strings, conses and characters, and classes in separate bins.
+    (dolist (arg body)
+      (cond ((stringp arg)
+             (push arg strings))
+            ((and (consp arg)
+                  (characterp (car arg))
+                  (characterp (cdr arg))
+                  (<= (car arg) (cdr arg)))
+             ;; Copy the cons, in case we need to modify it.
+             (push (cons (car arg) (cdr arg)) conses))
+            ((characterp arg)
+             (push (cons arg arg) conses))
+            ((and (symbolp arg)
+                  (let ((class (cdr (assq arg rx--char-classes))))
+                    (and class
+                         (or (memq class classes)
+                             (progn (push class classes) t))))))
+            (t (error "Invalid rx `any' argument: %s" arg))))
+    (cons (rx--condense-intervals
+           (sort (append conses
+                         (mapcan #'rx--string-to-intervals strings))
+                 #'car-less-than-car))
+          (reverse classes))))
+
+(defun rx--generate-alt (negated intervals classes)
+  "Generate a character alternative.  Return (REGEXP . PRECEDENCE).
+If NEGATED is non-nil, negate the result; INTERVALS is a sorted
+list of disjoint intervals and CLASSES a list of named character
+classes."
+  (let ((items (append intervals classes)))
+    ;; Move lone ] and range ]-x to the start.
+    (let ((rbrac-l (assq ?\] items)))
+      (when rbrac-l
+        (setq items (cons rbrac-l (delq rbrac-l items)))))
+
+    ;; Split x-] and move the lone ] to the start.
+    (let ((rbrac-r (rassq ?\] items)))
+      (when (and rbrac-r (not (eq (car rbrac-r) ?\])))
+        (setcdr rbrac-r ?\\)
+        (setq items (cons '(?\] . ?\]) items))))
+
+    ;; Split ,-- (which would end up as ,- otherwise).
+    (let ((dash-r (rassq ?- items)))
+      (when (eq (car dash-r) ?,)
+        (setcdr dash-r ?,)
+        (setq items (nconc items '((?- . ?-))))))
+
+    ;; Remove - (lone or at start of interval)
+    (let ((dash-l (assq ?- items)))
+      (when dash-l
+        (if (eq (cdr dash-l) ?-)
+            (setq items (delq dash-l items))   ; Remove lone -
+          (setcar dash-l ?.))                  ; Reduce --x to .-x
+        (setq items (nconc items '((?- . ?-))))))
+
+    ;; Deal with leading ^ and range ^-x.
+    (when (and (consp (car items))
+               (eq (caar items) ?^)
+               (cdr items))
+      ;; Move ^ and ^-x to second place.
+      (setq items (cons (cadr items)
+                        (cons (car items) (cddr items)))))
+
+    (cond
+     ;; Empty set: if negated, any char, otherwise match-nothing.
+     ((null items)
+      (if negated
+          (rx--translate-symbol 'anything)
+        (rx--empty)))
+     ;; Single non-negated character.
+     ((and (null (cdr items))
+           (consp (car items))
+           (eq (caar items) (cdar items))
+           (not negated))
+      (cons (list (regexp-quote (char-to-string (caar items))))
+            t))
+     ;; Negated newline.
+     ((and (equal items '((?\n . ?\n)))
+           negated)
+      (rx--translate-symbol 'nonl))
+     ;; At least one character or class, possibly negated.
+     (t
+      (cons
+       (list
+        (concat
+         "["
+         (and negated "^")
+         (mapconcat (lambda (item)
+                      (cond ((symbolp item)
+                             (format "[:%s:]" item))
+                            ((eq (car item) (cdr item))
+                             (char-to-string (car item)))
+                            ((eq (1+ (car item)) (cdr item))
+                             (string (car item) (cdr item)))
+                            (t
+                             (string (car item) ?- (cdr item)))))
+                    items nil)
+         "]"))
+       t)))))
+
+(defun rx--translate-any (negated body)
+  "Translate an (any ...) construct.  Return (REGEXP . PRECEDENCE).
+If NEGATED, negate the sense."
+  (let ((parsed (rx--parse-any body)))
+    (rx--generate-alt negated (car parsed) (cdr parsed))))
+
+(defun rx--intervals-to-alt (negated intervals)
+  "Generate a character alternative from an interval set.
+Return (REGEXP . PRECEDENCE).
+INTERVALS is a sorted list of disjoint intervals.
+If NEGATED, negate the sense."
+  ;; Detect whether the interval set is better described in
+  ;; complemented form.  This is not just a matter of aesthetics: any
+  ;; range from ASCII to raw bytes will automatically exclude the
+  ;; entire non-ASCII Unicode range by the regexp engine.
+  (if (rx--every (lambda (iv) (not (<= (car iv) #x3ffeff (cdr iv))))
+                 intervals)
+      (rx--generate-alt negated intervals nil)
+    (rx--generate-alt
+     (not negated) (rx--complement-intervals intervals) nil)))
+
+;; FIXME: Consider turning `not' into a variadic operator, following SRE:
+;; (not A B) = (not (or A B)) = (intersection (not A) (not B)), and
+;; (not) = anychar.
+;; Maybe allow singleton characters as arguments.
+
+(defun rx--translate-not (negated body)
+  "Translate a (not ...) construct.  Return (REGEXP . PRECEDENCE).
+If NEGATED, negate the sense (thus making it positive)."
+  (unless (and body (null (cdr body)))
+    (error "rx `not' form takes exactly one argument"))
+  (let ((arg (car body)))
+    (cond
+     ((and (consp arg)
+           (pcase (car arg)
+             ((or 'any 'in 'char)
+              (rx--translate-any      (not negated) (cdr arg)))
+             ('syntax
+              (rx--translate-syntax   (not negated) (cdr arg)))
+             ('category
+              (rx--translate-category (not negated) (cdr arg)))
+             ('not
+              (rx--translate-not      (not negated) (cdr arg)))
+             ((or 'or '|)
+              (rx--translate-union    (not negated) (cdr arg)))
+             ('intersection
+              (rx--translate-intersection (not negated) (cdr arg))))))
+     ((let ((class (cdr (assq arg rx--char-classes))))
+        (and class
+             (rx--generate-alt (not negated) nil (list class)))))
+     ((eq arg 'word-boundary)
+      (rx--translate-symbol
+       (if negated 'word-boundary 'not-word-boundary)))
+     ((characterp arg)
+      (rx--generate-alt (not negated) (list (cons arg arg)) nil))
+     ((and (stringp arg) (= (length arg) 1))
+      (let ((char (string-to-char arg)))
+        (rx--generate-alt (not negated) (list (cons char char)) nil)))
+     ((let ((expanded (rx--expand-def arg)))
+        (and expanded
+             (rx--translate-not negated (list expanded)))))
+     (t (error "Illegal argument to rx `not': %S" arg)))))
+
+(defun rx--complement-intervals (intervals)
+  "Complement of the interval list INTERVALS."
+  (let ((compl nil)
+        (c 0))
+    (dolist (iv intervals)
+      (when (< c (car iv))
+        (push (cons c (1- (car iv))) compl))
+      (setq c (1+ (cdr iv))))
+    (when (< c (max-char))
+      (push (cons c (max-char)) compl))
+    (nreverse compl)))
+
+(defun rx--intersect-intervals (ivs-a ivs-b)
+  "Intersection of the interval lists IVS-A and IVS-B."
+  (let ((isect nil))
+    (while (and ivs-a ivs-b)
+      (let ((a (car ivs-a))
+            (b (car ivs-b)))
+        (cond
+         ((< (cdr a) (car b)) (setq ivs-a (cdr ivs-a)))
+         ((> (car a) (cdr b)) (setq ivs-b (cdr ivs-b)))
+         (t
+          (push (cons (max (car a) (car b))
+                      (min (cdr a) (cdr b)))
+                isect)
+          (setq ivs-a (cdr ivs-a))
+          (setq ivs-b (cdr ivs-b))
+          (cond ((< (cdr a) (cdr b))
+                 (push (cons (1+ (cdr a)) (cdr b))
+                       ivs-b))
+                ((> (cdr a) (cdr b))
+                 (push (cons (1+ (cdr b)) (cdr a))
+                       ivs-a)))))))
+    (nreverse isect)))
+
+(defun rx--union-intervals (ivs-a ivs-b)
+  "Union of the interval lists IVS-A and IVS-B."
+  (rx--complement-intervals
+   (rx--intersect-intervals
+    (rx--complement-intervals ivs-a)
+    (rx--complement-intervals ivs-b))))
+
+(defun rx--charset-intervals (charset)
+  "Return a sorted list of non-adjacent disjoint intervals from CHARSET.
+CHARSET is any expression allowed in a character set expression:
+characters, single-char strings, `any' forms (no classes permitted),
+or `not', `or' or `intersection' forms whose arguments are charsets."
+  (pcase charset
+    (`(,(or 'any 'in 'char) . ,body)
+     (let ((parsed (rx--parse-any body)))
+       (when (cdr parsed)
+         (error
+          "Character class not permitted in set operations: %S"
+          (cadr parsed)))
+       (car parsed)))
+    (`(not ,x) (rx--complement-intervals (rx--charset-intervals x)))
+    (`(,(or 'or '|) . ,body) (rx--charset-union body))
+    (`(intersection . ,body) (rx--charset-intersection body))
+    ((pred characterp)
+     (list (cons charset charset)))
+    ((guard (and (stringp charset) (= (length charset) 1)))
+     (let ((char (string-to-char charset)))
+       (list (cons char char))))
+    (_ (let ((expanded (rx--expand-def charset)))
+         (if expanded
+             (rx--charset-intervals expanded)
+           (error "Bad character set: %S" charset))))))
+
+(defun rx--charset-union (charsets)
+  "Union of CHARSETS, as a set of intervals."
+  (rx--foldl #'rx--union-intervals nil
+             (mapcar #'rx--charset-intervals charsets)))
+
+(defconst rx--charset-all (list (cons 0 (max-char))))
+
+(defun rx--charset-intersection (charsets)
+  "Intersection of CHARSETS, as a set of intervals."
+  (rx--foldl #'rx--intersect-intervals rx--charset-all
+             (mapcar #'rx--charset-intervals charsets)))
+
+(defun rx--translate-union (negated body)
+  "Translate an (or ...) construct of charsets.  Return (REGEXP . PRECEDENCE).
+If NEGATED, negate the sense."
+  (rx--intervals-to-alt negated (rx--charset-union body)))
+
+(defun rx--translate-intersection (negated body)
+  "Translate an (intersection ...) construct.  Return (REGEXP . PRECEDENCE).
+If NEGATED, negate the sense."
+  (rx--intervals-to-alt negated (rx--charset-intersection body)))
+
+(defun rx--atomic-regexp (item)
+  "ITEM is (REGEXP . PRECEDENCE); return a regexp of precedence t."
+  (if (eq (cdr item) t)
+      (car item)
+    (rx--bracket (car item))))
+
+(defun rx--translate-counted-repetition (min-count max-count body)
+  (let ((operand (rx--translate-seq body)))
+    (if (car operand)
+        (cons (append
+               (rx--atomic-regexp operand)
+               (list (concat "\\{"
+                             (number-to-string min-count)
+                             (cond ((null max-count) ",")
+                                   ((< min-count max-count)
+                                    (concat "," (number-to-string max-count))))
+                             "\\}")))
+              t)
+      operand)))
+
+(defun rx--check-repeat-arg (name min-args body)
+  (unless (>= (length body) min-args)
+    (error "rx `%s' requires at least %d argument%s"
+           name min-args (if (= min-args 1) "" "s")))
+  ;; There seems to be no reason to disallow zero counts.
+  (unless (natnump (car body))
+    (error "rx `%s' first argument must be nonnegative" name)))
+
+(defun rx--translate-bounded-repetition (name body)
+  (let ((min-count (car body))
+        (max-count (cadr body))
+        (items (cddr body)))
+    (unless (and (natnump min-count)
+                 (natnump max-count)
+                 (<= min-count max-count))
+      (error "rx `%s' range error" name))
+    (rx--translate-counted-repetition min-count max-count items)))
+
+(defun rx--translate-repeat (body)
+  (rx--check-repeat-arg 'repeat 2 body)
+  (if (= (length body) 2)
+      (rx--translate-counted-repetition (car body) (car body) (cdr body))
+    (rx--translate-bounded-repetition 'repeat body)))
+
+(defun rx--translate-** (body)
+  (rx--check-repeat-arg '** 2 body)
+  (rx--translate-bounded-repetition '** body))
+
+(defun rx--translate->= (body)
+  (rx--check-repeat-arg '>= 1 body)
+  (rx--translate-counted-repetition (car body) nil (cdr body)))
+
+(defun rx--translate-= (body)
+  (rx--check-repeat-arg '= 1 body)
+  (rx--translate-counted-repetition (car body) (car body) (cdr body)))
+
+(defvar rx--greedy t)
+
+(defun rx--translate-rep (op-string greedy body)
+  "Translate a repetition; OP-STRING is one of \"*\", \"+\" or \"?\".
+GREEDY is a boolean.  Return (REGEXP . PRECEDENCE)."
+  (let ((operand (rx--translate-seq body)))
+    (if (car operand)
+        (cons (append (rx--atomic-regexp operand)
+                      (list (concat op-string (unless greedy "?"))))
+              ;; The result has precedence seq to avoid (? (* "a")) -> "a*?"
+              'seq)
+      operand)))
+
+(defun rx--control-greedy (greedy body)
+  "Translate the sequence BODY with greediness GREEDY.
+Return (REGEXP . PRECEDENCE)."
+  (let ((rx--greedy greedy))
+    (rx--translate-seq body)))
+
+(defun rx--translate-group (body)
+  "Translate the `group' form.  Return (REGEXP . PRECEDENCE)."
+  (cons (rx--enclose "\\("
+                     (car (rx--translate-seq body))
+                     "\\)")
+        t))
+
+(defun rx--translate-group-n (body)
+  "Translate the `group-n' form.  Return (REGEXP . PRECEDENCE)."
+  (unless (and (integerp (car body)) (> (car body) 0))
+    (error "rx `group-n' requires a positive number as first argument"))
+  (cons (rx--enclose (concat "\\(?" (number-to-string (car body)) ":")
+                     (car (rx--translate-seq (cdr body)))
+                     "\\)")
+        t))
+
+(defun rx--translate-backref (body)
+  "Translate the `backref' form.  Return (REGEXP . PRECEDENCE)."
+  (unless (and (= (length body) 1) (integerp (car body)) (<= 1 (car body) 9))
+    (error "rx `backref' requires an argument in the range 1..9"))
+  (cons (list "\\" (number-to-string (car body))) t))
+
+(defconst rx--syntax-codes
+  '((whitespace         . ?-)           ; SPC also accepted
+    (punctuation        . ?.)
+    (word               . ?w)           ; W also accepted
+    (symbol             . ?_)
+    (open-parenthesis   . ?\()
+    (close-parenthesis  . ?\))
+    (expression-prefix  . ?\')
+    (string-quote       . ?\")
+    (paired-delimiter   . ?$)
+    (escape             . ?\\)
+    (character-quote    . ?/)
+    (comment-start      . ?<)
+    (comment-end        . ?>)
+    (string-delimiter   . ?|)
+    (comment-delimiter  . ?!)))
+
+(defun rx--translate-syntax (negated body)
+  "Translate the `syntax' form.  Return (REGEXP . PRECEDENCE)."
+  (unless (and body (null (cdr body)))
+    (error "rx `syntax' form takes exactly one argument"))
+  (let* ((sym (car body))
+         (syntax (cdr (assq sym rx--syntax-codes))))
+    (unless syntax
+      (cond
+       ;; Syntax character directly (sregex compatibility)
+       ((and (characterp sym) (rassq sym rx--syntax-codes))
+        (setq syntax sym))
+       ;; Syntax character as symbol (sregex compatibility)
+       ((symbolp sym)
+        (let ((name (symbol-name sym)))
+          (when (= (length name) 1)
+            (let ((char (string-to-char name)))
+              (when (rassq char rx--syntax-codes)
+                (setq syntax char)))))))
+      (unless syntax
+        (error "Unknown rx syntax name `%s'" sym)))
+    (cons (list (string ?\\ (if negated ?S ?s) syntax))
+          t)))
+
+(defconst rx--categories
+  '((space-for-indent           . ?\s)
+    (base                       . ?.)
+    (consonant                  . ?0)
+    (base-vowel                 . ?1)
+    (upper-diacritical-mark     . ?2)
+    (lower-diacritical-mark     . ?3)
+    (tone-mark                  . ?4)
+    (symbol                     . ?5)
+    (digit                      . ?6)
+    (vowel-modifying-diacritical-mark . ?7)
+    (vowel-sign                 . ?8)
+    (semivowel-lower            . ?9)
+    (not-at-end-of-line         . ?<)
+    (not-at-beginning-of-line   . ?>)
+    (alpha-numeric-two-byte     . ?A)
+    (chinese-two-byte           . ?C)
+    (chinse-two-byte            . ?C)   ; A typo in Emacs 21.1-24.3.
+    (greek-two-byte             . ?G)
+    (japanese-hiragana-two-byte . ?H)
+    (indian-two-byte            . ?I)
+    (japanese-katakana-two-byte . ?K)
+    (strong-left-to-right       . ?L)
+    (korean-hangul-two-byte     . ?N)
+    (strong-right-to-left       . ?R)
+    (cyrillic-two-byte          . ?Y)
+    (combining-diacritic        . ?^)
+    (ascii                      . ?a)
+    (arabic                     . ?b)
+    (chinese                    . ?c)
+    (ethiopic                   . ?e)
+    (greek                      . ?g)
+    (korean                     . ?h)
+    (indian                     . ?i)
+    (japanese                   . ?j)
+    (japanese-katakana          . ?k)
+    (latin                      . ?l)
+    (lao                        . ?o)
+    (tibetan                    . ?q)
+    (japanese-roman             . ?r)
+    (thai                       . ?t)
+    (vietnamese                 . ?v)
+    (hebrew                     . ?w)
+    (cyrillic                   . ?y)
+    (can-break                  . ?|)))
+
+(defun rx--translate-category (negated body)
+  "Translate the `category' form.  Return (REGEXP . PRECEDENCE)."
+  (unless (and body (null (cdr body)))
+    (error "rx `category' form takes exactly one argument"))
+  (let* ((arg (car body))
+         (category
+          (cond ((symbolp arg)
+                 (let ((cat (assq arg rx--categories)))
+                   (unless cat
+                     (error "Unknown rx category `%s'" arg))
+                   (cdr cat)))
+                ((characterp arg) arg)
+                (t (error "Invalid rx `category' argument `%s'" arg)))))
+    (cons (list (string ?\\ (if negated ?C ?c) category))
+          t)))
+
+(defvar rx--delayed-evaluation nil
+  "Whether to allow certain forms to be evaluated at runtime.")
+
+(defun rx--translate-literal (body)
+  "Translate the `literal' form.  Return (REGEXP . PRECEDENCE)."
+  (unless (and body (null (cdr body)))
+    (error "rx `literal' form takes exactly one argument"))
+  (let ((arg (car body)))
+    (cond ((stringp arg)
+           (cons (list (regexp-quote arg)) (if (= (length arg) 1) t 'seq)))
+          (rx--delayed-evaluation
+           (cons (list (list 'regexp-quote arg)) 'seq))
+          (t (error "rx `literal' form with non-string argument")))))
+
+(defun rx--expand-eval (body)
+  "Expand `eval' arguments.  Return a new rx form."
+  (unless (and body (null (cdr body)))
+    (error "rx `eval' form takes exactly one argument"))
+  (eval (car body)))
+
+(defun rx--translate-eval (body)
+  "Translate the `eval' form.  Return (REGEXP . PRECEDENCE)."
+  (rx--translate (rx--expand-eval body)))
+
+(defvar rx--regexp-atomic-regexp nil)
+
+(defun rx--translate-regexp (body)
+  "Translate the `regexp' form.  Return (REGEXP . PRECEDENCE)."
+  (unless (and body (null (cdr body)))
+    (error "rx `regexp' form takes exactly one argument"))
+  (let ((arg (car body)))
+    (cond ((stringp arg)
+           ;; Generate the regexp when needed, since rx isn't
+           ;; necessarily present in the byte-compilation environment.
+           (unless rx--regexp-atomic-regexp
+             (setq rx--regexp-atomic-regexp
+                   ;; Match atomic (precedence t) regexps: may give
+                   ;; false negatives but no false positives, assuming
+                   ;; the target string is syntactically correct.
+                   (rx-to-string
+                    '(seq
+                      bos
+                      (or (seq "["
+                               (opt "^")
+                               (opt "]")
+                               (* (or (seq "[:" (+ (any "a-z")) ":]")
+                                      (not (any "]"))))
+                               "]")
+                          (not (any "*+?^$[\\"))
+                          (seq "\\"
+                               (or anything
+                                   (seq (any "sScC_") anything)
+                                   (seq "("
+                                        (* (or (not (any "\\"))
+                                               (seq "\\" (not (any ")")))))
+                                        "\\)"))))
+                      eos)
+                    t)))
+           (cons (list arg)
+                 (if (string-match-p rx--regexp-atomic-regexp arg) t nil)))
+          (rx--delayed-evaluation
+           (cons (list arg) nil))
+          (t (error "rx `regexp' form with non-string argument")))))
+
+(defun rx--translate-compat-form (def form)
+  "Translate a compatibility form from `rx-constituents'.
+DEF is the definition tuple.  Return (REGEXP . PRECEDENCE)."
+  (let* ((fn (nth 0 def))
+         (min-args (nth 1 def))
+         (max-args (nth 2 def))
+         (predicate (nth 3 def))
+         (nargs (1- (length form))))
+    (when (< nargs min-args)
+      (error "The `%s' form takes at least %d argument(s)"
+             (car form) min-args))
+    (when (and max-args (> nargs max-args))
+      (error "The `%s' form takes at most %d argument(s)"
+             (car form) max-args))
+    (when (and predicate (not (rx--every predicate (cdr form))))
+      (error "The `%s' form requires arguments satisfying `%s'"
+             (car form) predicate))
+    (let ((regexp (funcall fn form)))
+      (unless (stringp regexp)
+        (error "The `%s' form did not expand to a string" (car form)))
+      (cons (list regexp) nil))))
+
+(defun rx--substitute (bindings form)
+  "Substitute BINDINGS in FORM.  BINDINGS is an alist of (NAME . VALUES)
+where VALUES is a list to splice into FORM wherever NAME occurs.
+Return the substitution result wrapped in a list, since a single value
+can expand to any number of values."
+  (cond ((symbolp form)
+         (let ((binding (assq form bindings)))
+           (if binding
+               (cdr binding)
+             (list form))))
+        ((consp form)
+         (if (listp (cdr form))
+             ;; Proper list.  We substitute variables even in the head
+             ;; position -- who knows, might be handy one day.
+             (list (mapcan (lambda (x) (copy-sequence
+                                        (rx--substitute bindings x)))
+                           form))
+           ;; Cons pair (presumably an interval).
+           (let ((first (rx--substitute bindings (car form)))
+                 (second (rx--substitute bindings (cdr form))))
+             (if (and first (not (cdr first))
+                      second (not (cdr second)))
+                 (list (cons (car first) (car second)))
+               (error
+                "Cannot substitute a &rest parameter into a dotted pair")))))
+        (t (list form))))
+
+;; FIXME: Consider adding extensions in Lisp macro style, where
+;; arguments are passed unevaluated to code that returns the rx form
+;; to use.  Example:
+;;
+;;   (rx-let ((radix-digit (radix)
+;;             :lisp (list 'any (cons ?0 (+ ?0 (eval radix) -1)))))
+;;     (rx (radix-digit (+ 5 3))))
+;; =>
+;;   "[0-7]"
+;;
+;; While this would permit more powerful extensions, it's unclear just
+;; how often they would be used in practice.  Let's wait until there is
+;; demand for it.
+
+;; FIXME: An alternative binding syntax would be
+;;
+;;   (NAME RXs...)
+;; and
+;;   ((NAME ARGS...) RXs...)
+;;
+;; which would have two minor advantages: multiple RXs with implicit
+;; `seq' in the definition, and the arglist is no longer an optional
+;; element in the middle of the list.  On the other hand, it's less
+;; like traditional lisp arglist constructs (defun, defmacro).
+;; Since it's a Scheme-like syntax, &rest parameters could be done using
+;; dotted lists:
+;;  (rx-let (((name arg1 arg2 . rest) ...definition...)) ...)
+
+(defun rx--expand-template (op values arglist template)
+  "Return TEMPLATE with variables in ARGLIST replaced with VALUES."
+  (let ((bindings nil)
+        (value-tail values)
+        (formals arglist))
+    (while formals
+      (pcase (car formals)
+        ('&rest
+         (unless (cdr formals)
+           (error
+            "Expanding rx def `%s': missing &rest parameter name" op))
+         (push (cons (cadr formals) value-tail) bindings)
+         (setq formals nil)
+         (setq value-tail nil))
+        (name
+         (unless value-tail
+           (error
+            "Expanding rx def `%s': too few arguments (got %d, need %s%d)"
+            op (length values)
+            (if (memq '&rest arglist) "at least " "")
+            (- (length arglist) (length (memq '&rest arglist)))))
+         (push (cons name (list (car value-tail))) bindings)
+         (setq value-tail (cdr value-tail))))
+      (setq formals (cdr formals)))
+    (when value-tail
+      (error
+       "Expanding rx def `%s': too many arguments (got %d, need %d)"
+       op (length values) (length arglist)))
+    (let ((subst (rx--substitute bindings template)))
+      (if (and subst (not (cdr subst)))
+          (car subst)
+        (error "Expanding rx def `%s': must result in a single value" op)))))
+
+(defun rx--translate-form (form)
+  "Translate an rx form (list structure).  Return (REGEXP . PRECEDENCE)."
+  (let ((body (cdr form)))
+    (pcase (car form)
+      ((or 'seq : 'and 'sequence) (rx--translate-seq body))
+      ((or 'or '|)              (rx--translate-or body))
+      ((or 'any 'in 'char)      (rx--translate-any nil body))
+      ('not-char                (rx--translate-any t body))
+      ('not                     (rx--translate-not nil body))
+      ('intersection            (rx--translate-intersection nil body))
+
+      ('repeat                  (rx--translate-repeat body))
+      ('=                       (rx--translate-= body))
+      ('>=                      (rx--translate->= body))
+      ('**                      (rx--translate-** body))
+
+      ((or 'zero-or-more '0+)           (rx--translate-rep "*" rx--greedy body))
+      ((or 'one-or-more '1+)            (rx--translate-rep "+" rx--greedy body))
+      ((or 'zero-or-one 'opt 'optional) (rx--translate-rep "?" rx--greedy body))
+
+      ('*                       (rx--translate-rep "*" t body))
+      ('+                       (rx--translate-rep "+" t body))
+      ((or '\? ?\s)             (rx--translate-rep "?" t body))
+
+      ('*?                      (rx--translate-rep "*" nil body))
+      ('+?                      (rx--translate-rep "+" nil body))
+      ((or '\?? ??)             (rx--translate-rep "?" nil body))
+
+      ('minimal-match           (rx--control-greedy nil body))
+      ('maximal-match           (rx--control-greedy t   body))
+
+      ((or 'group 'submatch)     (rx--translate-group body))
+      ((or 'group-n 'submatch-n) (rx--translate-group-n body))
+      ('backref                  (rx--translate-backref body))
+
+      ('syntax                  (rx--translate-syntax nil body))
+      ('not-syntax              (rx--translate-syntax t body))
+      ('category                (rx--translate-category nil body))
+
+      ('literal                 (rx--translate-literal body))
+      ('eval                    (rx--translate-eval body))
+      ((or 'regexp 'regex)      (rx--translate-regexp body))
+
+      (op
+       (cond
+        ((not (symbolp op)) (error "Bad rx operator `%S'" op))
+
+        ((let ((expanded (rx--expand-def form)))
+           (and expanded
+                (rx--translate expanded))))
+
+        ;; For compatibility with old rx.
+        ((let ((entry (assq op rx-constituents)))
+           (and (progn
+                  (while (and entry (not (consp (cdr entry))))
+                    (setq entry
+                          (if (symbolp (cdr entry))
+                              ;; Alias for another entry.
+                              (assq (cdr entry) rx-constituents)
+                            ;; Wrong type, try further down the list.
+                            (assq (car entry)
+                                  (cdr (memq entry rx-constituents))))))
+                  entry)
+                (rx--translate-compat-form (cdr entry) form))))
+
+        (t (error "Unknown rx form `%s'" op)))))))
+
+(defconst rx--builtin-forms
+  '(seq sequence : and or | any in char not-char not intersection
+    repeat = >= **
+    zero-or-more 0+ *
+    one-or-more 1+ +
+    zero-or-one opt optional \?
+    *? +? \??
+    minimal-match maximal-match
+    group submatch group-n submatch-n backref
+    syntax not-syntax category
+    literal eval regexp regex)
+  "List of built-in rx function-like symbols.")
+
+(defconst rx--builtin-symbols
+  (append '(nonl not-newline any anychar anything unmatchable
+            bol eol line-start line-end
+            bos eos string-start string-end
+            bow eow word-start word-end
+            symbol-start symbol-end
+            point word-boundary not-word-boundary not-wordchar)
+          (mapcar #'car rx--char-classes))
+  "List of built-in rx variable-like symbols.")
+
+(defconst rx--builtin-names
+  (append rx--builtin-forms rx--builtin-symbols)
+  "List of built-in rx names.  These cannot be redefined by the user.")
+
+;; Declare Lisp indentation rules for constructs that take 1 or 2
+;; parameters before a body of RX forms.
+;; (`>=' and `=' are omitted because they are more likely to be used
+;; as Lisp functions than RX constructs; `repeat' is a `defcustom' type.)
+(put 'group-n 'lisp-indent-function 1)
+(put 'submatch-n 'lisp-indent-function 1)
+(put '** 'lisp-indent-function 2)
+
+
+(defun rx--translate (item)
+  "Translate the rx-expression ITEM.  Return (REGEXP . PRECEDENCE)."
+  (cond
+   ((stringp item)
+    (if (= (length item) 0)
+        (cons nil 'seq)
+      (cons (list (regexp-quote item)) (if (= (length item) 1) t 'seq))))
+   ((characterp item)
+    (cons (list (regexp-quote (char-to-string item))) t))
+   ((symbolp item)
+    (rx--translate-symbol item))
+   ((consp item)
+    (rx--translate-form item))
+   (t (error "Bad rx expression: %S" item))))
+
+
+;;;###autoload
+(defun rx-to-string (form &optional no-group)
+  "Translate FORM from `rx' sexp syntax into a string regexp.
+The arguments to `literal' and `regexp' forms inside FORM must be
+constant strings.
+If NO-GROUP is non-nil, don't bracket the result in a non-capturing
+group.
+
+For extending the `rx' notation in FORM, use `rx-define' or `rx-let-eval'."
+  (let* ((item (rx--translate form))
+         (exprs (if no-group
+                    (car item)
+                  (rx--atomic-regexp item))))
+    (apply #'concat exprs)))
+
+(defun rx--to-expr (form)
+  "Translate the rx-expression FORM to a Lisp expression yielding a regexp."
+  (let* ((rx--delayed-evaluation t)
+         (elems (car (rx--translate form)))
+         (args nil))
+    ;; Merge adjacent strings.
+    (while elems
+      (let ((strings nil))
+        (while (and elems (stringp (car elems)))
+          (push (car elems) strings)
+          (setq elems (cdr elems)))
+        (let ((s (apply #'concat (nreverse strings))))
+          (unless (zerop (length s))
+            (push s args))))
+      (when elems
+        (push (car elems) args)
+        (setq elems (cdr elems))))
+    (cond ((null args) "")                             ; 0 args
+          ((cdr args) (cons 'concat (nreverse args)))  ; ≥2 args
+          (t (car args)))))                            ; 1 arg
+
+
+;;;###autoload
+(defmacro rx (&rest regexps)
+  "Translate regular expressions REGEXPS in sexp form to a regexp string.
+Each argument is one of the forms below; RX is a subform, and RX... stands
+for zero or more RXs.  For details, see Info node `(elisp) Rx Notation'.
+See `rx-to-string' for the corresponding function.
+
+STRING         Match a literal string.
+CHAR           Match a literal character.
+
+(seq RX...)    Match the RXs in sequence.  Alias: :, sequence, and.
+(or RX...)     Match one of the RXs.  Alias: |.
+
+(zero-or-more RX...) Match RXs zero or more times.  Alias: 0+.
+(one-or-more RX...)  Match RXs one or more times.  Alias: 1+.
+(zero-or-one RX...)  Match RXs or the empty string.  Alias: opt, optional.
+(* RX...)       Match RXs zero or more times; greedy.
+(+ RX...)       Match RXs one or more times; greedy.
+(? RX...)       Match RXs or the empty string; greedy.
+(*? RX...)      Match RXs zero or more times; non-greedy.
+(+? RX...)      Match RXs one or more times; non-greedy.
+(?? RX...)      Match RXs or the empty string; non-greedy.
+(= N RX...)     Match RXs exactly N times.
+(>= N RX...)    Match RXs N or more times.
+(** N M RX...)  Match RXs N to M times.  Alias: repeat.
+(minimal-match RX)  Match RX, with zero-or-more, one-or-more, zero-or-one
+                and aliases using non-greedy matching.
+(maximal-match RX)  Match RX, with zero-or-more, one-or-more, zero-or-one
+                and aliases using greedy matching, which is the default.
+
+(any SET...)    Match a character from one of the SETs.  Each SET is a
+                character, a string, a range as string \"A-Z\" or cons
+                (?A . ?Z), or a character class (see below).  Alias: in, char.
+(not CHARSPEC)  Match one character not matched by CHARSPEC.  CHARSPEC
+                can be a character, single-char string, (any ...), (or ...),
+                (intersection ...), (syntax ...), (category ...),
+                or a character class.
+(intersection CHARSET...) Match all CHARSETs.
+                CHARSET is (any...), (not...), (or...) or (intersection...),
+                a character or a single-char string.
+not-newline     Match any character except a newline.  Alias: nonl.
+anychar         Match any character.  Alias: anything.
+unmatchable     Never match anything at all.
+
+CHARCLASS       Match a character from a character class.  One of:
+ alpha, alphabetic, letter   Alphabetic characters (defined by Unicode).
+ alnum, alphanumeric         Alphabetic or decimal digit chars (Unicode).
+ digit, numeric, num         0-9.
+ xdigit, hex-digit, hex      0-9, A-F, a-f.
+ cntrl, control              ASCII codes 0-31.
+ blank                       Horizontal whitespace (Unicode).
+ space, whitespace, white    Chars with whitespace syntax.
+ lower, lower-case           Lower-case chars, from current case table.
+ upper, upper-case           Upper-case chars, from current case table.
+ graph, graphic              Graphic characters (Unicode).
+ print, printing             Whitespace or graphic (Unicode).
+ punct, punctuation          Not control, space, letter or digit (ASCII);
+                              not word syntax (non-ASCII).
+ word, wordchar              Characters with word syntax.
+ ascii                       ASCII characters (codes 0-127).
+ nonascii                    Non-ASCII characters (but not raw bytes).
+
+(syntax SYNTAX)  Match a character with syntax SYNTAX, being one of:
+  whitespace, punctuation, word, symbol, open-parenthesis,
+  close-parenthesis, expression-prefix, string-quote,
+  paired-delimiter, escape, character-quote, comment-start,
+  comment-end, string-delimiter, comment-delimiter
+
+(category CAT)   Match a character in category CAT, being one of:
+  space-for-indent, base, consonant, base-vowel,
+  upper-diacritical-mark, lower-diacritical-mark, tone-mark, symbol,
+  digit, vowel-modifying-diacritical-mark, vowel-sign,
+  semivowel-lower, not-at-end-of-line, not-at-beginning-of-line,
+  alpha-numeric-two-byte, chinese-two-byte, greek-two-byte,
+  japanese-hiragana-two-byte, indian-two-byte,
+  japanese-katakana-two-byte, strong-left-to-right,
+  korean-hangul-two-byte, strong-right-to-left, cyrillic-two-byte,
+  combining-diacritic, ascii, arabic, chinese, ethiopic, greek,
+  korean, indian, japanese, japanese-katakana, latin, lao,
+  tibetan, japanese-roman, thai, vietnamese, hebrew, cyrillic,
+  can-break
+
+Zero-width assertions: these all match the empty string in specific places.
+ line-start         At the beginning of a line.  Alias: bol.
+ line-end           At the end of a line.  Alias: eol.
+ string-start       At the start of the string or buffer.
+                     Alias: buffer-start, bos, bot.
+ string-end         At the end of the string or buffer.
+                     Alias: buffer-end, eos, eot.
+ point              At point.
+ word-start         At the beginning of a word.  Alias: bow.
+ word-end           At the end of a word.  Alias: eow.
+ word-boundary      At the beginning or end of a word.
+ not-word-boundary  Not at the beginning or end of a word.
+ symbol-start       At the beginning of a symbol.
+ symbol-end         At the end of a symbol.
+
+(group RX...)  Match RXs and define a capture group.  Alias: submatch.
+(group-n N RX...) Match RXs and define capture group N.  Alias: submatch-n.
+(backref N)    Match the text that capture group N matched.
+
+(literal EXPR) Match the literal string from evaluating EXPR at run time.
+(regexp EXPR)  Match the string regexp from evaluating EXPR at run time.
+(eval EXPR)    Match the rx sexp from evaluating EXPR at macro-expansion
+                (compile) time.
+
+Additional constructs can be defined using `rx-define' and `rx-let',
+which see.
+
+\(fn REGEXPS...)"
+  ;; Retrieve local definitions from the macroexpansion environment.
+  ;; (It's unclear whether the previous value of `rx--local-definitions'
+  ;; should be included, and if so, in which order.)
+  (let ((rx--local-definitions
+         (cdr (assq :rx-locals macroexpand-all-environment))))
+    (rx--to-expr (cons 'seq regexps))))
+
+(defun rx--make-binding (name tail)
+  "Make a definitions entry out of TAIL.
+TAIL is on the form ([ARGLIST] DEFINITION)."
+  (unless (symbolp name)
+    (error "Bad `rx' definition name: %S" name))
+  ;; FIXME: Consider using a hash table or symbol property, for speed.
+  (when (memq name rx--builtin-names)
+    (error "Cannot redefine built-in rx name `%s'" name))
+  (pcase tail
+    (`(,def)
+     (list def))
+    (`(,args ,def)
+     (unless (and (listp args) (rx--every #'symbolp args))
+       (error "Bad argument list for `rx' definition %s: %S" name args))
+     (list args def))
+    (_ (error "Bad `rx' definition of %s: %S" name tail))))
+
+(defun rx--make-named-binding (bindspec)
+  "Make a definitions entry out of BINDSPEC.
+BINDSPEC is on the form (NAME [ARGLIST] DEFINITION)."
+  (unless (consp bindspec)
+    (error "Bad `rx-let' binding: %S" bindspec))
+  (cons (car bindspec)
+        (rx--make-binding (car bindspec) (cdr bindspec))))
+
+(defun rx--extend-local-defs (bindspecs)
+  (append (mapcar #'rx--make-named-binding bindspecs)
+          rx--local-definitions))
+
+;;;###autoload
+(defmacro rx-let-eval (bindings &rest body)
+  "Evaluate BODY with local BINDINGS for `rx-to-string'.
+BINDINGS, after evaluation, is a list of definitions each on the form
+(NAME [(ARGS...)] RX), in effect for calls to `rx-to-string'
+in BODY.
+
+For bindings without an ARGS list, NAME is defined as an alias
+for the `rx' expression RX.  Where ARGS is supplied, NAME is
+defined as an `rx' form with ARGS as argument list.  The
+parameters are bound from the values in the (NAME ...) form and
+are substituted in RX.  ARGS can contain `&rest' parameters,
+whose values are spliced into RX where the parameter name occurs.
+
+Any previous definitions with the same names are shadowed during
+the expansion of BODY only.
+For extensions when using the `rx' macro, use `rx-let'.
+To make global rx extensions, use `rx-define'.
+For more details, see Info node `(elisp) Extending Rx'.
+
+\(fn BINDINGS BODY...)"
+  (declare (indent 1) (debug (form body)))
+  ;; FIXME: this way, `rx--extend-local-defs' may need to be autoloaded.
+  `(let ((rx--local-definitions (rx--extend-local-defs ,bindings)))
+     ,@body))
+
+;;;###autoload
+(defmacro rx-let (bindings &rest body)
+  "Evaluate BODY with local BINDINGS for `rx'.
+BINDINGS is an unevaluated list of bindings each on the form
+(NAME [(ARGS...)] RX).
+They are bound lexically and are available in `rx' expressions in
+BODY only.
+
+For bindings without an ARGS list, NAME is defined as an alias
+for the `rx' expression RX.  Where ARGS is supplied, NAME is
+defined as an `rx' form with ARGS as argument list.  The
+parameters are bound from the values in the (NAME ...) form and
+are substituted in RX.  ARGS can contain `&rest' parameters,
+whose values are spliced into RX where the parameter name occurs.
+
+Any previous definitions with the same names are shadowed during
+the expansion of BODY only.
+For local extensions to `rx-to-string', use `rx-let-eval'.
+To make global rx extensions, use `rx-define'.
+For more details, see Info node `(elisp) Extending Rx'.
+
+\(fn BINDINGS BODY...)"
+  (declare (indent 1) (debug (sexp body)))
+  (let ((prev-locals (cdr (assq :rx-locals macroexpand-all-environment)))
+        (new-locals (mapcar #'rx--make-named-binding bindings)))
+    (macroexpand-all (cons 'progn body)
+                     (cons (cons :rx-locals (append new-locals prev-locals))
+                           macroexpand-all-environment))))
+
+;;;###autoload
+(defmacro rx-define (name &rest definition)
+  "Define NAME as a global `rx' definition.
+If the ARGS list is omitted, define NAME as an alias for the `rx'
+expression RX.
+
+If the ARGS list is supplied, define NAME as an `rx' form with
+ARGS as argument list.  The parameters are bound from the values
+in the (NAME ...) form and are substituted in RX.
+ARGS can contain `&rest' parameters, whose values are spliced
+into RX where the parameter name occurs.
+
+Any previous global definition of NAME is overwritten with the new one.
+To make local rx extensions, use `rx-let' for `rx',
+`rx-let-eval' for `rx-to-string'.
+For more details, see Info node `(elisp) Extending Rx'.
+
+\(fn NAME [(ARGS...)] RX)"
+  (declare (indent defun))
+  `(eval-and-compile
+     (put ',name 'rx-definition ',(rx--make-binding name definition))
+     ',name))
+
+;; During `rx--pcase-transform', list of defined variables in right-to-left
+;; order.
+(defvar rx--pcase-vars)
+
+;; FIXME: The rewriting strategy for pcase works so-so with extensions;
+;; definitions cannot expand to `let' or named `backref'.  If this ever
+;; becomes a problem, we can handle those forms in the ordinary parser,
+;; using a dynamic variable for activating the augmented forms.
+
+(defun rx--pcase-transform (rx)
+  "Transform RX, an rx-expression augmented with `let' and named `backref',
+into a plain rx-expression, collecting names into `rx--pcase-vars'."
+  (pcase rx
+    (`(let ,name . ,body)
+     (let* ((index (length (memq name rx--pcase-vars)))
+            (i (if (zerop index)
+                   (length (push name rx--pcase-vars))
+                 index)))
+       `(group-n ,i ,(rx--pcase-transform (cons 'seq body)))))
+    ((and `(backref ,ref)
+          (guard (symbolp ref)))
+     (let ((index (length (memq ref rx--pcase-vars))))
+       (when (zerop index)
+         (error "rx `backref' variable must be one of: %s"
+                (mapconcat #'symbol-name rx--pcase-vars " ")))
+       `(backref ,index)))
+    ((and `(,head . ,rest)
+          (guard (and (or (symbolp head) (memq head '(?\s ??)))
+                      (not (memq head '(literal regexp regex eval))))))
+     (cons head (mapcar #'rx--pcase-transform rest)))
+    (_ rx)))
+
+(defun rx--reduce-right (f l)
+  "Right-reduction on L by F.  L must be non-empty."
+  (if (cdr l)
+      (funcall f (car l) (rx--reduce-right f (cdr l)))
+    (car l)))
+
+;;;###autoload
+(pcase-defmacro rx (&rest regexps)
+  "A pattern that matches strings against `rx' REGEXPS in sexp form.
+REGEXPS are interpreted as in `rx'.  The pattern matches any
+string that is a match for REGEXPS, as if by `string-match'.
+
+In addition to the usual `rx' syntax, REGEXPS can contain the
+following constructs:
+
+  (let REF RX...)  binds the symbol REF to a submatch that matches
+                   the regular expressions RX.  REF is bound in
+                   CODE to the string of the submatch or nil, but
+                   can also be used in `backref'.
+  (backref REF)    matches whatever the submatch REF matched.
+                   REF can be a number, as usual, or a name
+                   introduced by a previous (let REF ...)
+                   construct."
+  (rx--pcase-expand regexps))
+
+;; Autoloaded because it's referred to by the pcase rx macro above,
+;; whose body ends up in loaddefs.el.
+;;;###autoload
+(defun rx--pcase-expand (regexps)
+  (let* ((rx--pcase-vars nil)
+         (regexp (rx--to-expr (rx--pcase-transform (cons 'seq regexps)))))
+    `(and (pred stringp)
+          ,(pcase (length rx--pcase-vars)
+            (0
+             ;; No variables bound: a single predicate suffices.
+             `(pred (string-match ,regexp)))
+            (1
+             ;; Create a match value that on a successful regexp match
+             ;; is the submatch value, 0 on failure.  We can't use nil
+             ;; for failure because it is a valid submatch value.
+             `(app (lambda (s)
+                     (if (string-match ,regexp s)
+                         (match-string 1 s)
+                       0))
+                   (and ,(car rx--pcase-vars) (pred (not numberp)))))
+            (nvars
+             ;; Pack the submatches into a dotted list which is then
+             ;; immediately destructured into individual variables again.
+             ;; This is of course slightly inefficient.
+             ;; A dotted list is used to reduce the number of conses
+             ;; to create and take apart.
+             `(app (lambda (s)
+                     (and (string-match ,regexp s)
+                          ,(rx--reduce-right
+                            (lambda (a b) `(cons ,a ,b))
+                            (mapcar (lambda (i) `(match-string ,i s))
+                                    (number-sequence 1 nvars)))))
+                   ,(list '\`
+                          (rx--reduce-right
+                           #'cons
+                           (mapcar (lambda (name) (list '\, name))
+                                   (reverse rx--pcase-vars))))))))))
+
+;; Obsolete internal symbol, used in old versions of the `flycheck' package.
+(define-obsolete-function-alias 'rx-submatch-n 'rx-to-string "27.1")
+
+(provide 'rx)
+
+;;; rx.el ends here