1 files changed, 322 insertions, 443 deletions
diff --git a/lisp/emacs-lisp/rx.el b/lisp/emacs-lisp/rx.el
index a16c5da053a..249529e54e3 100644
--- a/lisp/emacs-lisp/rx.el
+++ b/lisp/emacs-lisp/rx.el
@@ -1,4 +1,4 @@
-;;; rx.el --- sexp notation for regular expressions
+;;; rx.el --- sexp notation for regular expressions  -*- lexical-binding: t -*-
 
 ;; Copyright (C) 2001-2019 Free Software Foundation, Inc.
 
@@ -47,57 +47,58 @@
 
 ;; Rx translates a sexp notation for regular expressions into the
 ;; usual string notation.  The translation can be done at compile-time
-;; by using the `rx' macro.  It can be done at run-time by calling
-;; function `rx-to-string'.  See the documentation of `rx' for a
-;; complete description of the sexp notation.
+;; by using the `rx' macro.  The `regexp' and `literal' forms accept
+;; non-constant expressions, in which case `rx' will translate to a
+;; `concat' expression.  Translation can be done fully at run time by
+;; calling function `rx-to-string'.  See the documentation of `rx' for
+;; a complete description of the sexp notation.
 ;;
 ;; Some examples of string regexps and their sexp counterparts:
 ;;
 ;; "^[a-z]*"
-;; (rx (and line-start (0+ (in "a-z"))))
+;; (rx line-start (0+ (in "a-z")))
 ;;
 ;; "\n[^ \t]"
-;; (rx (and "\n" (not (any " \t"))))
+;; (rx ?\n (not (in " \t")))
 ;;
 ;; "\\*\\*\\* EOOH \\*\\*\\*\n"
 ;; (rx "*** EOOH ***\n")
 ;;
 ;; "\\<\\(catch\\|finally\\)\\>[^_]"
-;; (rx (and word-start (submatch (or "catch" "finally")) word-end
-;;          (not (any ?_))))
+;; (rx word-start (submatch (or "catch" "finally")) word-end
+;;     (not (in ?_)))
 ;;
-;; "[ \t\n]*:\\([^:]+\\|$\\)"
-;; (rx (and (zero-or-more (in " \t\n")) ":"
-;;          (submatch (or line-end (one-or-more (not (any ?:)))))))
+;; "[ \t\n]*:\\($\\|[^:]+\\)"
+;; (rx (* (in " \t\n")) ":"
+;;     (submatch (or line-end (+ (not (in ?:))))))
 ;;
-;; "^content-transfer-encoding:\\(\n?[\t ]\\)*quoted-printable\\(\n?[\t ]\\)*"
-;; (rx (and line-start
-;;          "content-transfer-encoding:"
-;;          (+ (? ?\n)) (any " \t")
-;;	    "quoted-printable"
-;;	    (+ (? ?\n)) (any " \t"))
+;; "^content-transfer-encoding:\\(?:\n?[\t ]\\)*quoted-printable\\(?:\n?[\t ]\\)*"
+;; (rx line-start
+;;     "content-transfer-encoding:"
+;;     (* (? ?\n) (in " \t"))
+;;     "quoted-printable"
+;;     (* (? ?\n) (in " \t")))
 ;;
 ;; (concat "^\\(?:" something-else "\\)")
-;; (rx (and line-start (eval something-else))), statically or
-;; (rx-to-string '(and line-start ,something-else)), dynamically.
+;; (rx line-start (regexp something-else))
 ;;
 ;; (regexp-opt '(STRING1 STRING2 ...))
 ;; (rx (or STRING1 STRING2 ...)), or in other words, `or' automatically
 ;; calls `regexp-opt' as needed.
 ;;
 ;; "^;;\\s-*\n\\|^\n"
-;; (rx (or (and line-start ";;" (0+ space) ?\n)
-;;         (and line-start ?\n)))
+;; (rx (or (seq line-start ";;" (0+ space) ?\n)
+;;         (seq line-start ?\n)))
 ;;
 ;; "\\$[I]d: [^ ]+ \\([^ ]+\\) "
-;; (rx (and "$Id: "
-;;          (1+ (not (in " ")))
-;;          " "
-;;          (submatch (1+ (not (in " "))))
-;;          " "))
+;; (rx "$Id: "
+;;     (1+ (not (in " ")))
+;;     " "
+;;     (submatch (1+ (not (in " "))))
+;;     " ")
 ;;
 ;; "\\\\\\\\\\[\\w+"
-;; (rx (and ?\\ ?\\ ?\[ (1+ word)))
+;; (rx "\\\\[" (1+ word))
 ;;
 ;; etc.
 
@@ -106,14 +107,17 @@
 
 ;;; Code:
 
+(require 'cl-lib)
+(require 'cl-extra)
+
 ;; FIXME: support macros.
 
 (defvar rx-constituents              ;Not `const' because some modes extend it.
-  '((and		. (rx-and 1 nil))
+  '((and		. (rx-and 0 nil))
     (seq		. and)		; SRE
     (:			. and)		; SRE
     (sequence		. and)		; sregex
-    (or			. (rx-or 1 nil))
+    (or			. (rx-or 0 nil))
     (|			. or)		; SRE
     (not-newline	. ".")
     (nonl		. not-newline)	; SRE
@@ -173,6 +177,7 @@
     (not-syntax		. (rx-not-syntax 1 1)) ; sregex
     (category		. (rx-category 1 1 rx-check-category))
     (eval		. (rx-eval 1 1))
+    (literal		. (rx-literal 1 1 stringp))
     (regexp		. (rx-regexp 1 1 stringp))
     (regex		. regexp)       ; sregex
     (digit		. "[[:digit:]]")
@@ -244,7 +249,9 @@ regular expressions.")
 
 
 (defconst rx-categories
-  '((consonant			. ?0)
+  '((space-for-indent           . ?\s)
+    (base                       . ?.)
+    (consonant			. ?0)
     (base-vowel			. ?1)
     (upper-diacritical-mark	. ?2)
     (lower-diacritical-mark	. ?3)
@@ -263,7 +270,9 @@ regular expressions.")
     (japanese-hiragana-two-byte . ?H)
     (indian-two-byte		. ?I)
     (japanese-katakana-two-byte . ?K)
+    (strong-left-to-right       . ?L)
     (korean-hangul-two-byte	. ?N)
+    (strong-right-to-left       . ?R)
     (cyrillic-two-byte		. ?Y)
     (combining-diacritic	. ?^)
     (ascii			. ?a)
@@ -295,6 +304,10 @@ regular expression strings.")
   "Non-nil means produce greedy regular expressions for `zero-or-one',
 `zero-or-more', and `one-or-more'.  Dynamically bound.")
 
+(defvar rx--compile-to-lisp nil
+  "Nil means return a regexp as a string.
+Non-nil means we may return a lisp form which produces a
+string (used for `rx' macro).")
 
 (defun rx-info (op head)
   "Return parsing/code generation info for OP.
@@ -337,7 +350,7 @@ a standalone symbol."
 	       (> nargs max-args))
       (error "rx form `%s' accepts at most %d args"
 	     (car form) max-args))
-    (when (not (null type-pred))
+    (when type-pred
       (dolist (sub-form (cdr form))
 	(unless (funcall type-pred sub-form)
 	  (error "rx form `%s' requires args satisfying `%s'"
@@ -353,8 +366,9 @@ is non-nil."
    ;; for concatenation
    ((eq group ':)
     (if (rx-atomic-p
-	 (if (string-match
-	      "\\(?:[?*+]\\??\\|\\\\{[0-9]*,?[0-9]*\\\\}\\)\\'" regexp)
+         (if (and (stringp regexp)
+                  (string-match
+                   "\\(?:[?*+]\\??\\|\\\\{[0-9]*,?[0-9]*\\\\}\\)\\'" regexp))
 	     (substring regexp 0 (match-beginning 0))
 	   regexp))
 	(setq group nil)))
@@ -363,9 +377,10 @@ is non-nil."
    ;; do anyway
    ((eq group t))
    ((rx-atomic-p regexp t) (setq group nil)))
-  (if group
-      (concat "\\(?:" regexp "\\)")
-    regexp))
+  (cond ((and group (stringp regexp))
+         (concat "\\(?:" regexp "\\)"))
+        (group `("\\(?:" ,@regexp "\\)"))
+        (t regexp)))
 
 
 (defvar rx-parent)
@@ -377,7 +392,7 @@ is non-nil."
 FORM is of the form `(and FORM1 ...)'."
   (rx-check form)
   (rx-group-if
-   (mapconcat (lambda (x) (rx-form x ':)) (cdr form) nil)
+   (rx--subforms (cdr form) ':)
    (and (memq rx-parent '(* t)) rx-parent)))
 
 
@@ -385,9 +400,11 @@ FORM is of the form `(and FORM1 ...)'."
   "Parse and produce code from FORM, which is `(or FORM1 ...)'."
   (rx-check form)
   (rx-group-if
-   (if (memq nil (mapcar 'stringp (cdr form)))
-       (mapconcat (lambda (x) (rx-form x '|)) (cdr form) "\\|")
-     (regexp-opt (cdr form)))
+   (cond
+    ((null (cdr form)) regexp-unmatchable)
+    ((cl-every #'stringp (cdr form))
+     (regexp-opt (cdr form) nil t))
+    (t (rx--subforms (cdr form) '| "\\|")))
    (and (memq rx-parent '(: * t)) rx-parent)))
 
 
@@ -423,6 +440,13 @@ Only both edges of each range is checked."
     ;; set L list of all ranges
     (mapc (lambda (e) (cond ((stringp e) (push e str))
 			    ((numberp e) (push (cons e e) l))
+                            ;; Ranges between ASCII and raw bytes are split,
+                            ;; to prevent accidental inclusion of Unicode
+                            ;; characters later on.
+                            ((and (<= (car e) #x7f)
+                                  (>= (cdr e) #x3fff80))
+                             (push (cons (car e) #x7f) l)
+                             (push (cons #x3fff80 (cdr e)) l))
 			    (t (push e l))))
 	  args)
     ;; condense overlapped ranges in L
@@ -447,28 +471,38 @@ Only both edges of each range is checked."
 
 
 (defun rx-check-any-string (str)
-  "Check string argument STR for Rx `any'."
-  (let ((i 0)
-	c1 c2 l)
-    (if (= 0 (length str))
-	(error "String arg for Rx `any' must not be empty"))
-    (while (string-match ".-." str i)
-      ;; string before range: convert it to characters
-      (if (< i (match-beginning 0))
-	  (setq l (nconc
-		   l
-		   (append (substring str i (match-beginning 0)) nil))))
-      ;; range
-      (setq i (match-end 0)
-	    c1 (aref str (match-beginning 0))
-	    c2 (aref str (1- i)))
-      (cond
-       ((< c1 c2) (setq l (nconc l (list (cons c1 c2)))))
-       ((= c1 c2) (setq l (nconc l (list c1))))))
-    ;; rest?
-    (if (< i (length str))
-	(setq l (nconc l (append (substring str i) nil))))
-    l))
+  "Turn the `any' argument string STR into a list of characters.
+The original order is not preserved.  Ranges, \"A-Z\", become pairs, (?A . ?Z)."
+  (let ((decode-char
+         ;; Make sure raw bytes are decoded as such, to avoid confusion with
+         ;; U+0080..U+00FF.
+         (if (multibyte-string-p str)
+             #'identity
+           (lambda (c) (if (<= #x80 c #xff)
+                           (+ c #x3fff00)
+                         c))))
+        (len (length str))
+        (i 0)
+        (ret nil))
+    (if (= 0 len)
+        (error "String arg for Rx `any' must not be empty"))
+    (while (< i len)
+      (cond ((and (< i (- len 2))
+                  (= (aref str (+ i 1)) ?-))
+             ;; Range.
+             (let ((start (funcall decode-char (aref str i)))
+                   (end   (funcall decode-char (aref str (+ i 2)))))
+               (cond ((< start end) (push (cons start end) ret))
+                     ((= start end) (push start ret))
+                     (t
+                      (error "Rx character range `%c-%c' is reversed"
+                             start end)))
+               (setq i (+ i 3))))
+            (t
+             ;; Single character.
+             (push (funcall decode-char (aref str i)) ret)
+             (setq i (+ i 1)))))
+    ret))
 
 
 (defun rx-check-any (arg)
@@ -483,7 +517,10 @@ Only both edges of each range is checked."
 	       (null (string-match "\\`\\[\\[:[-a-z]+:\\]\\]\\'" translation)))
 	   (error "Invalid char class `%s' in Rx `any'" arg))
        (list (substring translation 1 -1)))) ; strip outer brackets
-    ((and (integerp (car-safe arg)) (integerp (cdr-safe arg)))
+    ((and (characterp (car-safe arg)) (characterp (cdr-safe arg)))
+     (unless (<= (car arg) (cdr arg))
+       (error "Rx character range `%c-%c' is reversed"
+              (car arg) (cdr arg)))
      (list arg))
     ((stringp arg) (rx-check-any-string arg))
     ((error
@@ -589,7 +626,7 @@ ARG is optional."
   (rx-check form)
   (let ((result (rx-form (cadr form) '!))
 	case-fold-search)
-    (cond ((string-match "\\`\\[^" result)
+    (cond ((string-match "\\`\\[\\^" result)
 	   (cond
 	    ((equal result "[^]") "[^^]")
 	    ((and (= (length result) 4) (null (eq rx-parent '!)))
@@ -640,7 +677,10 @@ If SKIP is non-nil, allow that number of items after the head, i.e.
   (unless (and (integerp (nth 1 form))
 	       (> (nth 1 form) 0))
     (error "rx `=' requires positive integer first arg"))
-  (format "%s\\{%d\\}" (rx-form (nth 2 form) '*) (nth 1 form)))
+  (let ((subform (rx-form (nth 2 form) '*)))
+    (if (stringp subform)
+        (format "%s\\{%d\\}" subform (nth 1 form))
+      `(,@subform ,(format "\\{%d\\}" (nth 1 form))))))
 
 
 (defun rx->= (form)
@@ -650,7 +690,10 @@ If SKIP is non-nil, allow that number of items after the head, i.e.
   (unless (and (integerp (nth 1 form))
 	       (> (nth 1 form) 0))
     (error "rx `>=' requires positive integer first arg"))
-  (format "%s\\{%d,\\}" (rx-form (nth 2 form) '*) (nth 1 form)))
+  (let ((subform (rx-form (nth 2 form) '*)))
+    (if (stringp subform)
+        (format "%s\\{%d,\\}" subform (nth 1 form))
+      `(,@subform ,(format "\\{%d,\\}" (nth 1 form))))))
 
 
 (defun rx-** (form)
@@ -671,7 +714,10 @@ FORM is either `(repeat N FORM1)' or `(repeat N M FORMS...)'."
 	 (unless (and (integerp (nth 1 form))
 		      (> (nth 1 form) 0))
 	   (error "rx `repeat' requires positive integer first arg"))
-	 (format "%s\\{%d\\}" (rx-form (nth 2 form) '*) (nth 1 form)))
+         (let ((subform (rx-form (nth 2 form) '*)))
+           (if (stringp subform)
+               (format "%s\\{%d\\}" subform (nth 1 form))
+             `(,@subform ,(format "\\{%d\\}" (nth 1 form))))))
 	((or (not (integerp (nth 2 form)))
 	     (< (nth 2 form) 0)
 	     (not (integerp (nth 1 form)))
@@ -679,30 +725,28 @@ FORM is either `(repeat N FORM1)' or `(repeat N M FORMS...)'."
 	     (< (nth 2 form) (nth 1 form)))
 	 (error "rx `repeat' range error"))
 	(t
-	 (format "%s\\{%d,%d\\}" (rx-form (nth 3 form) '*)
-		 (nth 1 form) (nth 2 form)))))
+         (let ((subform (rx-form (nth 3 form) '*)))
+           (if (stringp subform)
+               (format "%s\\{%d,%d\\}" subform (nth 1 form) (nth 2 form))
+             `(,@subform ,(format "\\{%d,%d\\}" (nth 1 form) (nth 2 form))))))))
 
 
 (defun rx-submatch (form)
   "Parse and produce code from FORM, which is `(submatch ...)'."
-  (concat "\\("
-          (if (= 2 (length form))
-              ;; Only one sub-form.
-              (rx-form (cadr form))
-            ;; Several sub-forms implicitly concatenated.
-            (mapconcat (lambda (re) (rx-form re ':)) (cdr form) nil))
-          "\\)"))
+  (let ((subforms (rx--subforms (cdr form) ':)))
+    (if (stringp subforms)
+        (concat "\\(" subforms "\\)")
+      `("\\(" ,@subforms "\\)"))))
 
 (defun rx-submatch-n (form)
   "Parse and produce code from FORM, which is `(submatch-n N ...)'."
-  (let ((n (nth 1 form)))
-    (concat "\\(?" (number-to-string n) ":"
-	    (if (= 3 (length form))
-		;; Only one sub-form.
-		(rx-form (nth 2 form))
-	      ;; Several sub-forms implicitly concatenated.
-	      (mapconcat (lambda (re) (rx-form re ':)) (cddr form) nil))
-	    "\\)")))
+  (let ((n (nth 1 form))
+        (subforms (rx--subforms (cddr form) ':)))
+    (unless (and (integerp n) (> n 0))
+      (error "rx `submatch-n' argument must be positive"))
+    (if (stringp subforms)
+        (concat "\\(?" (number-to-string n) ":" subforms "\\)")
+      `("\\(?" ,(number-to-string n) ":" ,@subforms "\\)"))))
 
 (defun rx-backref (form)
   "Parse and produce code from FORM, which is `(backref N)'."
@@ -724,15 +768,18 @@ If OP is anything else, produce a greedy regexp if `rx-greedy-flag'
 is non-nil."
   (rx-check form)
   (setq form (rx-trans-forms form))
-  (let ((suffix (cond ((memq (car form) '(* + ?\s)) "")
-		      ((memq (car form) '(*? +? ??)) "?")
+  (let ((suffix (cond ((memq (car form) '(* + \? ?\s)) "")
+		      ((memq (car form) '(*? +? \?? ??)) "?")
 		      (rx-greedy-flag "")
 		      (t "?")))
 	(op (cond ((memq (car form) '(* *? 0+ zero-or-more)) "*")
 		  ((memq (car form) '(+ +? 1+ one-or-more))  "+")
-		  (t "?"))))
+                  (t "?")))
+        (subform (rx-form (cadr form) '*)))
     (rx-group-if
-     (concat (rx-form (cadr form) '*) op suffix)
+     (if (stringp subform)
+         (concat subform op suffix)
+       `(,@subform ,(concat op suffix)))
      (and (memq rx-parent '(t *)) rx-parent))))
 
 
@@ -760,15 +807,18 @@ regexps that are atomic but end in operators, such as
 be detected without much effort.  A guarantee of no false
 negatives would require a theoretic specification of the set
 of all atomic regexps."
-  (let ((l (length r)))
-    (cond
-     ((<= l 1))
-     ((= l 2) (= (aref r 0) ?\\))
-     ((= l 3) (string-match "\\`\\(?:\\\\[cCsS_]\\|\\[[^^]\\]\\)" r))
-     ((null lax)
+  (if (and rx--compile-to-lisp
+           (not (stringp r)))
+      nil ;; Runtime value, we must assume non-atomic.
+    (let ((l (length r)))
       (cond
-       ((string-match "\\`\\[^?\]?\\(?:\\[:[a-z]+:]\\|[^]]\\)*\\]\\'" r))
-       ((string-match "\\`\\\\(\\(?:[^\\]\\|\\\\[^)]\\)*\\\\)\\'" r)))))))
+       ((<= l 1))
+       ((= l 2) (= (aref r 0) ?\\))
+       ((= l 3) (string-match "\\`\\(?:\\\\[cCsS_]\\|\\[[^^]\\]\\)" r))
+       ((null lax)
+        (cond
+         ((string-match "\\`\\[\\^?]?\\(?:\\[:[a-z]+:]\\|[^]]\\)*]\\'" r))
+         ((string-match "\\`\\\\(\\(?:[^\\]\\|\\\\[^)]\\)*\\\\)\\'" r))))))))
 
 
 (defun rx-syntax (form)
@@ -824,360 +874,197 @@ If FORM is `(minimal-match FORM1)', non-greedy versions of `*',
 
 (defun rx-regexp (form)
   "Parse and produce code from FORM, which is `(regexp STRING)'."
-  (rx-check form)
-  (rx-group-if (cadr form) rx-parent))
-
-
-(defun rx-form (form &optional rx-parent)
+  (cond ((stringp (cadr form))
+         (rx-group-if (cadr form) rx-parent))
+        (rx--compile-to-lisp
+         ;; Always group non-string forms, since we can't be sure they
+         ;; are atomic.
+         (rx-group-if (cdr form) t))
+        (t (rx-check form))))
+
+(defun rx-literal (form)
+  "Parse and produce code from FORM, which is `(literal STRING-EXP)'."
+  (cond ((stringp (cadr form))
+         ;; This is allowed, but makes little sense, you could just
+         ;; use STRING directly.
+         (rx-group-if (regexp-quote (cadr form)) rx-parent))
+        (rx--compile-to-lisp
+         (rx-group-if `((regexp-quote ,(cadr form))) rx-parent))
+        (t (rx-check form))))
+
+(defun rx-form (form &optional parent)
   "Parse and produce code for regular expression FORM.
 FORM is a regular expression in sexp form.
-RX-PARENT shows which type of expression calls and controls putting of
+PARENT shows which type of expression calls and controls putting of
 shy groups around the result and some more in other functions."
-  (cond
-   ((stringp form)
-    (rx-group-if (regexp-quote form)
-                 (if (and (eq rx-parent '*) (< 1 (length form)))
-                     rx-parent)))
-   ((integerp form)
-    (regexp-quote (char-to-string form)))
-   ((symbolp form)
-    (let ((info (rx-info form nil)))
-      (cond ((stringp info)
-             info)
-            ((null info)
-             (error "Unknown rx form `%s'" form))
-            (t
-             (funcall (nth 0 info) form)))))
-   ((consp form)
-    (let ((info (rx-info (car form) 'head)))
-      (unless (consp info)
-        (error "Unknown rx form `%s'" (car form)))
-      (funcall (nth 0 info) form)))
-   (t
-    (error "rx syntax error at `%s'" form))))
+  (let ((rx-parent parent))
+    (cond
+     ((stringp form)
+      (rx-group-if (regexp-quote form)
+                   (if (and (eq parent '*) (< 1 (length form)))
+                       parent)))
+     ((integerp form)
+      (regexp-quote (char-to-string form)))
+     ((symbolp form)
+      (let ((info (rx-info form nil)))
+        (cond ((stringp info)
+               info)
+              ((null info)
+               (error "Unknown rx form `%s'" form))
+              (t
+               (funcall (nth 0 info) form)))))
+     ((consp form)
+      (let ((info (rx-info (car form) 'head)))
+        (unless (consp info)
+          (error "Unknown rx form `%s'" (car form)))
+        (funcall (nth 0 info) form)))
+     (t
+      (error "rx syntax error at `%s'" form)))))
+
+(defun rx--subforms (subforms &optional parent separator)
+  "Produce code for regular expressions SUBFORMS.
+SUBFORMS is a list of regular expression sexps.
+PARENT controls grouping, as in `rx-form'.
+Insert SEPARATOR between the code from each of SUBFORMS."
+  (if (null (cdr subforms))
+      ;; Zero or one forms, no need for grouping.
+      (and subforms (rx-form (car subforms)))
+    (let ((listify (lambda (x)
+                     (if (listp x) (copy-sequence x)
+                       (list x)))))
+      (setq subforms (mapcar (lambda (x) (rx-form x parent)) subforms))
+      (cond ((or (not rx--compile-to-lisp)
+                 (cl-every #'stringp subforms))
+             (mapconcat #'identity subforms separator))
+            (separator
+             (nconc (funcall listify (car subforms))
+                    (mapcan (lambda (x)
+                              (cons separator (funcall listify x)))
+                            (cdr subforms))))
+            (t (mapcan listify subforms))))))
 
 
 ;;;###autoload
 (defun rx-to-string (form &optional no-group)
   "Parse and produce code for regular expression FORM.
 FORM is a regular expression in sexp form.
-NO-GROUP non-nil means don't put shy groups around the result."
+NO-GROUP non-nil means don't put shy groups around the result.
+
+In contrast to the `rx' macro, subforms `literal' and `regexp'
+will not accept non-string arguments, i.e., (literal STRING)
+becomes just a more verbose version of STRING."
   (rx-group-if (rx-form form) (null no-group)))
 
 
 ;;;###autoload
 (defmacro rx (&rest regexps)
   "Translate regular expressions REGEXPS in sexp form to a regexp string.
-REGEXPS is a non-empty sequence of forms of the sort listed below.
-
-Note that `rx' is a Lisp macro; when used in a Lisp program being
-compiled, the translation is performed by the compiler.
-See `rx-to-string' for how to do such a translation at run-time.
-
-The following are valid subforms of regular expressions in sexp
-notation.
-
-STRING
-     matches string STRING literally.
-
-CHAR
-     matches character CHAR literally.
-
-`not-newline', `nonl'
-     matches any character except a newline.
-
-`anything'
-     matches any character
-
-`(any SET ...)'
-`(in SET ...)'
-`(char SET ...)'
-     matches any character in SET ....  SET may be a character or string.
-     Ranges of characters can be specified as `A-Z' in strings.
-     Ranges may also be specified as conses like `(?A . ?Z)'.
-
-     SET may also be the name of a character class: `digit',
-     `control', `hex-digit', `blank', `graph', `print', `alnum',
-     `alpha', `ascii', `nonascii', `lower', `punct', `space', `upper',
-     `word', or one of their synonyms.
-
-`(not (any SET ...))'
-     matches any character not in SET ...
-
-`line-start', `bol'
-     matches the empty string, but only at the beginning of a line
-     in the text being matched
-
-`line-end', `eol'
-     is similar to `line-start' but matches only at the end of a line
-
-`string-start', `bos', `bot'
-     matches the empty string, but only at the beginning of the
-     string being matched against.
-
-`string-end', `eos', `eot'
-     matches the empty string, but only at the end of the
-     string being matched against.
-
-`buffer-start'
-     matches the empty string, but only at the beginning of the
-     buffer being matched against.  Actually equivalent to `string-start'.
-
-`buffer-end'
-     matches the empty string, but only at the end of the
-     buffer being matched against.  Actually equivalent to `string-end'.
-
-`point'
-     matches the empty string, but only at point.
-
-`word-start', `bow'
-     matches the empty string, but only at the beginning of a word.
-
-`word-end', `eow'
-     matches the empty string, but only at the end of a word.
-
-`word-boundary'
-     matches the empty string, but only at the beginning or end of a
-     word.
-
-`(not word-boundary)'
-`not-word-boundary'
-     matches the empty string, but not at the beginning or end of a
-     word.
-
-`symbol-start'
-     matches the empty string, but only at the beginning of a symbol.
-
-`symbol-end'
-     matches the empty string, but only at the end of a symbol.
-
-`digit', `numeric', `num'
-     matches 0 through 9.
-
-`control', `cntrl'
-     matches ASCII control characters.
-
-`hex-digit', `hex', `xdigit'
-     matches 0 through 9, a through f and A through F.
-
-`blank'
-     matches horizontal whitespace, as defined by Annex C of the
-     Unicode Technical Standard #18.  In particular, it matches
-     spaces, tabs, and other characters whose Unicode
-     `general-category' property indicates they are spacing
-     separators.
-
-`graphic', `graph'
-     matches graphic characters--everything except whitespace, ASCII
-     and non-ASCII control characters, surrogates, and codepoints
-     unassigned by Unicode.
-
-`printing', `print'
-     matches whitespace and graphic characters.
-
-`alphanumeric', `alnum'
-     matches alphabetic characters and digits.  For multibyte characters,
-     it matches characters whose Unicode `general-category' property
-     indicates they are alphabetic or decimal number characters.
-
-`letter', `alphabetic', `alpha'
-     matches alphabetic characters.  For multibyte characters,
-     it matches characters whose Unicode `general-category' property
-     indicates they are alphabetic characters.
-
-`ascii'
-     matches ASCII (unibyte) characters.
-
-`nonascii'
-     matches non-ASCII (multibyte) characters.
-
-`lower', `lower-case'
-     matches anything lower-case, as determined by the current case
-     table.  If `case-fold-search' is non-nil, this also matches any
-     upper-case letter.
-
-`upper', `upper-case'
-     matches anything upper-case, as determined by the current case
-     table.  If `case-fold-search' is non-nil, this also matches any
-     lower-case letter.
-
-`punctuation', `punct'
-     matches punctuation.  (But at present, for multibyte characters,
-     it matches anything that has non-word syntax.)
-
-`space', `whitespace', `white'
-     matches anything that has whitespace syntax.
-
-`word', `wordchar'
-     matches anything that has word syntax.
-
-`not-wordchar'
-     matches anything that has non-word syntax.
-
-`(syntax SYNTAX)'
-     matches a character with syntax SYNTAX.  SYNTAX must be one
-     of the following symbols, or a symbol corresponding to the syntax
-     character, e.g. `\\.' for `\\s.'.
-
-     `whitespace'		(\\s- in string notation)
-     `punctuation'		(\\s.)
-     `word'			(\\sw)
-     `symbol'			(\\s_)
-     `open-parenthesis'		(\\s()
-     `close-parenthesis'	(\\s))
-     `expression-prefix'	(\\s')
-     `string-quote'		(\\s\")
-     `paired-delimiter'		(\\s$)
-     `escape'			(\\s\\)
-     `character-quote'		(\\s/)
-     `comment-start'		(\\s<)
-     `comment-end'		(\\s>)
-     `string-delimiter'		(\\s|)
-     `comment-delimiter'	(\\s!)
-
-`(not (syntax SYNTAX))'
-     matches a character that doesn't have syntax SYNTAX.
-
-`(category CATEGORY)'
-     matches a character with category CATEGORY.  CATEGORY must be
-     either a character to use for C, or one of the following symbols.
-
-     `consonant'			(\\c0 in string notation)
-     `base-vowel'			(\\c1)
-     `upper-diacritical-mark'		(\\c2)
-     `lower-diacritical-mark'		(\\c3)
-     `tone-mark'		        (\\c4)
-     `symbol'			        (\\c5)
-     `digit'			        (\\c6)
-     `vowel-modifying-diacritical-mark'	(\\c7)
-     `vowel-sign'			(\\c8)
-     `semivowel-lower'			(\\c9)
-     `not-at-end-of-line'		(\\c<)
-     `not-at-beginning-of-line'		(\\c>)
-     `alpha-numeric-two-byte'		(\\cA)
-     `chinese-two-byte'			(\\cC)
-     `greek-two-byte'			(\\cG)
-     `japanese-hiragana-two-byte'	(\\cH)
-     `indian-two-byte'			(\\cI)
-     `japanese-katakana-two-byte'	(\\cK)
-     `korean-hangul-two-byte'		(\\cN)
-     `cyrillic-two-byte'		(\\cY)
-     `combining-diacritic'		(\\c^)
-     `ascii'				(\\ca)
-     `arabic'				(\\cb)
-     `chinese'				(\\cc)
-     `ethiopic'				(\\ce)
-     `greek'				(\\cg)
-     `korean'				(\\ch)
-     `indian'				(\\ci)
-     `japanese'				(\\cj)
-     `japanese-katakana'		(\\ck)
-     `latin'				(\\cl)
-     `lao'				(\\co)
-     `tibetan'				(\\cq)
-     `japanese-roman'			(\\cr)
-     `thai'				(\\ct)
-     `vietnamese'			(\\cv)
-     `hebrew'				(\\cw)
-     `cyrillic'				(\\cy)
-     `can-break'			(\\c|)
-
-`(not (category CATEGORY))'
-     matches a character that doesn't have category CATEGORY.
-
-`(and SEXP1 SEXP2 ...)'
-`(: SEXP1 SEXP2 ...)'
-`(seq SEXP1 SEXP2 ...)'
-`(sequence SEXP1 SEXP2 ...)'
-     matches what SEXP1 matches, followed by what SEXP2 matches, etc.
-
-`(submatch SEXP1 SEXP2 ...)'
-`(group SEXP1 SEXP2 ...)'
-     like `and', but makes the match accessible with `match-end',
-     `match-beginning', and `match-string'.
-
-`(submatch-n N SEXP1 SEXP2 ...)'
-`(group-n N SEXP1 SEXP2 ...)'
-     like `group', but make it an explicitly-numbered group with
-     group number N.
-
-`(or SEXP1 SEXP2 ...)'
-`(| SEXP1 SEXP2 ...)'
-     matches anything that matches SEXP1 or SEXP2, etc.  If all
-     args are strings, use `regexp-opt' to optimize the resulting
-     regular expression.
-
-`(minimal-match SEXP)'
-     produce a non-greedy regexp for SEXP.  Normally, regexps matching
-     zero or more occurrences of something are \"greedy\" in that they
-     match as much as they can, as long as the overall regexp can
-     still match.  A non-greedy regexp matches as little as possible.
-
-`(maximal-match SEXP)'
-     produce a greedy regexp for SEXP.  This is the default.
-
-Below, `SEXP ...' represents a sequence of regexp forms, treated as if
-enclosed in `(and ...)'.
-
-`(zero-or-more SEXP ...)'
-`(0+ SEXP ...)'
-     matches zero or more occurrences of what SEXP ... matches.
-
-`(* SEXP ...)'
-     like `zero-or-more', but always produces a greedy regexp, independent
-     of `rx-greedy-flag'.
-
-`(*? SEXP ...)'
-     like `zero-or-more', but always produces a non-greedy regexp,
-     independent of `rx-greedy-flag'.
-
-`(one-or-more SEXP ...)'
-`(1+ SEXP ...)'
-     matches one or more occurrences of SEXP ...
-
-`(+ SEXP ...)'
-     like `one-or-more', but always produces a greedy regexp.
-
-`(+? SEXP ...)'
-     like `one-or-more', but always produces a non-greedy regexp.
-
-`(zero-or-one SEXP ...)'
-`(optional SEXP ...)'
-`(opt SEXP ...)'
-     matches zero or one occurrences of A.
-
-`(? SEXP ...)'
-     like `zero-or-one', but always produces a greedy regexp.
-
-`(?? SEXP ...)'
-     like `zero-or-one', but always produces a non-greedy regexp.
-
-`(repeat N SEXP)'
-`(= N SEXP ...)'
-     matches N occurrences.
-
-`(>= N SEXP ...)'
-     matches N or more occurrences.
-
-`(repeat N M SEXP)'
-`(** N M SEXP ...)'
-     matches N to M occurrences.
-
-`(backref N)'
-     matches what was matched previously by submatch N.
-
-`(eval FORM)'
-     evaluate FORM and insert result.  If result is a string,
-     `regexp-quote' it.
-
-`(regexp REGEXP)'
-     include REGEXP in string notation in the result."
-  (cond ((null regexps)
-	 (error "No regexp"))
-	((cdr regexps)
-	 (rx-to-string `(and ,@regexps) t))
-	(t
-	 (rx-to-string (car regexps) t))))
+Each argument is one of the forms below; RX is a subform, and RX... stands
+for one or more RXs.  For details, see Info node `(elisp) Rx Notation'.
+See `rx-to-string' for the corresponding function.
+
+STRING         Match a literal string.
+CHAR           Match a literal character.
+
+(seq RX...)    Match the RXs in sequence.  Alias: :, sequence, and.
+(or RX...)     Match one of the RXs.  Alias: |.
+
+(zero-or-more RX...) Match RXs zero or more times.  Alias: 0+.
+(one-or-more RX...)  Match RXs one or more times.  Alias: 1+.
+(zero-or-one RX...)  Match RXs or the empty string.  Alias: opt, optional.
+(* RX...)       Match RXs zero or more times; greedy.
+(+ RX...)       Match RXs one or more times; greedy.
+(? RX...)       Match RXs or the empty string; greedy.
+(*? RX...)      Match RXs zero or more times; non-greedy.
+(+? RX...)      Match RXs one or more times; non-greedy.
+(?? RX...)      Match RXs or the empty string; non-greedy.
+(= N RX...)     Match RXs exactly N times.
+(>= N RX...)    Match RXs N or more times.
+(** N M RX...)  Match RXs N to M times.  Alias: repeat.
+(minimal-match RX)  Match RX, with zero-or-more, one-or-more, zero-or-one
+                and aliases using non-greedy matching.
+(maximal-match RX)  Match RX, with zero-or-more, one-or-more, zero-or-one
+                and aliases using greedy matching, which is the default.
+
+(any SET...)    Match a character from one of the SETs.  Each SET is a
+                character, a string, a range as string \"A-Z\" or cons
+                (?A . ?Z), or a character class (see below).  Alias: in, char.
+(not CHARSPEC)  Match one character not matched by CHARSPEC.  CHARSPEC
+                can be (any ...), (syntax ...), (category ...),
+                or a character class.
+not-newline     Match any character except a newline.  Alias: nonl.
+anything        Match any character.
+
+CHARCLASS       Match a character from a character class.  One of:
+ alpha, alphabetic, letter   Alphabetic characters (defined by Unicode).
+ alnum, alphanumeric         Alphabetic or decimal digit chars (Unicode).
+ digit numeric, num          0-9.
+ xdigit, hex-digit, hex      0-9, A-F, a-f.
+ cntrl, control              ASCII codes 0-31.
+ blank                       Horizontal whitespace (Unicode).
+ space, whitespace, white    Chars with whitespace syntax.
+ lower, lower-case           Lower-case chars, from current case table.
+ upper, upper-case           Upper-case chars, from current case table.
+ graph, graphic              Graphic characters (Unicode).
+ print, printing             Whitespace or graphic (Unicode).
+ punct, punctuation          Not control, space, letter or digit (ASCII);
+                              not word syntax (non-ASCII).
+ word, wordchar              Characters with word syntax.
+ ascii                       ASCII characters (codes 0-127).
+ nonascii                    Non-ASCII characters (but not raw bytes).
+
+(syntax SYNTAX)  Match a character with syntax SYNTAX, being one of:
+  whitespace, punctuation, word, symbol, open-parenthesis,
+  close-parenthesis, expression-prefix, string-quote,
+  paired-delimiter, escape, character-quote, comment-start,
+  comment-end, string-delimiter, comment-delimiter
+
+(category CAT)   Match a character in category CAT, being one of:
+  space-for-indent, base, consonant, base-vowel,
+  upper-diacritical-mark, lower-diacritical-mark, tone-mark, symbol,
+  digit, vowel-modifying-diacritical-mark, vowel-sign,
+  semivowel-lower, not-at-end-of-line, not-at-beginning-of-line,
+  alpha-numeric-two-byte, chinese-two-byte, greek-two-byte,
+  japanese-hiragana-two-byte, indian-two-byte,
+  japanese-katakana-two-byte, strong-left-to-right,
+  korean-hangul-two-byte, strong-right-to-left, cyrillic-two-byte,
+  combining-diacritic, ascii, arabic, chinese, ethiopic, greek,
+  korean, indian, japanese, japanese-katakana, latin, lao,
+  tibetan, japanese-roman, thai, vietnamese, hebrew, cyrillic,
+  can-break
+
+Zero-width assertions: these all match the empty string in specific places.
+ line-start         At the beginning of a line.  Alias: bol.
+ line-end           At the end of a line.  Alias: eol.
+ string-start       At the start of the string or buffer.
+                     Alias: buffer-start, bos, bot.
+ string-end         At the end of the string or buffer.
+                     Alias: buffer-end, eos, eot.
+ point              At point.
+ word-start         At the beginning of a word.
+ word-end           At the end of a word.
+ word-boundary      At the beginning or end of a word.
+ not-word-boundary  Not at the beginning or end of a word.
+ symbol-start       At the beginning of a symbol.
+ symbol-end         At the end of a symbol.
+
+(group RX...)  Match RXs and define a capture group.  Alias: submatch.
+(group-n N RX...) Match RXs and define capture group N.  Alias: submatch-n.
+(backref N)    Match the text that capture group N matched.
+
+(literal EXPR) Match the literal string from evaluating EXPR at run time.
+(regexp EXPR)  Match the string regexp from evaluating EXPR at run time.
+(eval EXPR)    Match the rx sexp from evaluating EXPR at compile time."
+  (let* ((rx--compile-to-lisp t)
+         (re (cond ((null regexps)
+                    (error "No regexp"))
+                   ((cdr regexps)
+                    (rx-to-string `(and ,@regexps) t))
+                   (t
+                    (rx-to-string (car regexps) t)))))
+    (if (stringp re)
+        re
+      `(concat ,@re))))
 
 
 (pcase-defmacro rx (&rest regexps)
@@ -1239,14 +1126,6 @@ string as argument to `match-string'."
                      for var in vars
                      collect `(app (match-string ,i) ,var)))))
 
-;; ;; sregex.el replacement
-
-;; ;;;###autoload (provide 'sregex)
-;; ;;;###autoload (autoload 'sregex "rx")
-;; (defalias 'sregex 'rx-to-string)
-;; ;;;###autoload (autoload 'sregexq "rx" nil nil 'macro)
-;; (defalias 'sregexq 'rx)
-
 (provide 'rx)
 
 ;;; rx.el ends here