1 files changed, 870 insertions, 0 deletions
diff --git a/test/src/regex-emacs-tests.el b/test/src/regex-emacs-tests.el
new file mode 100644
index 00000000000..ff0d6be3f5d
--- /dev/null
+++ b/test/src/regex-emacs-tests.el
@@ -0,0 +1,870 @@
+;;; regex-emacs-tests.el --- tests for regex-emacs.c -*- lexical-binding: t -*-
+
+;; Copyright (C) 2015-2022 Free Software Foundation, Inc.
+
+;; This file is part of GNU Emacs.
+
+;; GNU Emacs is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+
+;; GNU Emacs is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.
+
+;;; Code:
+
+(require 'ert)
+
+(defvar regex-tests--resources-dir
+  (concat (concat (file-name-directory (or load-file-name buffer-file-name))
+                  "/regex-resources/"))
+  "Path to regex-resources directory next to the \"regex-emacs-tests.el\" file.")
+
+(ert-deftest regex-word-cc-fallback-test ()
+  "Test that \"[[:cc:]]*x\" matches \"x\" (bug#24020).
+
+Test that a regex of the form \"[[:cc:]]*x\" where CC is
+a character class which matches a multibyte character X, matches
+string \"x\".
+
+For example, \"[[:word:]]*\u2620\" regex (note: \u2620 is a word
+character) must match a string \"\u2420\"."
+  (dolist (class '("[[:word:]]" "\\sw"))
+    (dolist (repeat '("*" "+"))
+      (dolist (suffix '("" "b" "bar" "\u2620"))
+        (dolist (string '("" "foo"))
+          (when (not (and (string-equal repeat "+")
+                          (string-equal string "")))
+            (should (string-match (concat "^" class repeat suffix "$")
+                                  (concat string suffix)))))))))
+
+(defun regex--test-cc (name matching not-matching)
+  (let (case-fold-search)
+    (should (string-match-p (concat "^[[:" name ":]]*$") matching))
+    (should (string-match-p (concat "^[[:" name ":]]*?\u2622$")
+                            (concat matching "\u2622")))
+    (should (string-match-p (concat "^[^[:" name ":]]*$") not-matching))
+    (should (string-match-p (concat "^[^[:" name ":]]*\u2622$")
+                            (concat not-matching "\u2622")))
+    (with-temp-buffer
+      (insert matching)
+      (let ((p (point)))
+        (insert not-matching)
+        (goto-char (point-min))
+        (skip-chars-forward (concat "[:" name ":]"))
+        (should (equal (point) p))
+        (skip-chars-forward (concat "^[:" name ":]"))
+        (should (equal (point) (point-max)))
+        (goto-char (point-min))
+        (skip-chars-forward (concat "[:" name ":]\u2622"))
+        (should (or (equal (point) p) (equal (point) (1+ p))))))))
+
+(dolist (test '(("alnum" "abcABC012łąka" "-, \t\n")
+                ("alpha" "abcABCłąka" "-,012 \t\n")
+                ("digit" "012" "abcABCłąka-, \t\n")
+                ("xdigit" "0123aBc" "łąk-, \t\n")
+                ("upper" "ABCŁĄKA" "abc012-, \t\n")
+                ("lower" "abcłąka" "ABC012-, \t\n")
+
+                ("word" "abcABC012\u2620" "-, \t\n")
+
+                ("punct" ".,-" "abcABC012\u2620 \t\n")
+                ("cntrl" "\1\2\t\n" ".,-abcABC012\u2620 ")
+                ("graph" "abcłąka\u2620-," " \t\n\1")
+                ("print" "abcłąka\u2620-, " "\t\n\1")
+
+                ("space" " \t\n\u2001" "abcABCł0123")
+                ("blank" " \t\u2001" "\n")
+
+                ("ascii" "abcABC012 \t\n\1" "łą\u2620")
+                ("nonascii" "łą\u2622" "abcABC012 \t\n\1")
+                ("unibyte" "abcABC012 \t\n\1" "łą\u2622")
+                ("multibyte" "łą\u2622" "abcABC012 \t\n\1")))
+  (let ((name (intern (concat "regex-tests-" (car test) "-character-class")))
+        (doc (concat "Perform sanity test of regexes using " (car test)
+                     " character class.
+
+Go over all the supported character classes and test whether the
+classes and their inversions match what they are supposed to
+match.  The test is done using `string-match-p' as well as
+`skip-chars-forward'.")))
+    (eval `(ert-deftest ,name () ,doc ,(cons 'regex--test-cc test)) t)))
+
+
+(defmacro regex-tests-generic-line (comment-char test-file whitelist &rest body)
+  "Reads a line of the test file TEST-FILE, skipping
+comments (defined by COMMENT-CHAR), and evaluates the tests in
+this line as defined in the BODY.  Line numbers in the WHITELIST
+are known failures, and are skipped."
+
+  `(with-temp-buffer
+    (modify-syntax-entry ?_ "w;; ") ; tests expect _ to be a word
+    (insert-file-contents (concat regex-tests--resources-dir ,test-file))
+    (let ((case-fold-search nil)
+          (line-number 1)
+          (whitelist-idx 0))
+
+      (goto-char (point-min))
+
+      (while (not (eobp))
+        (let ((start (point)))
+          (end-of-line)
+          (narrow-to-region start (point))
+
+          (goto-char (point-min))
+
+          (when
+              (and
+               ;; ignore comments
+               (save-excursion
+                 (re-search-forward ,(concat "^[^" (string comment-char) "]") nil t))
+
+               ;; skip lines in the whitelist
+               (let ((whitelist-next
+                      (condition-case nil
+                          (aref ,whitelist whitelist-idx) (args-out-of-range nil))))
+                 (cond
+                  ;; whitelist exhausted. do process this line
+                  ((null whitelist-next) t)
+
+                  ;; we're not yet at the next whitelist element. do
+                  ;; process this line
+                  ((< line-number whitelist-next) t)
+
+                  ;; we're past the next whitelist element. This
+                  ;; shouldn't happen
+                  ((> line-number whitelist-next)
+                   (error
+                    (format
+                     "We somehow skipped the next whitelist element: line %d" whitelist-next)))
+
+                  ;; we're at the next whitelist element. Skip this
+                  ;; line, and advance the whitelist index
+                  (t
+                   (setq whitelist-idx (1+ whitelist-idx)) nil))))
+            ,@body)
+
+          (widen)
+          (forward-line)
+          (beginning-of-line)
+          (setq line-number (1+ line-number)))))))
+
+(defun regex-tests-compare (string what-failed bounds-ref &optional substring-ref)
+  "I just ran a search, looking at STRING.  WHAT-FAILED describes
+what failed, if anything; valid values are `search-failed',
+`compilation-failed' and nil.  I compare the beginning/end of each
+group with their expected values.  This is done with either
+BOUNDS-REF or SUBSTRING-REF; one of those should be non-nil.
+BOUNDS-REF is a sequence [start-ref0 end-ref0 start-ref1
+end-ref1 ....] while SUBSTRING-REF is the expected substring
+obtained by indexing the input string by start/end-ref.
+
+If the search was supposed to fail then start-ref0/substring-ref0
+is `search-failed'.  If the search wasn't even supposed to compile
+successfully, then start-ref0/substring-ref0 is
+`compilation-failed'.  If I only care about a match succeeding,
+this can be set to t.
+
+This function returns a string that describes the failure, or nil
+on success"
+
+  (when (or
+         (and bounds-ref substring-ref)
+         (not (or bounds-ref substring-ref)))
+    (error "Exactly one of bounds-ref and bounds-ref should be non-nil"))
+
+  (let ((what-failed-ref (car (or bounds-ref substring-ref))))
+
+    (cond
+     ((eq what-failed 'search-failed)
+      (cond
+       ((eq what-failed-ref 'search-failed)
+        nil)
+       ((eq what-failed-ref 'compilation-failed)
+        "Expected pattern failure; but no match")
+       (t
+        "Expected match; but no match")))
+
+     ((eq what-failed 'compilation-failed)
+      (cond
+       ((eq what-failed-ref 'search-failed)
+        "Expected no match; but pattern failure")
+       ((eq what-failed-ref 'compilation-failed)
+        nil)
+       (t
+        "Expected match; but pattern failure")))
+
+     ;; The regex match succeeded
+     ((eq what-failed-ref 'search-failed)
+      "Expected no match; but match")
+     ((eq what-failed-ref 'compilation-failed)
+      "Expected pattern failure; but match")
+
+     ;; The regex match succeeded, as expected. I now check all the
+     ;; bounds
+     (t
+      (let ((idx 0)
+            msg
+            ref next-ref-function compare-ref-function mismatched-ref-function)
+
+        (if bounds-ref
+            (setq ref bounds-ref
+                  next-ref-function (lambda (x) (cddr x))
+                  compare-ref-function (lambda (ref start-pos end-pos)
+                                         (or (eq (car ref) t)
+                                             (and (eq start-pos (car ref))
+                                                  (eq end-pos   (cadr ref)))))
+                  mismatched-ref-function (lambda (ref start-pos end-pos)
+                                            (format
+                                             "beginning/end positions: %d/%s and %d/%s"
+                                             start-pos (car ref) end-pos (cadr ref))))
+          (setq ref substring-ref
+                next-ref-function (lambda (x) (cdr x))
+                compare-ref-function (lambda (ref start-pos end-pos)
+                                       (or (eq (car ref) t)
+                                           (string= (substring string start-pos end-pos) (car ref))))
+                mismatched-ref-function (lambda (ref start-pos end-pos)
+                                          (format
+                                           "beginning/end positions: %d/%s and %d/%s"
+                                           start-pos (car ref) end-pos (cadr ref)))))
+
+        (while (not (or (null ref) msg))
+
+          (let ((start (match-beginning idx))
+                (end   (match-end       idx)))
+
+            (when (not (funcall compare-ref-function ref start end))
+              (setq msg
+                    (format
+                     "Have expected match, but mismatch in group %d: %s" idx (funcall mismatched-ref-function ref start end))))
+
+            (setq ref (funcall next-ref-function ref)
+                  idx (1+ idx))))
+
+        (or msg
+            nil))))))
+
+
+
+(defun regex-tests-match (pattern string bounds-ref &optional substring-ref)
+  "I match the given STRING against PATTERN.  I compare the
+beginning/end of each group with their expected values.
+BOUNDS-REF is a sequence [start-ref0 end-ref0 start-ref1 end-ref1
+....].
+
+If the search was supposed to fail then start-ref0 is
+`search-failed'.  If the search wasn't even supposed to compile
+successfully, then start-ref0 is `compilation-failed'.
+
+This function returns a string that describes the failure, or nil
+on success"
+
+  (if (string-match "\\[\\([\\.=]\\)..?\\1\\]" pattern)
+      ;; Skipping test: [.x.] and [=x=] forms not supported by emacs
+      nil
+
+    (regex-tests-compare
+     string
+     (condition-case nil
+         (if (string-match pattern string) nil 'search-failed)
+       ('invalid-regexp 'compilation-failed))
+     bounds-ref substring-ref)))
+
+
+(defconst regex-tests-re-even-escapes
+  "\\(?:^\\|[^\\]\\)\\(?:\\\\\\\\\\)*"
+  "Regex that matches an even number of \\ characters.")
+
+(defconst regex-tests-re-odd-escapes
+  (concat regex-tests-re-even-escapes "\\\\")
+  "Regex that matches an odd number of \\ characters.")
+
+
+(defun regex-tests-unextend (pattern)
+  "Basic conversion from extended regexes to emacs ones.  This is
+mostly a hack that adds \\ to () and | and {}, and removes it if
+it already exists.  We also change \\S (and \\s) to \\S- (and
+\\s-) because extended regexes see the former as whitespace, but
+emacs requires an extra symbol character"
+
+  (with-temp-buffer
+    (insert pattern)
+    (goto-char (point-min))
+
+    (while (re-search-forward "[()|{}]" nil t)
+      ;; point is past special character. If it is escaped, unescape
+      ;; it
+
+      (if (save-excursion
+            (re-search-backward (concat regex-tests-re-odd-escapes ".\\=") nil t))
+
+          ;; This special character is preceded by an odd number of \,
+          ;; so I unescape it by removing the last one
+          (progn
+            (forward-char -2)
+            (delete-char 1)
+            (forward-char 1))
+
+        ;; This special character is preceded by an even (possibly 0)
+        ;; number of \. I add an escape
+        (forward-char -1)
+        (insert "\\")
+        (forward-char 1)))
+
+    ;; convert \s to \s-
+    (goto-char (point-min))
+    (while (re-search-forward (concat regex-tests-re-odd-escapes "[Ss]") nil t)
+      (insert "-"))
+
+    (buffer-string)))
+
+(defun regex-tests-BOOST-frob-escapes (s ispattern)
+  "Mangle \\ the way it is done in frob_escapes() in
+regex-tests-BOOST.c in glibc: \\t, \\n, \\r are interpreted;
+\\\\, \\^, \\{, \\|, \\} are unescaped for the string (not
+pattern)"
+
+  ;; this is all similar to (regex-tests-unextend)
+  (with-temp-buffer
+    (insert s)
+
+    (let ((interpret-list (list "t" "n" "r")))
+      (while interpret-list
+        (goto-char (point-min))
+        (while (re-search-forward
+                (concat "\\(" regex-tests-re-even-escapes "\\)"
+                        "\\\\" (car interpret-list))
+                nil t)
+          (replace-match (concat "\\1" (car (read-from-string
+                                             (concat "\"\\" (car interpret-list) "\""))))))
+
+        (setq interpret-list (cdr interpret-list))))
+
+    (when (not ispattern)
+      ;; unescape \\, \^, \{, \|, \}
+      (let ((unescape-list (list "\\\\" "^" "{" "|" "}")))
+        (while unescape-list
+          (goto-char (point-min))
+          (while (re-search-forward
+                  (concat "\\(" regex-tests-re-even-escapes "\\)"
+                          "\\\\" (car unescape-list))
+                  nil t)
+            (replace-match (concat "\\1" (car unescape-list))))
+
+          (setq unescape-list (cdr unescape-list))))
+      )
+    (buffer-string)))
+
+
+
+
+(defconst regex-tests-BOOST-whitelist
+  [
+   ;; emacs is more stringent with regexes involving unbalanced )
+   63 65 69
+
+   ;; in emacs, regex . doesn't match \n
+   91
+
+   ;; emacs is more forgiving with * and ? that don't apply to
+   ;; characters
+   107 108 109 122 123 124 140 141 142
+
+   ;; emacs accepts regexes with {}
+   161
+
+   ;; emacs doesn't fail on bogus ranges such as [3-1] or [1-3-5]
+   222 223
+
+   ;; emacs doesn't match (ab*)[ab]*\1 greedily: only 4 chars of
+   ;; ababaaa match
+   284 294
+
+   ;; ambiguous groupings are ambiguous
+   443 444 445 446 448 449 450
+
+   ;; emacs doesn't know how to handle weird ranges such as [a-Z] and
+   ;; [[:alpha:]-a]
+   539 580 581
+
+   ;; emacs matches non-greedy regex ab.*? non-greedily
+   639 677 712
+   ]
+  "Line numbers in the boost test that should be skipped.
+These are false-positive test failures that represent
+known/benign differences in behavior.")
+
+;; - Format
+;;   - Comments are lines starting with ;
+;;   - Lines starting with - set options passed to regcomp() and regexec():
+;;     - if no "REG_BASIC" is found, with have an extended regex
+;;     - These set a flag:
+;;       - REG_ICASE
+;;       - REG_NEWLINE  (ignored by this function)
+;;       - REG_NOTBOL
+;;       - REG_NOTEOL
+;;
+;;   - Test lines are
+;;     pattern string start0 end0 start1 end1 ...
+;;
+;;   - pattern, string can have escapes
+;;   - string can have whitespace if enclosed in ""
+;;   - if string is "!", then the pattern is supposed to fail compilation
+;;   - start/end are of group0, group1, etc. group 0 is the full match
+;;   - start<0 indicates "no match"
+;;   - start is the 0-based index of the first character
+;;   - end   is the 0-based index of the first character past the group
+(defun regex-tests-BOOST ()
+  (let (failures
+        basic icase notbol noteol)
+    (regex-tests-generic-line
+     ?\; "BOOST.tests" regex-tests-BOOST-whitelist
+     (if (save-excursion (re-search-forward "^-" nil t))
+         (setq basic   (save-excursion (re-search-forward "REG_BASIC" nil t))
+               icase   (save-excursion (re-search-forward "REG_ICASE" nil t))
+               notbol  (save-excursion (re-search-forward "REG_NOTBOL" nil t))
+               noteol  (save-excursion (re-search-forward "REG_NOTEOL" nil t)))
+
+       (save-excursion
+         (or (re-search-forward "\\(\\S-+\\)\\s-+\"\\(.*\\)\"\\s-+?\\(.+\\)" nil t)
+             (re-search-forward "\\(\\S-+\\)\\s-+\\(\\S-+\\)\\s-+?\\(.+\\)"  nil t)
+             (re-search-forward "\\(\\S-+\\)\\s-+\\(!\\)"                    nil t)))
+
+       (let* ((pattern-raw   (match-string 1))
+              (string-raw    (match-string 2))
+              (positions-raw (match-string 3))
+              (pattern (regex-tests-BOOST-frob-escapes pattern-raw t))
+              (string  (regex-tests-BOOST-frob-escapes string-raw  nil))
+              (positions
+               (if (string= string "!")
+                   (list 'compilation-failed 0)
+                 (mapcar
+                  (lambda (x)
+                    (let ((x (string-to-number x)))
+                      (if (< x 0) nil x)))
+                  (split-string positions-raw)))))
+
+         (when (null (car positions))
+           (setcar positions 'search-failed))
+
+         (when (not basic)
+           (setq pattern (regex-tests-unextend pattern)))
+
+         ;; great. I now have all the data parsed. Let's use it to do
+         ;; stuff
+         (let* ((case-fold-search icase)
+                (msg (regex-tests-match pattern string positions)))
+
+           (if (and
+                ;; Skipping test: notbol/noteol not supported
+                (not notbol) (not noteol)
+
+                msg)
+
+               ;; store failure
+               (setq failures
+                     (cons (format "line number %d: Regex '%s': %s"
+                                   line-number pattern msg)
+                           failures)))))))
+
+    failures))
+
+(defconst regex-tests-PCRE-whitelist
+  [
+   ;; ambiguous groupings are ambiguous
+   610 611 1154 1157 1160 1168 1171 1176 1179 1182 1185 1188 1193 1196 1203
+  ]
+  "Line numbers in the PCRE test that should be skipped.
+These are false-positive test failures that represent
+known/benign differences in behavior.")
+
+;; - Format
+;;
+;;  regex
+;;  input_string
+;;  group_num: group_match | "No match"
+;;  input_string
+;;  group_num: group_match | "No match"
+;;  input_string
+;;  group_num: group_match | "No match"
+;;  input_string
+;;  group_num: group_match | "No match"
+;;  ...
+(defun regex-tests-PCRE ()
+  (let (failures
+        pattern icase string what-failed matches-observed)
+    (regex-tests-generic-line
+     ?# "PCRE.tests" regex-tests-PCRE-whitelist
+
+     (cond
+
+      ;; pattern
+      ((save-excursion (re-search-forward "^/\\(.*\\)/\\(.*\\)$" nil t))
+       (setq icase (string= "i" (match-string 2))
+             pattern (regex-tests-unextend (match-string 1))))
+
+      ;; string. read it in, match against pattern, and save all the results
+      ((save-excursion (re-search-forward "^    \\(.*\\)" nil t))
+       (let ((case-fold-search icase))
+         (setq string (match-string 1)
+
+               ;; the regex match under test
+               what-failed
+               (condition-case nil
+                   (if (string-match pattern string) nil 'search-failed)
+                 ('invalid-regexp 'compilation-failed))
+
+               matches-observed
+               (cl-loop for x from 0 to 20
+                        collect (and (not what-failed)
+                                     (or (match-string x string) "<unset>")))))
+       nil)
+
+      ;; verification line: failed match
+      ((save-excursion (re-search-forward "^No match" nil t))
+       (unless what-failed
+         (setq failures
+               (cons (format "line number %d: Regex '%s': Expected no match; but match"
+                             line-number pattern)
+                     failures))))
+
+      ;; verification line: succeeded match
+      ((save-excursion (re-search-forward "^ *\\([0-9]+\\): \\(.*\\)" nil t))
+       (let* ((match-ref (match-string 2))
+              (idx       (string-to-number (match-string 1))))
+
+         (if what-failed
+             "Expected match; but no match"
+           (unless (string= match-ref (elt matches-observed idx))
+             (setq failures
+                   (cons (format "line number %d: Regex '%s': Have expected match, but group %d is wrong: '%s'/'%s'"
+                                 line-number pattern
+                                 idx match-ref (elt matches-observed idx))
+                         failures))))))
+
+      ;; reset
+      (t (setq pattern nil) nil)))
+
+    failures))
+
+(defconst regex-tests-PTESTS-whitelist
+  [
+   ;; emacs doesn't see DEL (0x7f) as a [:cntrl:] character
+   138
+
+   ;; emacs doesn't barf on weird ranges such as [b-a], but simply
+   ;; fails to match
+   168
+  ]
+  "Line numbers in the PTESTS test that should be skipped.
+These are false-positive test failures that represent
+known/benign differences in behavior.")
+
+;; - Format
+;;   - fields separated by ¦ (note: this is not a |)
+;;   - start¦end¦pattern¦string
+;;   - start is the 1-based index of the first character
+;;   - end   is the 1-based index of the last  character
+(defun regex-tests-PTESTS ()
+  (let (failures)
+    (regex-tests-generic-line
+     ?# "PTESTS" regex-tests-PTESTS-whitelist
+     (let* ((fields (split-string (buffer-string) "¦"))
+
+            ;; string has 1-based index of first char in the
+            ;; match. -1 means "no match". -2 means "invalid
+            ;; regex".
+            ;;
+            ;; start-ref is 0-based index of first char in the
+            ;; match
+            ;;
+            ;; string==0 is a special case, and I have to treat
+            ;; it as start-ref = 0
+            (start-ref (let ((raw (string-to-number (elt fields 0))))
+                         (cond
+                          ((= raw -2) 'compilation-failed)
+                          ((= raw -1) 'search-failed)
+                          ((= raw  0) 0)
+                          (t          (1- raw)))))
+
+            ;; string has 1-based index of last char in the
+            ;; match. end-ref is 0-based index of first char past
+            ;; the match
+            (end-ref   (string-to-number (elt fields 1)))
+            (pattern   (elt fields 2))
+            (string    (elt fields 3)))
+
+       (let ((msg (regex-tests-match pattern string (list start-ref end-ref))))
+         (when msg
+           (setq failures
+                 (cons (format "line number %d: Regex '%s': %s"
+                               line-number pattern msg)
+                       failures))))))
+    failures))
+
+(defconst regex-tests-TESTS-whitelist
+  [
+   ;; emacs doesn't barf on weird ranges such as [b-a], but simply
+   ;; fails to match
+   42
+
+   ;; emacs is more forgiving with * and ? that don't apply to
+   ;; characters
+   57 58 59 60
+
+   ;; emacs is more stringent with regexes involving unbalanced )
+   67
+  ]
+  "Line numbers in the TESTS test that should be skipped.
+These are false-positive test failures that represent
+known/benign differences in behavior.")
+
+;; - Format
+;;   - fields separated by :. Watch for [\[:xxx:]]
+;;   - expected:pattern:string
+;;
+;;   expected:
+;;   | 0 | successful match      |
+;;   | 1 | failed match          |
+;;   | 2 | regcomp() should fail |
+(defun regex-tests-TESTS ()
+  (let (failures)
+    (regex-tests-generic-line
+     ?# "TESTS" regex-tests-TESTS-whitelist
+     (if (save-excursion (re-search-forward "^\\([^:]+\\):\\(.*\\):\\([^:]*\\)$" nil t))
+         (let* ((what-failed
+                 (let ((raw (string-to-number (match-string 1))))
+                   (cond
+                    ((= raw 2) 'compilation-failed)
+                    ((= raw 1) 'search-failed)
+                    (t         t))))
+                (string  (match-string 3))
+                (pattern (regex-tests-unextend (match-string 2))))
+
+           (let ((msg (regex-tests-match pattern string nil (list what-failed))))
+             (when msg
+               (setq failures
+                     (cons (format "line number %d: Regex '%s': %s"
+                                   line-number pattern msg)
+                           failures)))))
+
+       (error "Error parsing TESTS file line: '%s'" (buffer-string))))
+    failures))
+
+(ert-deftest regex-tests-BOOST ()
+  "Tests of the regular expression engine.
+This evaluates the BOOST test cases from glibc."
+  (should-not (regex-tests-BOOST)))
+
+(ert-deftest regex-tests-PCRE ()
+  "Tests of the regular expression engine.
+This evaluates the PCRE test cases from glibc."
+  (should-not (regex-tests-PCRE)))
+
+(ert-deftest regex-tests-PTESTS ()
+  "Tests of the regular expression engine.
+This evaluates the PTESTS test cases from glibc."
+  (should-not (regex-tests-PTESTS)))
+
+(ert-deftest regex-tests-TESTS ()
+  "Tests of the regular expression engine.
+This evaluates the TESTS test cases from glibc."
+  (should-not (regex-tests-TESTS)))
+
+(ert-deftest regex-repeat-limit ()
+  "Test the #xFFFF repeat limit."
+  (should (string-match "\\`x\\{65535\\}" (make-string 65535 ?x)))
+  (should-not (string-match "\\`x\\{65535\\}" (make-string 65534 ?x)))
+  (should-error (string-match "\\`x\\{65536\\}" "X") :type 'invalid-regexp))
+
+(ert-deftest regexp-unibyte-unibyte ()
+  "Test matching a unibyte regexp against a unibyte string."
+  ;; Sanity check
+  (should-not (multibyte-string-p "ab"))
+  (should-not (multibyte-string-p "\xff"))
+  ;; ASCII
+  (should (string-match "a[b]" "ab"))
+  ;; Raw
+  (should (string-match "\xf1" "\xf1"))
+  (should-not (string-match "\xf1" "\xc1\xb1"))
+  ;; Raw, char alt
+  (should (string-match "[\xf1]" "\xf1"))
+  (should-not (string-match "[\xf1]" "\xc1\xb1"))
+  ;; Raw range
+  (should (string-match "[\x82-\xd3]" "\xbb"))
+  (should-not (string-match "[\x82-\xd3]" "a"))
+  (should-not (string-match "[\x82-\xd3]" "\x81"))
+  (should-not (string-match "[\x82-\xd3]" "\xd4"))
+  ;; ASCII-raw range
+  (should (string-match "[f-\xd3]" "q"))
+  (should (string-match "[f-\xd3]" "\xbb"))
+  (should-not (string-match "[f-\xd3]" "e"))
+  (should-not (string-match "[f-\xd3]" "\xd4")))
+
+(ert-deftest regexp-multibyte-multibyte ()
+  "Test matching a multibyte regexp against a multibyte string."
+  ;; Sanity check
+  (should (multibyte-string-p "åü"))
+  ;; ASCII
+  (should (string-match (string-to-multibyte "a[b]")
+                        (string-to-multibyte "ab")))
+  ;; Unicode
+  (should (string-match "å[ü]z" "åüz"))
+  (should-not (string-match "ü" (string-to-multibyte "\xc3\xbc")))
+  ;; Raw
+  (should (string-match (string-to-multibyte "\xf1")
+                        (string-to-multibyte "\xf1")))
+  (should-not (string-match (string-to-multibyte "\xf1")
+                            (string-to-multibyte "\xc1\xb1")))
+  (should-not (string-match (string-to-multibyte "\xc1\xb1")
+                            (string-to-multibyte "\xf1")))
+  ;; Raw, char alt
+  (should (string-match (string-to-multibyte "[\xf1]")
+                        (string-to-multibyte "\xf1")))
+  ;; Raw range
+  (should (string-match (string-to-multibyte "[\x82-\xd3]")
+                        (string-to-multibyte "\xbb")))
+  (should-not (string-match (string-to-multibyte "[\x82-\xd3]") "a"))
+  (should-not (string-match (string-to-multibyte "[\x82-\xd3]") "Å"))
+  (should-not (string-match (string-to-multibyte "[\x82-\xd3]") "ü"))
+  (should-not (string-match (string-to-multibyte "[\x82-\xd3]") "\x81"))
+  (should-not (string-match (string-to-multibyte "[\x82-\xd3]") "\xd4"))
+  ;; ASCII-raw range: should exclude U+0100..U+10FFFF
+  (should (string-match (string-to-multibyte "[f-\xd3]")
+                        (string-to-multibyte "q")))
+  (should (string-match (string-to-multibyte "[f-\xd3]")
+                        (string-to-multibyte "\xbb")))
+  (should-not (string-match (string-to-multibyte "[f-\xd3]") "e"))
+  (should-not (string-match (string-to-multibyte "[f-\xd3]") "Å"))
+  (should-not (string-match (string-to-multibyte "[f-\xd3]") "ü"))
+  (should-not (string-match (string-to-multibyte "[f-\xd3]") "\xd4"))
+  ;; Unicode-raw range: should be empty
+  (should-not (string-match "[å-\xd3]" "å"))
+  (should-not (string-match "[å-\xd3]" (string-to-multibyte "\xd3")))
+  (should-not (string-match "[å-\xd3]" (string-to-multibyte "\xbb")))
+  (should-not (string-match "[å-\xd3]" "ü"))
+  ;; No equivalence between raw bytes and latin-1
+  (should-not (string-match "å" (string-to-multibyte "\xe5")))
+  (should-not (string-match "[å]" (string-to-multibyte "\xe5")))
+  (should-not (string-match "\xe5" "å"))
+  (should-not (string-match "[\xe5]" "å")))
+
+(ert-deftest regexp-unibyte-multibyte ()
+  "Test matching a unibyte regexp against a multibyte string."
+  ;; ASCII
+  (should (string-match "a[b]" (string-to-multibyte "ab")))
+  ;; Unicode
+  (should (string-match "a.[^b]c" (string-to-multibyte "aåüc")))
+  ;; Raw
+  (should (string-match "\xf1" (string-to-multibyte "\xf1")))
+  (should-not (string-match "\xc1\xb1" (string-to-multibyte "\xf1")))
+  ;; Raw, char alt
+  (should (string-match "[\xf1]" (string-to-multibyte "\xf1")))
+  (should-not (string-match "[\xc1][\xb1]" (string-to-multibyte "\xf1")))
+  ;; ASCII-raw range: should exclude U+0100..U+10FFFF
+  (should (string-match "[f-\xd3]" (string-to-multibyte "q")))
+  (should (string-match "[f-\xd3]" (string-to-multibyte "\xbb")))
+  (should-not (string-match "[f-\xd3]" "e"))
+  (should-not (string-match "[f-\xd3]" "Å"))
+  (should-not (string-match "[f-\xd3]" "ü"))
+  (should-not (string-match "[f-\xd3]" "\xd4"))
+  ;; No equivalence between raw bytes and latin-1
+  (should-not (string-match "\xe5" "å"))
+  (should-not (string-match "[\xe5]" "å")))
+
+(ert-deftest regexp-multibyte-unibyte ()
+  "Test matching a multibyte regexp against a unibyte string."
+  ;; ASCII
+  (should (string-match (string-to-multibyte "a[b]") "ab"))
+  ;; Unicode
+  (should (string-match "a[^ü]c" "abc"))
+  (should-not (string-match "ü" "\xc3\xbc"))
+  ;; Raw
+  (should (string-match (string-to-multibyte "\xf1") "\xf1"))
+  (should-not (string-match (string-to-multibyte "\xf1") "\xc1\xb1"))
+  ;; Raw, char alt
+  (should (string-match (string-to-multibyte "[\xf1]") "\xf1"))
+  (should-not (string-match (string-to-multibyte "[\xf1]") "\xc1\xb1"))
+  ;; ASCII-raw range: should exclude U+0100..U+10FFFF
+  (should (string-match (string-to-multibyte "[f-\xd3]") "q"))
+  (should (string-match (string-to-multibyte "[f-\xd3]") "\xbb"))
+  (should-not (string-match (string-to-multibyte "[f-\xd3]") "e"))
+  (should-not (string-match (string-to-multibyte "[f-\xd3]") "\xd4"))
+  ;; Unicode-raw range: should be empty
+  (should-not (string-match "[å-\xd3]" "\xd3"))
+  (should-not (string-match "[å-\xd3]" "\xbb"))
+  ;; No equivalence between raw bytes and latin-1
+  (should-not (string-match "å" "\xe5"))
+  (should-not (string-match "[å]" "\xe5")))
+
+(ert-deftest regexp-case-fold ()
+  "Test case-sensitive and case-insensitive matching."
+  (let ((case-fold-search nil))
+    (should (equal (string-match "aB" "ABaB") 2))
+    (should (equal (string-match "åÄ" "ÅäåäÅÄåÄ") 6))
+    (should (equal (string-match "λΛ" "lΛλλΛ") 3))
+    (should (equal (string-match "шШ" "zШшшШ") 3))
+    (should (equal (string-match "[[:alpha:]]+" ".3aBåÄßλΛшШ中﷽") 2))
+    (should (equal (match-end 0) 12))
+    (should (equal (string-match "[[:alnum:]]+" ".3aBåÄßλΛшШ中﷽") 1))
+    (should (equal (match-end 0) 12))
+    (should (equal (string-match "[[:upper:]]+" ".3aåλшBÄΛШ中﷽") 6))
+    (should (equal (match-end 0) 10))
+    (should (equal (string-match "[[:lower:]]+" ".3BÄΛШaåλш中﷽") 6))
+    (should (equal (match-end 0) 10)))
+  (let ((case-fold-search t))
+    (should (equal (string-match "aB" "ABaB") 0))
+    (should (equal (string-match "åÄ" "ÅäåäÅÄåÄ") 0))
+    (should (equal (string-match "λΛ" "lΛλλΛ") 1))
+    (should (equal (string-match "шШ" "zШшшШ") 1))
+    (should (equal (string-match "[[:alpha:]]+" ".3aBåÄßλΛшШ中﷽") 2))
+    (should (equal (match-end 0) 12))
+    (should (equal (string-match "[[:alnum:]]+" ".3aBåÄßλΛшШ中﷽") 1))
+    (should (equal (match-end 0) 12))
+    (should (equal (string-match "[[:upper:]]+" ".3aåλшBÄΛШ中﷽") 2))
+    (should (equal (match-end 0) 10))
+    (should (equal (string-match "[[:lower:]]+" ".3BÄΛШaåλш中﷽") 2))
+    (should (equal (match-end 0) 10))))
+
+(ert-deftest regexp-eszett ()
+  "Test matching of ß and ẞ."
+  ;; Sanity checks.
+  (should (equal (upcase "ß") "SS"))
+  (should (equal (downcase "ß") "ß"))
+  (should (equal (capitalize "ß") "Ss"))  ; undeutsch...
+  (should (equal (upcase "ẞ") "ẞ"))
+  (should (equal (downcase "ẞ") "ß"))
+  (should (equal (capitalize "ẞ") "ẞ"))
+  ;; ß is a lower-case letter (Ll); ẞ is an upper-case letter (Lu).
+  (let ((case-fold-search nil))
+    (should (equal (string-match "ß" "ß") 0))
+    (should (equal (string-match "ß" "ẞ") nil))
+    (should (equal (string-match "ẞ" "ß") nil))
+    (should (equal (string-match "ẞ" "ẞ") 0))
+    (should (equal (string-match "[[:alpha:]]" "ß") 0))
+    ;; bug#11309
+    (should (equal (string-match "[[:lower:]]" "ß") 0))
+    (should (equal (string-match "[[:upper:]]" "ß") nil))
+    (should (equal (string-match "[[:alpha:]]" "ẞ") 0))
+    (should (equal (string-match "[[:lower:]]" "ẞ") nil))
+    (should (equal (string-match "[[:upper:]]" "ẞ") 0)))
+  (let ((case-fold-search t))
+    (should (equal (string-match "ß" "ß") 0))
+    (should (equal (string-match "ß" "ẞ") 0))
+    (should (equal (string-match "ẞ" "ß") 0))
+    (should (equal (string-match "ẞ" "ẞ") 0))
+    (should (equal (string-match "[[:alpha:]]" "ß") 0))
+    ;; bug#11309
+    (should (equal (string-match "[[:lower:]]" "ß") 0))
+    (should (equal (string-match "[[:upper:]]" "ß") 0))
+    (should (equal (string-match "[[:alpha:]]" "ẞ") 0))
+    (should (equal (string-match "[[:lower:]]" "ẞ") 0))
+    (should (equal (string-match "[[:upper:]]" "ẞ") 0))))
+
+;;; regex-emacs-tests.el ends here