diff options
author | Mattias EngdegÄrd <mattiase@acm.org> | 2019-06-30 12:53:52 +0200 |
---|---|---|
committer | Mattias EngdegÄrd <mattiase@acm.org> | 2019-07-04 17:18:15 +0200 |
commit | 3fd74915121a3eac265170e20bd19b3cde6a2589 (patch) | |
tree | 7abac1b1a1c5241dc9226edf2e99373dd0e966f8 /lisp/emacs-lisp/regexp-opt.el | |
parent | 2bc90e0ce0f349b8c80aa8df782f991b64aa7398 (diff) | |
download | emacs-3fd74915121a3eac265170e20bd19b3cde6a2589.tar.gz emacs-3fd74915121a3eac265170e20bd19b3cde6a2589.tar.bz2 emacs-3fd74915121a3eac265170e20bd19b3cde6a2589.zip |
Optimise more inputs to `regexp-opt' (bug#36444)
Use a more precise test to determine whether the input to `regexp-opt'
is safe to optimise when KEEP-ORDER is non-nil, permitting more inputs
to be optimised than before. For example, ("good" "goal" "go") is now
accepted.
* lisp/emacs-lisp/regexp-opt.el (regexp-opt):
More precise test for whether the list is safe w.r.t. KEEP-ORDER.
(regexp-opt--contains-prefix): Remove.
* test/lisp/emacs-lisp/regexp-opt-tests.el: Use lexical-binding.
(regexp-opt-test--permutation, regexp-opt-test--factorial)
(regexp-opt-test--permutations, regexp-opt-test--match-all)
(regexp-opt-test--check-perm, regexp-opt-test--explain-perm)
(regexp-opt-keep-order): Test KEEP-ORDER.
Diffstat (limited to 'lisp/emacs-lisp/regexp-opt.el')
-rw-r--r-- | lisp/emacs-lisp/regexp-opt.el | 46 |
1 files changed, 22 insertions, 24 deletions
diff --git a/lisp/emacs-lisp/regexp-opt.el b/lisp/emacs-lisp/regexp-opt.el index b6104f22e7d..ab52003cdf7 100644 --- a/lisp/emacs-lisp/regexp-opt.el +++ b/lisp/emacs-lisp/regexp-opt.el @@ -140,21 +140,34 @@ usually more efficient than that of a simplified version: (completion-ignore-case nil) (completion-regexp-list nil) (open (cond ((stringp paren) paren) (paren "\\("))) - (sorted-strings (delete-dups - (sort (copy-sequence strings) 'string-lessp))) (re (cond ;; No strings: return an unmatchable regexp. ((null strings) (concat (or open "\\(?:") regexp-unmatchable "\\)")) - ;; If we cannot reorder, give up all attempts at - ;; optimisation. There is room for improvement (Bug#34641). - ((and keep-order (regexp-opt--contains-prefix sorted-strings)) - (concat (or open "\\(?:") - (mapconcat #'regexp-quote strings "\\|") - "\\)")) + + ;; The algorithm will generate a pattern that matches + ;; longer strings in the list before shorter. If the + ;; list order matters, then no string must come after a + ;; proper prefix of that string. To check this, verify + ;; that a straight or-pattern matches each string + ;; entirely. + ((and keep-order + (let* ((case-fold-search nil) + (alts (mapconcat #'regexp-quote strings "\\|"))) + (and (let ((s strings)) + (while (and s + (string-match alts (car s)) + (= (match-end 0) (length (car s)))) + (setq s (cdr s))) + ;; If we exited early, we found evidence that + ;; regexp-opt-group cannot be used. + s) + (concat (or open "\\(?:") alts "\\)"))))) (t - (regexp-opt-group sorted-strings (or open t) (not open)))))) + (regexp-opt-group + (delete-dups (sort (copy-sequence strings) 'string-lessp)) + (or open t) (not open)))))) (cond ((eq paren 'words) (concat "\\<" re "\\>")) ((eq paren 'symbols) @@ -339,21 +352,6 @@ never matches anything." (concat "[" all "]"))))))) -(defun regexp-opt--contains-prefix (strings) - "Whether STRINGS contains a proper prefix of one of its other elements. -STRINGS must be a list of sorted strings without duplicates." - (let ((s strings)) - ;; In a lexicographically sorted list, a string always immediately - ;; succeeds one of its prefixes. - (while (and (cdr s) - (not (string-equal - (car s) - (substring (cadr s) 0 (min (length (car s)) - (length (cadr s))))))) - (setq s (cdr s))) - (cdr s))) - - (provide 'regexp-opt) ;;; regexp-opt.el ends here |