summaryrefslogtreecommitdiff
path: root/lisp/emacs-lisp/regexp-opt.el
diff options
context:
space:
mode:
authorMattias EngdegÄrd <mattiase@acm.org>2019-06-30 12:53:52 +0200
committerMattias EngdegÄrd <mattiase@acm.org>2019-07-04 17:18:15 +0200
commit3fd74915121a3eac265170e20bd19b3cde6a2589 (patch)
tree7abac1b1a1c5241dc9226edf2e99373dd0e966f8 /lisp/emacs-lisp/regexp-opt.el
parent2bc90e0ce0f349b8c80aa8df782f991b64aa7398 (diff)
downloademacs-3fd74915121a3eac265170e20bd19b3cde6a2589.tar.gz
emacs-3fd74915121a3eac265170e20bd19b3cde6a2589.tar.bz2
emacs-3fd74915121a3eac265170e20bd19b3cde6a2589.zip
Optimise more inputs to `regexp-opt' (bug#36444)
Use a more precise test to determine whether the input to `regexp-opt' is safe to optimise when KEEP-ORDER is non-nil, permitting more inputs to be optimised than before. For example, ("good" "goal" "go") is now accepted. * lisp/emacs-lisp/regexp-opt.el (regexp-opt): More precise test for whether the list is safe w.r.t. KEEP-ORDER. (regexp-opt--contains-prefix): Remove. * test/lisp/emacs-lisp/regexp-opt-tests.el: Use lexical-binding. (regexp-opt-test--permutation, regexp-opt-test--factorial) (regexp-opt-test--permutations, regexp-opt-test--match-all) (regexp-opt-test--check-perm, regexp-opt-test--explain-perm) (regexp-opt-keep-order): Test KEEP-ORDER.
Diffstat (limited to 'lisp/emacs-lisp/regexp-opt.el')
-rw-r--r--lisp/emacs-lisp/regexp-opt.el46
1 files changed, 22 insertions, 24 deletions
diff --git a/lisp/emacs-lisp/regexp-opt.el b/lisp/emacs-lisp/regexp-opt.el
index b6104f22e7d..ab52003cdf7 100644
--- a/lisp/emacs-lisp/regexp-opt.el
+++ b/lisp/emacs-lisp/regexp-opt.el
@@ -140,21 +140,34 @@ usually more efficient than that of a simplified version:
(completion-ignore-case nil)
(completion-regexp-list nil)
(open (cond ((stringp paren) paren) (paren "\\(")))
- (sorted-strings (delete-dups
- (sort (copy-sequence strings) 'string-lessp)))
(re
(cond
;; No strings: return an unmatchable regexp.
((null strings)
(concat (or open "\\(?:") regexp-unmatchable "\\)"))
- ;; If we cannot reorder, give up all attempts at
- ;; optimisation. There is room for improvement (Bug#34641).
- ((and keep-order (regexp-opt--contains-prefix sorted-strings))
- (concat (or open "\\(?:")
- (mapconcat #'regexp-quote strings "\\|")
- "\\)"))
+
+ ;; The algorithm will generate a pattern that matches
+ ;; longer strings in the list before shorter. If the
+ ;; list order matters, then no string must come after a
+ ;; proper prefix of that string. To check this, verify
+ ;; that a straight or-pattern matches each string
+ ;; entirely.
+ ((and keep-order
+ (let* ((case-fold-search nil)
+ (alts (mapconcat #'regexp-quote strings "\\|")))
+ (and (let ((s strings))
+ (while (and s
+ (string-match alts (car s))
+ (= (match-end 0) (length (car s))))
+ (setq s (cdr s)))
+ ;; If we exited early, we found evidence that
+ ;; regexp-opt-group cannot be used.
+ s)
+ (concat (or open "\\(?:") alts "\\)")))))
(t
- (regexp-opt-group sorted-strings (or open t) (not open))))))
+ (regexp-opt-group
+ (delete-dups (sort (copy-sequence strings) 'string-lessp))
+ (or open t) (not open))))))
(cond ((eq paren 'words)
(concat "\\<" re "\\>"))
((eq paren 'symbols)
@@ -339,21 +352,6 @@ never matches anything."
(concat "[" all "]")))))))
-(defun regexp-opt--contains-prefix (strings)
- "Whether STRINGS contains a proper prefix of one of its other elements.
-STRINGS must be a list of sorted strings without duplicates."
- (let ((s strings))
- ;; In a lexicographically sorted list, a string always immediately
- ;; succeeds one of its prefixes.
- (while (and (cdr s)
- (not (string-equal
- (car s)
- (substring (cadr s) 0 (min (length (car s))
- (length (cadr s)))))))
- (setq s (cdr s)))
- (cdr s)))
-
-
(provide 'regexp-opt)
;;; regexp-opt.el ends here