diff options
Diffstat (limited to 'lisp/emacs-lisp/regexp-opt.el')
-rw-r--r-- | lisp/emacs-lisp/regexp-opt.el | 49 |
1 files changed, 25 insertions, 24 deletions
diff --git a/lisp/emacs-lisp/regexp-opt.el b/lisp/emacs-lisp/regexp-opt.el index 20d56a15724..de9966c0af0 100644 --- a/lisp/emacs-lisp/regexp-opt.el +++ b/lisp/emacs-lisp/regexp-opt.el @@ -1,7 +1,6 @@ ;;; regexp-opt.el --- generate efficient regexps to match strings -;; Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, -;; 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. +;; Copyright (C) 1994-2013 Free Software Foundation, Inc. ;; Author: Simon Marshall <simon@gnu.org> ;; Maintainer: FSF @@ -96,19 +95,24 @@ The returned regexp is typically more efficient than the equivalent regexp: (concat open (mapconcat 'regexp-quote STRINGS \"\\\\|\") close)) If PAREN is `words', then the resulting regexp is additionally surrounded -by \\=\\< and \\>." +by \\=\\< and \\>. +If PAREN is `symbols', then the resulting regexp is additionally surrounded +by \\=\\_< and \\_>." (save-match-data ;; Recurse on the sorted list. (let* ((max-lisp-eval-depth 10000) (max-specpdl-size 10000) (completion-ignore-case nil) (completion-regexp-list nil) - (words (eq paren 'words)) (open (cond ((stringp paren) paren) (paren "\\("))) (sorted-strings (delete-dups (sort (copy-sequence strings) 'string-lessp))) (re (regexp-opt-group sorted-strings (or open t) (not open)))) - (if words (concat "\\<" re "\\>") re)))) + (cond ((eq paren 'words) + (concat "\\<" re "\\>")) + ((eq paren 'symbols) + (concat "\\_<" re "\\_>")) + (t re))))) ;;;###autoload (defun regexp-opt-depth (regexp) @@ -120,7 +124,7 @@ This means the number of non-shy regexp grouping constructs (string-match regexp "") ;; Count the number of open parentheses in REGEXP. (let ((count 0) start last) - (while (string-match "\\\\(\\(\\?:\\)?" regexp start) + (while (string-match "\\\\(\\(\\?[0-9]*:\\)?" regexp start) (setq start (match-end 0)) ; Start of next search. (when (and (not (match-beginning 1)) (subregexp-context-p regexp (match-beginning 0) last)) @@ -132,14 +136,11 @@ This means the number of non-shy regexp grouping constructs ;;; Workhorse functions. -(eval-when-compile - (require 'cl)) - (defun regexp-opt-group (strings &optional paren lax) "Return a regexp to match a string in the sorted list STRINGS. If PAREN non-nil, output regexp parentheses around returned regexp. If LAX non-nil, don't output parentheses if it doesn't require them. -Merges keywords to avoid backtracking in Emacs' regexp matcher." +Merges keywords to avoid backtracking in Emacs's regexp matcher." ;; The basic idea is to find the shortest common prefix or suffix, remove it ;; and recurse. If there is no prefix, we divide the list into two so that ;; \(at least) one half will have at least a one-character common prefix. @@ -233,7 +234,8 @@ Merges keywords to avoid backtracking in Emacs' regexp matcher." (defun regexp-opt-charset (chars) - "Return a regexp to match a character in CHARS." + "Return a regexp to match a character in CHARS. +CHARS should be a list of characters." ;; The basic idea is to find character ranges. Also we take care in the ;; position of character set meta characters in the character set regexp. ;; @@ -244,15 +246,15 @@ Merges keywords to avoid backtracking in Emacs' regexp matcher." ;; ;; Make a character map but extract character set meta characters. (dolist (char chars) - (case char - (?\] - (setq bracket "]")) - (?^ - (setq caret "^")) - (?- - (setq dash "-")) - (otherwise - (aset charmap char t)))) + (cond + ((eq char ?\]) + (setq bracket "]")) + ((eq char ?^) + (setq caret "^")) + ((eq char ?-) + (setq dash "-")) + (t + (aset charmap char t)))) ;; ;; Make a character set from the map using ranges where applicable. (map-char-table @@ -264,14 +266,14 @@ Merges keywords to avoid backtracking in Emacs' regexp matcher." (setq charset (format "%s%c-%c" charset start end)) (while (>= end start) (setq charset (format "%s%c" charset start)) - (incf start))) + (setq start (1+ start)))) (setq start (car c) end (cdr c))) (if (= (1- c) end) (setq end c) (if (> end (+ start 2)) (setq charset (format "%s%c-%c" charset start end)) (while (>= end start) (setq charset (format "%s%c" charset start)) - (incf start))) + (setq start (1+ start)))) (setq start c end c))))) charmap) (when (>= end start) @@ -279,7 +281,7 @@ Merges keywords to avoid backtracking in Emacs' regexp matcher." (setq charset (format "%s%c-%c" charset start end)) (while (>= end start) (setq charset (format "%s%c" charset start)) - (incf start)))) + (setq start (1+ start))))) ;; ;; Make sure a caret is not first and a dash is first or last. (if (and (string-equal charset "") (string-equal bracket "")) @@ -288,5 +290,4 @@ Merges keywords to avoid backtracking in Emacs' regexp matcher." (provide 'regexp-opt) -;; arch-tag: 6c5a66f4-29af-4fd6-8c3b-4b554d5b4370 ;;; regexp-opt.el ends here |