diff options
Diffstat (limited to 'src/search.c')
-rw-r--r-- | src/search.c | 1431 |
1 files changed, 723 insertions, 708 deletions
diff --git a/src/search.c b/src/search.c index 0cb1ec41f59..b5d6a442c0f 100644 --- a/src/search.c +++ b/src/search.c @@ -1,6 +1,6 @@ /* String search routines for GNU Emacs. -Copyright (C) 1985-1987, 1993-1994, 1997-1999, 2001-2017 Free Software +Copyright (C) 1985-1987, 1993-1994, 1997-1999, 2001-2022 Free Software Foundation, Inc. This file is part of GNU Emacs. @@ -29,8 +29,10 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */ #include "region-cache.h" #include "blockinput.h" #include "intervals.h" +#include "pdumper.h" +#include "composite.h" -#include "regex.h" +#include "regex-emacs.h" #define REGEXP_CACHE_SIZE 20 @@ -48,6 +50,8 @@ struct regexp_cache char fastmap[0400]; /* True means regexp was compiled to do full POSIX backtracking. */ bool posix; + /* True means we're inside a buffer match. */ + bool busy; }; /* The instances of that struct. */ @@ -56,31 +60,6 @@ static struct regexp_cache searchbufs[REGEXP_CACHE_SIZE]; /* The head of the linked list; points to the most recently used buffer. */ static struct regexp_cache *searchbuf_head; - -/* Every call to re_match, etc., must pass &search_regs as the regs - argument unless you can show it is unnecessary (i.e., if re_match - is certainly going to be called again before region-around-match - can be called). - - Since the registers are now dynamically allocated, we need to make - sure not to refer to the Nth register before checking that it has - been allocated by checking search_regs.num_regs. - - The regex code keeps track of whether it has allocated the search - buffer using bits in the re_pattern_buffer. This means that whenever - you compile a new pattern, it completely forgets whether it has - allocated any registers, and will allocate new registers the next - time you call a searching or matching function. Therefore, we need - to call re_set_registers after compiling a new pattern or after - setting the match registers, so that the regex functions will be - able to free or re-allocate it properly. */ -/* static struct re_registers search_regs; */ - -/* The buffer in which the last search was performed, or - Qt if the last search was done in a string; - Qnil if no searching has been done yet. */ -/* static Lisp_Object last_thing_searched; */ - static void set_search_regs (ptrdiff_t, ptrdiff_t); static void save_search_regs (void); static EMACS_INT simple_search (EMACS_INT, unsigned char *, ptrdiff_t, @@ -93,7 +72,9 @@ static EMACS_INT search_buffer (Lisp_Object, ptrdiff_t, ptrdiff_t, ptrdiff_t, ptrdiff_t, EMACS_INT, int, Lisp_Object, Lisp_Object, bool); -static _Noreturn void +Lisp_Object re_match_object; + +static AVOID matcher_overflow (void) { error ("Stack overflow in regexp matcher"); @@ -110,14 +91,6 @@ freeze_buffer_relocation (void) #endif } -static void -thaw_buffer_relocation (void) -{ -#ifdef REL_ALLOC - unbind_to (SPECPDL_INDEX () - 1, Qnil); -#endif -} - /* Compile a regexp and signal a Lisp error if anything goes wrong. PATTERN is the pattern to compile. CP is the place to put the result. @@ -134,8 +107,9 @@ compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern, const char *whitespace_regexp; char *val; + eassert (!cp->busy); cp->regexp = Qnil; - cp->buf.translate = (! NILP (translate) ? translate : make_number (0)); + cp->buf.translate = translate; cp->posix = posix; cp->buf.multibyte = STRING_MULTIBYTE (pattern); cp->buf.charset_unibyte = charset_unibyte; @@ -144,12 +118,6 @@ compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern, else cp->f_whitespace_regexp = Qnil; - /* rms: I think BLOCK_INPUT is not needed here any more, - because regex.c defines malloc to call xmalloc. - Using BLOCK_INPUT here means the debugger won't run if an error occurs. - So let's turn it off. */ - /* BLOCK_INPUT; */ - whitespace_regexp = STRINGP (Vsearch_spaces_regexp) ? SSDATA (Vsearch_spaces_regexp) : NULL; @@ -160,7 +128,6 @@ compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern, syntax-table, it can only be reused with *this* syntax table. */ cp->syntax_table = cp->buf.used_syntax ? BVAR (current_buffer, syntax_table) : Qt; - /* unblock_input (); */ if (val) xsignal1 (Qinvalid_regexp, build_string (val)); @@ -177,10 +144,11 @@ shrink_regexp_cache (void) struct regexp_cache *cp; for (cp = searchbuf_head; cp != 0; cp = cp->next) - { - cp->buf.allocated = cp->buf.used; - cp->buf.buffer = xrealloc (cp->buf.buffer, cp->buf.used); - } + if (!cp->busy) + { + cp->buf.allocated = cp->buf.used; + cp->buf.buffer = xrealloc (cp->buf.buffer, cp->buf.used); + } } /* Clear the regexp cache w.r.t. a particular syntax table, @@ -197,10 +165,25 @@ clear_regexp_cache (void) /* It's tempting to compare with the syntax-table we've actually changed, but it's not sufficient because char-table inheritance means that modifying one syntax-table can change others at the same time. */ - if (!EQ (searchbufs[i].syntax_table, Qt)) + if (!searchbufs[i].busy && !EQ (searchbufs[i].syntax_table, Qt)) searchbufs[i].regexp = Qnil; } +static void +unfreeze_pattern (void *arg) +{ + struct regexp_cache *searchbuf = arg; + searchbuf->busy = false; +} + +static void +freeze_pattern (struct regexp_cache *searchbuf) +{ + eassert (!searchbuf->busy); + record_unwind_protect_ptr (unfreeze_pattern, searchbuf); + searchbuf->busy = true; +} + /* Compile a regexp if necessary, but first check to see if there's one in the cache. PATTERN is the pattern to compile. @@ -212,15 +195,17 @@ clear_regexp_cache (void) POSIX is true if we want full backtracking (POSIX style) for this pattern. False means backtrack only enough to get a valid match. */ -struct re_pattern_buffer * +static struct regexp_cache * compile_pattern (Lisp_Object pattern, struct re_registers *regp, Lisp_Object translate, bool posix, bool multibyte) { - struct regexp_cache *cp, **cpp; + struct regexp_cache *cp, **cpp, **lru_nonbusy; - for (cpp = &searchbuf_head; ; cpp = &cp->next) + for (cpp = &searchbuf_head, lru_nonbusy = NULL; ; cpp = &cp->next) { cp = *cpp; + if (!cp->busy) + lru_nonbusy = cpp; /* Entries are initialized to nil, and may be set to nil by compile_pattern_1 if the pattern isn't valid. Don't apply string accessors in those cases. However, compile_pattern_1 @@ -229,9 +214,10 @@ compile_pattern (Lisp_Object pattern, struct re_registers *regp, if (NILP (cp->regexp)) goto compile_it; if (SCHARS (cp->regexp) == SCHARS (pattern) + && !cp->busy && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern) && !NILP (Fstring_equal (cp->regexp, pattern)) - && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0))) + && EQ (cp->buf.translate, translate) && cp->posix == posix && (EQ (cp->syntax_table, Qt) || EQ (cp->syntax_table, BVAR (current_buffer, syntax_table))) @@ -239,12 +225,16 @@ compile_pattern (Lisp_Object pattern, struct re_registers *regp, && cp->buf.charset_unibyte == charset_unibyte) break; - /* If we're at the end of the cache, compile into the nil cell - we found, or the last (least recently used) cell with a - string value. */ + /* If we're at the end of the cache, compile into the last + (least recently used) non-busy cell in the cache. */ if (cp->next == 0) { + if (!lru_nonbusy) + error ("Too much matching reentrancy"); + cpp = lru_nonbusy; + cp = *cpp; compile_it: + eassert (!cp->busy); compile_pattern_1 (cp, pattern, translate, posix); break; } @@ -265,35 +255,38 @@ compile_pattern (Lisp_Object pattern, struct re_registers *regp, /* The compiled pattern can be used both for multibyte and unibyte target. But, we have to tell which the pattern is used for. */ cp->buf.target_multibyte = multibyte; - - return &cp->buf; + return cp; } static Lisp_Object -looking_at_1 (Lisp_Object string, bool posix) +looking_at_1 (Lisp_Object string, bool posix, bool modify_data) { Lisp_Object val; unsigned char *p1, *p2; ptrdiff_t s1, s2; register ptrdiff_t i; - struct re_pattern_buffer *bufp; if (running_asynch_code) save_search_regs (); - /* This is so set_image_of_range_1 in regex.c can find the EQV table. */ + /* This is so set_image_of_range_1 in regex-emacs.c can find the EQV + table. */ set_char_table_extras (BVAR (current_buffer, case_canon_table), 2, BVAR (current_buffer, case_eqv_table)); CHECK_STRING (string); - bufp = compile_pattern (string, - (NILP (Vinhibit_changing_match_data) - ? &search_regs : NULL), - (!NILP (BVAR (current_buffer, case_fold_search)) - ? BVAR (current_buffer, case_canon_table) : Qnil), - posix, - !NILP (BVAR (current_buffer, enable_multibyte_characters))); + + /* Snapshot in case Lisp changes the value. */ + bool modify_match_data = NILP (Vinhibit_changing_match_data) && modify_data; + + struct regexp_cache *cache_entry = compile_pattern ( + string, + modify_match_data ? &search_regs : NULL, + (!NILP (BVAR (current_buffer, case_fold_search)) + ? BVAR (current_buffer, case_canon_table) : Qnil), + posix, + !NILP (BVAR (current_buffer, enable_multibyte_characters))); /* Do a pending quit right away, to avoid paradoxical behavior */ maybe_quit (); @@ -317,21 +310,23 @@ looking_at_1 (Lisp_Object string, bool posix) s2 = 0; } - re_match_object = Qnil; - + specpdl_ref count = SPECPDL_INDEX (); freeze_buffer_relocation (); - i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2, + freeze_pattern (cache_entry); + re_match_object = Qnil; + i = re_match_2 (&cache_entry->buf, (char *) p1, s1, (char *) p2, s2, PT_BYTE - BEGV_BYTE, - (NILP (Vinhibit_changing_match_data) - ? &search_regs : NULL), + modify_match_data ? &search_regs : NULL, ZV_BYTE - BEGV_BYTE); - thaw_buffer_relocation (); if (i == -2) - matcher_overflow (); + { + unbind_to (count, Qnil); + matcher_overflow (); + } val = (i >= 0 ? Qt : Qnil); - if (NILP (Vinhibit_changing_match_data) && i >= 0) + if (modify_match_data && i >= 0) { for (i = 0; i < search_regs.num_regs; i++) if (search_regs.start[i] >= 0) @@ -345,38 +340,39 @@ looking_at_1 (Lisp_Object string, bool posix) XSETBUFFER (last_thing_searched, current_buffer); } - return val; + return unbind_to (count, val); } -DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0, +DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 2, 0, doc: /* Return t if text after point matches regular expression REGEXP. -This function modifies the match data that `match-beginning', -`match-end' and `match-data' access; save and restore the match -data if you want to preserve them. */) - (Lisp_Object regexp) +By default, this function modifies the match data that +`match-beginning', `match-end' and `match-data' access. If +INHIBIT-MODIFY is non-nil, don't modify the match data. */) + (Lisp_Object regexp, Lisp_Object inhibit_modify) { - return looking_at_1 (regexp, 0); + return looking_at_1 (regexp, 0, NILP (inhibit_modify)); } -DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0, - doc: /* Return t if text after point matches regular expression REGEXP. -Find the longest match, in accord with Posix regular expression rules. -This function modifies the match data that `match-beginning', -`match-end' and `match-data' access; save and restore the match -data if you want to preserve them. */) - (Lisp_Object regexp) +DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 2, 0, + doc: /* Return t if text after point matches REGEXP according to Posix rules. +Find the longest match, in accordance with Posix regular expression rules. + +By default, this function modifies the match data that +`match-beginning', `match-end' and `match-data' access. If +INHIBIT-MODIFY is non-nil, don't modify the match data. */) + (Lisp_Object regexp, Lisp_Object inhibit_modify) { - return looking_at_1 (regexp, 1); + return looking_at_1 (regexp, 1, NILP (inhibit_modify)); } static Lisp_Object string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start, - bool posix) + bool posix, bool modify_data) { ptrdiff_t val; - struct re_pattern_buffer *bufp; EMACS_INT pos; ptrdiff_t pos_byte, i; + bool modify_match_data = NILP (Vinhibit_changing_match_data) && modify_data; if (running_asynch_code) save_search_regs (); @@ -390,8 +386,8 @@ string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start, { ptrdiff_t len = SCHARS (string); - CHECK_NUMBER (start); - pos = XINT (start); + CHECK_FIXNUM (start); + pos = XFIXNUM (start); if (pos < 0 && -pos <= len) pos = len + pos; else if (0 > pos || pos > len) @@ -399,34 +395,37 @@ string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start, pos_byte = string_char_to_byte (string, pos); } - /* This is so set_image_of_range_1 in regex.c can find the EQV table. */ + /* This is so set_image_of_range_1 in regex-emacs.c can find the EQV + table. */ set_char_table_extras (BVAR (current_buffer, case_canon_table), 2, BVAR (current_buffer, case_eqv_table)); - bufp = compile_pattern (regexp, - (NILP (Vinhibit_changing_match_data) - ? &search_regs : NULL), - (!NILP (BVAR (current_buffer, case_fold_search)) - ? BVAR (current_buffer, case_canon_table) : Qnil), - posix, - STRING_MULTIBYTE (string)); + specpdl_ref count = SPECPDL_INDEX (); + struct regexp_cache *cache_entry + = compile_pattern (regexp, + modify_match_data ? &search_regs : NULL, + (!NILP (BVAR (current_buffer, case_fold_search)) + ? BVAR (current_buffer, case_canon_table) + : Qnil), + posix, + STRING_MULTIBYTE (string)); + freeze_pattern (cache_entry); re_match_object = string; - - val = re_search (bufp, SSDATA (string), + val = re_search (&cache_entry->buf, SSDATA (string), SBYTES (string), pos_byte, SBYTES (string) - pos_byte, - (NILP (Vinhibit_changing_match_data) - ? &search_regs : NULL)); + (modify_match_data ? &search_regs : NULL)); + unbind_to (count, Qnil); /* Set last_thing_searched only when match data is changed. */ - if (NILP (Vinhibit_changing_match_data)) + if (modify_match_data) last_thing_searched = Qt; if (val == -2) matcher_overflow (); if (val < 0) return Qnil; - if (NILP (Vinhibit_changing_match_data)) + if (modify_match_data) for (i = 0; i < search_regs.num_regs; i++) if (search_regs.start[i] >= 0) { @@ -436,35 +435,45 @@ string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start, = string_byte_to_char (string, search_regs.end[i]); } - return make_number (string_byte_to_char (string, val)); + return make_fixnum (string_byte_to_char (string, val)); } -DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0, +DEFUN ("string-match", Fstring_match, Sstring_match, 2, 4, 0, doc: /* Return index of start of first match for REGEXP in STRING, or nil. Matching ignores case if `case-fold-search' is non-nil. If third arg START is non-nil, start search at that index in STRING. -For index of first char beyond the match, do (match-end 0). -`match-end' and `match-beginning' also give indices of substrings -matched by parenthesis constructs in the pattern. -You can use the function `match-string' to extract the substrings -matched by the parenthesis constructions in REGEXP. */) - (Lisp_Object regexp, Lisp_Object string, Lisp_Object start) +If INHIBIT-MODIFY is non-nil, match data is not changed. + +If INHIBIT-MODIFY is nil or missing, match data is changed, and +`match-end' and `match-beginning' give indices of substrings matched +by parenthesis constructs in the pattern. You can use the function +`match-string' to extract the substrings matched by the parenthesis +constructions in REGEXP. For index of first char beyond the match, do +(match-end 0). */) + (Lisp_Object regexp, Lisp_Object string, Lisp_Object start, + Lisp_Object inhibit_modify) { - return string_match_1 (regexp, string, start, 0); + return string_match_1 (regexp, string, start, 0, NILP (inhibit_modify)); } -DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0, - doc: /* Return index of start of first match for REGEXP in STRING, or nil. +DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 4, 0, + doc: /* Return index of start of first match for Posix REGEXP in STRING, or nil. Find the longest match, in accord with Posix regular expression rules. Case is ignored if `case-fold-search' is non-nil in the current buffer. -If third arg START is non-nil, start search at that index in STRING. -For index of first char beyond the match, do (match-end 0). -`match-end' and `match-beginning' also give indices of substrings -matched by parenthesis constructs in the pattern. */) - (Lisp_Object regexp, Lisp_Object string, Lisp_Object start) + +If INHIBIT-MODIFY is non-nil, match data is not changed. + +If INHIBIT-MODIFY is nil or missing, match data is changed, and +`match-end' and `match-beginning' give indices of substrings matched +by parenthesis constructs in the pattern. You can use the function +`match-string' to extract the substrings matched by the parenthesis +constructions in REGEXP. For index of first char beyond the match, do +(match-end 0). */) + (Lisp_Object regexp, Lisp_Object string, Lisp_Object start, + Lisp_Object inhibit_modify) { - return string_match_1 (regexp, string, start, 1); + return string_match_1 (regexp, string, start, 1, NILP (inhibit_modify)); } /* Match REGEXP against STRING using translation table TABLE, @@ -475,16 +484,15 @@ ptrdiff_t fast_string_match_internal (Lisp_Object regexp, Lisp_Object string, Lisp_Object table) { - ptrdiff_t val; - struct re_pattern_buffer *bufp; - - bufp = compile_pattern (regexp, 0, table, - 0, STRING_MULTIBYTE (string)); re_match_object = string; - - val = re_search (bufp, SSDATA (string), - SBYTES (string), 0, - SBYTES (string), 0); + specpdl_ref count = SPECPDL_INDEX (); + struct regexp_cache *cache_entry + = compile_pattern (regexp, 0, table, 0, STRING_MULTIBYTE (string)); + freeze_pattern (cache_entry); + ptrdiff_t val = re_search (&cache_entry->buf, SSDATA (string), + SBYTES (string), 0, + SBYTES (string), 0); + unbind_to (count, Qnil); return val; } @@ -497,15 +505,14 @@ ptrdiff_t fast_c_string_match_ignore_case (Lisp_Object regexp, const char *string, ptrdiff_t len) { - ptrdiff_t val; - struct re_pattern_buffer *bufp; - regexp = string_make_unibyte (regexp); + specpdl_ref count = SPECPDL_INDEX (); + struct regexp_cache *cache_entry + = compile_pattern (regexp, 0, Vascii_canon_table, 0, 0); + freeze_pattern (cache_entry); re_match_object = Qt; - bufp = compile_pattern (regexp, 0, - Vascii_canon_table, 0, - 0); - val = re_search (bufp, string, len, 0, len, 0); + ptrdiff_t val = re_search (&cache_entry->buf, string, len, 0, len, 0); + unbind_to (count, Qnil); return val; } @@ -520,7 +527,6 @@ fast_looking_at (Lisp_Object regexp, ptrdiff_t pos, ptrdiff_t pos_byte, ptrdiff_t limit, ptrdiff_t limit_byte, Lisp_Object string) { bool multibyte; - struct re_pattern_buffer *buf; unsigned char *p1, *p2; ptrdiff_t s1, s2; ptrdiff_t len; @@ -535,7 +541,6 @@ fast_looking_at (Lisp_Object regexp, ptrdiff_t pos, ptrdiff_t pos_byte, s1 = 0; p2 = SDATA (string); s2 = SBYTES (string); - re_match_object = string; multibyte = STRING_MULTIBYTE (string); } else @@ -561,16 +566,19 @@ fast_looking_at (Lisp_Object regexp, ptrdiff_t pos, ptrdiff_t pos_byte, s1 = ZV_BYTE - BEGV_BYTE; s2 = 0; } - re_match_object = Qnil; multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters)); } - buf = compile_pattern (regexp, 0, Qnil, 0, multibyte); + struct regexp_cache *cache_entry = + compile_pattern (regexp, 0, Qnil, 0, multibyte); + specpdl_ref count = SPECPDL_INDEX (); freeze_buffer_relocation (); - len = re_match_2 (buf, (char *) p1, s1, (char *) p2, s2, + freeze_pattern (cache_entry); + re_match_object = STRINGP (string) ? string : Qnil; + len = re_match_2 (&cache_entry->buf, (char *) p1, s1, (char *) p2, s2, pos_byte, NULL, limit_byte); - thaw_buffer_relocation (); + unbind_to (count, Qnil); return len; } @@ -619,7 +627,10 @@ newline_cache_on_off (struct buffer *buf) { /* It should be on. */ if (base_buf->newline_cache == 0) - base_buf->newline_cache = new_region_cache (); + { + base_buf->newline_cache = new_region_cache (); + __lsan_ignore_object (base_buf->newline_cache); + } } return base_buf->newline_cache; } @@ -634,14 +645,16 @@ newline_cache_on_off (struct buffer *buf) If COUNT is zero, do anything you please; run rogue, for all I care. If END is zero, use BEGV or ZV instead, as appropriate for the - direction indicated by COUNT. + direction indicated by COUNT. If START_BYTE is -1 it is unknown, + and similarly for END_BYTE. - If we find COUNT instances, set *SHORTAGE to zero, and return the + If we find COUNT instances, set *COUNTED to COUNT, and return the position past the COUNTth match. Note that for reverse motion this is not the same as the usual convention for Emacs motion commands. - If we don't find COUNT instances before reaching END, set *SHORTAGE - to the number of newlines left unfound, and return END. + If we don't find COUNT instances before reaching END, set *COUNTED + to the number of newlines left found (negated if COUNT is negative), + and return END. If BYTEPOS is not NULL, set *BYTEPOS to the byte position corresponding to the returned character position. @@ -651,23 +664,17 @@ newline_cache_on_off (struct buffer *buf) ptrdiff_t find_newline (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t end, - ptrdiff_t end_byte, ptrdiff_t count, ptrdiff_t *shortage, + ptrdiff_t end_byte, ptrdiff_t count, ptrdiff_t *counted, ptrdiff_t *bytepos, bool allow_quit) { struct region_cache *newline_cache; - int direction; struct buffer *cache_buffer; - if (count > 0) + if (!end) { - direction = 1; - if (!end) + if (count > 0) end = ZV, end_byte = ZV_BYTE; - } - else - { - direction = -1; - if (!end) + else end = BEGV, end_byte = BEGV_BYTE; } if (end_byte == -1) @@ -679,8 +686,8 @@ find_newline (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t end, else cache_buffer = current_buffer; - if (shortage != 0) - *shortage = 0; + if (counted) + *counted = count; if (count > 0) while (start != end) @@ -923,8 +930,8 @@ find_newline (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t end, } } - if (shortage) - *shortage = count * direction; + if (counted) + *counted -= count; if (bytepos) { *bytepos = start_byte == -1 ? CHAR_TO_BYTE (start) : start_byte; @@ -939,30 +946,28 @@ find_newline (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t end, We report the resulting position by calling TEMP_SET_PT_BOTH. If we find COUNT instances. we position after (always after, - even if scanning backwards) the COUNTth match, and return 0. + even if scanning backwards) the COUNTth match. If we don't find COUNT instances before reaching the end of the - buffer (or the beginning, if scanning backwards), we return - the number of line boundaries left unfound, and position at + buffer (or the beginning, if scanning backwards), we position at the limit we bumped up against. If ALLOW_QUIT, check for quitting. That's good to do except in special cases. */ -ptrdiff_t +void scan_newline (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t limit, ptrdiff_t limit_byte, ptrdiff_t count, bool allow_quit) { - ptrdiff_t charpos, bytepos, shortage; + ptrdiff_t charpos, bytepos, counted; charpos = find_newline (start, start_byte, limit, limit_byte, - count, &shortage, &bytepos, allow_quit); - if (shortage) + count, &counted, &bytepos, allow_quit); + if (counted != count) TEMP_SET_PT_BOTH (limit, limit_byte); else TEMP_SET_PT_BOTH (charpos, bytepos); - return shortage; } /* Like above, but always scan from point and report the @@ -972,19 +977,19 @@ ptrdiff_t scan_newline_from_point (ptrdiff_t count, ptrdiff_t *charpos, ptrdiff_t *bytepos) { - ptrdiff_t shortage; + ptrdiff_t counted; if (count <= 0) *charpos = find_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, count - 1, - &shortage, bytepos, 1); + &counted, bytepos, 1); else *charpos = find_newline (PT, PT_BYTE, ZV, ZV_BYTE, count, - &shortage, bytepos, 1); - return shortage; + &counted, bytepos, 1); + return counted; } /* Like find_newline, but doesn't allow QUITting and doesn't return - SHORTAGE. */ + COUNTED. */ ptrdiff_t find_newline_no_quit (ptrdiff_t from, ptrdiff_t frombyte, ptrdiff_t cnt, ptrdiff_t *bytepos) @@ -1000,13 +1005,13 @@ ptrdiff_t find_before_next_newline (ptrdiff_t from, ptrdiff_t to, ptrdiff_t cnt, ptrdiff_t *bytepos) { - ptrdiff_t shortage; - ptrdiff_t pos = find_newline (from, -1, to, -1, cnt, &shortage, bytepos, 1); + ptrdiff_t counted; + ptrdiff_t pos = find_newline (from, -1, to, -1, cnt, &counted, bytepos, 1); - if (shortage == 0) + if (counted == cnt) { if (bytepos) - DEC_BOTH (pos, *bytepos); + dec_both (&pos, &*bytepos); else pos--; } @@ -1026,8 +1031,8 @@ search_command (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, if (!NILP (count)) { - CHECK_NUMBER (count); - n *= XINT (count); + CHECK_FIXNUM (count); + n *= XFIXNUM (count); } CHECK_STRING (string); @@ -1040,8 +1045,7 @@ search_command (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, } else { - CHECK_NUMBER_COERCE_MARKER (bound); - lim = XINT (bound); + lim = fix_position (bound); if (n > 0 ? lim < PT : lim > PT) error ("Invalid search bound (wrong side of point)"); if (lim > ZV) @@ -1052,7 +1056,8 @@ search_command (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, lim_byte = CHAR_TO_BYTE (lim); } - /* This is so set_image_of_range_1 in regex.c can find the EQV table. */ + /* This is so set_image_of_range_1 in regex-emacs.c can find the EQV + table. */ set_char_table_extras (BVAR (current_buffer, case_canon_table), 2, BVAR (current_buffer, case_eqv_table)); @@ -1086,7 +1091,7 @@ search_command (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, eassert (BEGV <= np && np <= ZV); SET_PT (np); - return make_number (np); + return make_fixnum (np); } /* Return true if REGEXP it matches just one constant string. */ @@ -1141,9 +1146,9 @@ do \ if (! NILP (trt)) \ { \ Lisp_Object temp; \ - temp = Faref (trt, make_number (d)); \ - if (INTEGERP (temp)) \ - out = XINT (temp); \ + temp = Faref (trt, make_fixnum (d)); \ + if (FIXNUMP (temp)) \ + out = XFIXNUM (temp); \ else \ out = d; \ } \ @@ -1158,355 +1163,369 @@ while (0) static struct re_registers search_regs_1; static EMACS_INT -search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte, - ptrdiff_t lim, ptrdiff_t lim_byte, EMACS_INT n, - int RE, Lisp_Object trt, Lisp_Object inverse_trt, bool posix) +search_buffer_re (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte, + ptrdiff_t lim, ptrdiff_t lim_byte, EMACS_INT n, + Lisp_Object trt, Lisp_Object inverse_trt, bool posix) { - ptrdiff_t len = SCHARS (string); - ptrdiff_t len_byte = SBYTES (string); - register ptrdiff_t i; + unsigned char *p1, *p2; + ptrdiff_t s1, s2; - if (running_asynch_code) - save_search_regs (); + /* Snapshot in case Lisp changes the value. */ + bool preserve_match_data = NILP (Vinhibit_changing_match_data); - /* Searching 0 times means don't move. */ - /* Null string is found at starting position. */ - if (len == 0 || n == 0) + struct regexp_cache *cache_entry = + compile_pattern (string, + preserve_match_data ? &search_regs : &search_regs_1, + trt, posix, + !NILP (BVAR (current_buffer, enable_multibyte_characters))); + struct re_pattern_buffer *bufp = &cache_entry->buf; + + maybe_quit (); /* Do a pending quit right away, + to avoid paradoxical behavior */ + /* Get pointers and sizes of the two strings + that make up the visible portion of the buffer. */ + + p1 = BEGV_ADDR; + s1 = GPT_BYTE - BEGV_BYTE; + p2 = GAP_END_ADDR; + s2 = ZV_BYTE - GPT_BYTE; + if (s1 < 0) { - set_search_regs (pos_byte, 0); - return pos; + p2 = p1; + s2 = ZV_BYTE - BEGV_BYTE; + s1 = 0; } - - if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp))) + if (s2 < 0) { - unsigned char *p1, *p2; - ptrdiff_t s1, s2; - struct re_pattern_buffer *bufp; + s1 = ZV_BYTE - BEGV_BYTE; + s2 = 0; + } - bufp = compile_pattern (string, - (NILP (Vinhibit_changing_match_data) - ? &search_regs : &search_regs_1), - trt, posix, - !NILP (BVAR (current_buffer, enable_multibyte_characters))); + specpdl_ref count = SPECPDL_INDEX (); + freeze_buffer_relocation (); + freeze_pattern (cache_entry); - maybe_quit (); /* Do a pending quit right away, - to avoid paradoxical behavior */ - /* Get pointers and sizes of the two strings - that make up the visible portion of the buffer. */ + while (n < 0) + { + ptrdiff_t val; - p1 = BEGV_ADDR; - s1 = GPT_BYTE - BEGV_BYTE; - p2 = GAP_END_ADDR; - s2 = ZV_BYTE - GPT_BYTE; - if (s1 < 0) - { - p2 = p1; - s2 = ZV_BYTE - BEGV_BYTE; - s1 = 0; - } - if (s2 < 0) - { - s1 = ZV_BYTE - BEGV_BYTE; - s2 = 0; - } re_match_object = Qnil; + val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2, + pos_byte - BEGV_BYTE, lim_byte - pos_byte, + preserve_match_data ? &search_regs : &search_regs_1, + /* Don't allow match past current point */ + pos_byte - BEGV_BYTE); + if (val == -2) + { + unbind_to (count, Qnil); + matcher_overflow (); + } + if (val >= 0) + { + if (preserve_match_data) + { + pos_byte = search_regs.start[0] + BEGV_BYTE; + for (ptrdiff_t i = 0; i < search_regs.num_regs; i++) + if (search_regs.start[i] >= 0) + { + search_regs.start[i] + = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE); + search_regs.end[i] + = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE); + } + XSETBUFFER (last_thing_searched, current_buffer); + /* Set pos to the new position. */ + pos = search_regs.start[0]; + } + else + { + pos_byte = search_regs_1.start[0] + BEGV_BYTE; + /* Set pos to the new position. */ + pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE); + } + } + else + { + unbind_to (count, Qnil); + return (n); + } + n++; + maybe_quit (); + } + while (n > 0) + { + ptrdiff_t val; - freeze_buffer_relocation (); + re_match_object = Qnil; + val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2, + pos_byte - BEGV_BYTE, lim_byte - pos_byte, + preserve_match_data ? &search_regs : &search_regs_1, + lim_byte - BEGV_BYTE); + if (val == -2) + { + unbind_to (count, Qnil); + matcher_overflow (); + } + if (val >= 0) + { + if (preserve_match_data) + { + pos_byte = search_regs.end[0] + BEGV_BYTE; + for (ptrdiff_t i = 0; i < search_regs.num_regs; i++) + if (search_regs.start[i] >= 0) + { + search_regs.start[i] + = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE); + search_regs.end[i] + = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE); + } + XSETBUFFER (last_thing_searched, current_buffer); + pos = search_regs.end[0]; + } + else + { + pos_byte = search_regs_1.end[0] + BEGV_BYTE; + pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE); + } + } + else + { + unbind_to (count, Qnil); + return (0 - n); + } + n--; + maybe_quit (); + } + unbind_to (count, Qnil); + return (pos); +} - while (n < 0) - { - ptrdiff_t val; - - val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2, - pos_byte - BEGV_BYTE, lim_byte - pos_byte, - (NILP (Vinhibit_changing_match_data) - ? &search_regs : &search_regs_1), - /* Don't allow match past current point */ - pos_byte - BEGV_BYTE); - if (val == -2) - { - matcher_overflow (); - } - if (val >= 0) - { - if (NILP (Vinhibit_changing_match_data)) - { - pos_byte = search_regs.start[0] + BEGV_BYTE; - for (i = 0; i < search_regs.num_regs; i++) - if (search_regs.start[i] >= 0) - { - search_regs.start[i] - = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE); - search_regs.end[i] - = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE); - } - XSETBUFFER (last_thing_searched, current_buffer); - /* Set pos to the new position. */ - pos = search_regs.start[0]; - } - else - { - pos_byte = search_regs_1.start[0] + BEGV_BYTE; - /* Set pos to the new position. */ - pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE); - } - } - else - { - thaw_buffer_relocation (); - return (n); - } - n++; - maybe_quit (); - } - while (n > 0) - { - ptrdiff_t val; - - val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2, - pos_byte - BEGV_BYTE, lim_byte - pos_byte, - (NILP (Vinhibit_changing_match_data) - ? &search_regs : &search_regs_1), - lim_byte - BEGV_BYTE); - if (val == -2) - { - matcher_overflow (); - } - if (val >= 0) - { - if (NILP (Vinhibit_changing_match_data)) - { - pos_byte = search_regs.end[0] + BEGV_BYTE; - for (i = 0; i < search_regs.num_regs; i++) - if (search_regs.start[i] >= 0) - { - search_regs.start[i] - = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE); - search_regs.end[i] - = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE); - } - XSETBUFFER (last_thing_searched, current_buffer); - pos = search_regs.end[0]; - } - else - { - pos_byte = search_regs_1.end[0] + BEGV_BYTE; - pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE); - } - } - else - { - thaw_buffer_relocation (); - return (0 - n); - } - n--; - maybe_quit (); - } - thaw_buffer_relocation (); - return (pos); +static EMACS_INT +search_buffer_non_re (Lisp_Object string, ptrdiff_t pos, + ptrdiff_t pos_byte, ptrdiff_t lim, ptrdiff_t lim_byte, + EMACS_INT n, int RE, Lisp_Object trt, Lisp_Object inverse_trt, + bool posix) +{ + unsigned char *raw_pattern, *pat; + ptrdiff_t raw_pattern_size; + ptrdiff_t raw_pattern_size_byte; + unsigned char *patbuf; + bool multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters)); + unsigned char *base_pat; + /* Set to positive if we find a non-ASCII char that need + translation. Otherwise set to zero later. */ + int char_base = -1; + bool boyer_moore_ok = 1; + USE_SAFE_ALLOCA; + + /* MULTIBYTE says whether the text to be searched is multibyte. + We must convert PATTERN to match that, or we will not really + find things right. */ + + if (multibyte == STRING_MULTIBYTE (string)) + { + raw_pattern = SDATA (string); + raw_pattern_size = SCHARS (string); + raw_pattern_size_byte = SBYTES (string); } - else /* non-RE case */ + else if (multibyte) { - unsigned char *raw_pattern, *pat; - ptrdiff_t raw_pattern_size; - ptrdiff_t raw_pattern_size_byte; - unsigned char *patbuf; - bool multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters)); - unsigned char *base_pat; - /* Set to positive if we find a non-ASCII char that need - translation. Otherwise set to zero later. */ - int char_base = -1; - bool boyer_moore_ok = 1; - USE_SAFE_ALLOCA; - - /* MULTIBYTE says whether the text to be searched is multibyte. - We must convert PATTERN to match that, or we will not really - find things right. */ - - if (multibyte == STRING_MULTIBYTE (string)) - { - raw_pattern = SDATA (string); - raw_pattern_size = SCHARS (string); - raw_pattern_size_byte = SBYTES (string); - } - else if (multibyte) - { - raw_pattern_size = SCHARS (string); - raw_pattern_size_byte - = count_size_as_multibyte (SDATA (string), - raw_pattern_size); - raw_pattern = SAFE_ALLOCA (raw_pattern_size_byte + 1); - copy_text (SDATA (string), raw_pattern, - SCHARS (string), 0, 1); - } - else - { - /* Converting multibyte to single-byte. - - ??? Perhaps this conversion should be done in a special way - by subtracting nonascii-insert-offset from each non-ASCII char, - so that only the multibyte chars which really correspond to - the chosen single-byte character set can possibly match. */ - raw_pattern_size = SCHARS (string); - raw_pattern_size_byte = SCHARS (string); - raw_pattern = SAFE_ALLOCA (raw_pattern_size + 1); - copy_text (SDATA (string), raw_pattern, - SBYTES (string), 1, 0); - } + raw_pattern_size = SCHARS (string); + raw_pattern_size_byte + = count_size_as_multibyte (SDATA (string), + raw_pattern_size); + raw_pattern = SAFE_ALLOCA (raw_pattern_size_byte + 1); + copy_text (SDATA (string), raw_pattern, + SCHARS (string), 0, 1); + } + else + { + /* Converting multibyte to single-byte. */ + raw_pattern_size = SCHARS (string); + raw_pattern_size_byte = SCHARS (string); + raw_pattern = SAFE_ALLOCA (raw_pattern_size + 1); + copy_text (SDATA (string), raw_pattern, + SBYTES (string), 1, 0); + } - /* Copy and optionally translate the pattern. */ - len = raw_pattern_size; - len_byte = raw_pattern_size_byte; - SAFE_NALLOCA (patbuf, MAX_MULTIBYTE_LENGTH, len); - pat = patbuf; - base_pat = raw_pattern; - if (multibyte) - { - /* Fill patbuf by translated characters in STRING while - checking if we can use boyer-moore search. If TRT is - non-nil, we can use boyer-moore search only if TRT can be - represented by the byte array of 256 elements. For that, - all non-ASCII case-equivalents of all case-sensitive - characters in STRING must belong to the same character - group (two characters belong to the same group iff their - multibyte forms are the same except for the last byte; - i.e. every 64 characters form a group; U+0000..U+003F, - U+0040..U+007F, U+0080..U+00BF, ...). */ - - while (--len >= 0) - { - unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str; - int c, translated, inverse; - int in_charlen, charlen; - - /* If we got here and the RE flag is set, it's because we're - dealing with a regexp known to be trivial, so the backslash - just quotes the next character. */ - if (RE && *base_pat == '\\') - { - len--; - raw_pattern_size--; - len_byte--; - base_pat++; - } + /* Copy and optionally translate the pattern. */ + ptrdiff_t len = raw_pattern_size; + ptrdiff_t len_byte = raw_pattern_size_byte; + SAFE_NALLOCA (patbuf, MAX_MULTIBYTE_LENGTH, len); + pat = patbuf; + base_pat = raw_pattern; + if (multibyte) + { + /* Fill patbuf by translated characters in STRING while + checking if we can use boyer-moore search. If TRT is + non-nil, we can use boyer-moore search only if TRT can be + represented by the byte array of 256 elements. For that, + all non-ASCII case-equivalents of all case-sensitive + characters in STRING must belong to the same character + group (two characters belong to the same group iff their + multibyte forms are the same except for the last byte; + i.e. every 64 characters form a group; U+0000..U+003F, + U+0040..U+007F, U+0080..U+00BF, ...). */ + + while (--len >= 0) + { + unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str; + int translated, inverse; + int charlen; + + /* If we got here and the RE flag is set, it's because we're + dealing with a regexp known to be trivial, so the backslash + just quotes the next character. */ + if (RE && *base_pat == '\\') + { + len--; + raw_pattern_size--; + len_byte--; + base_pat++; + } - c = STRING_CHAR_AND_LENGTH (base_pat, in_charlen); + int in_charlen, c = string_char_and_length (base_pat, &in_charlen); - if (NILP (trt)) - { - str = base_pat; - charlen = in_charlen; - } - else - { - /* Translate the character. */ - TRANSLATE (translated, trt, c); - charlen = CHAR_STRING (translated, str_base); - str = str_base; - - /* Check if C has any other case-equivalents. */ - TRANSLATE (inverse, inverse_trt, c); - /* If so, check if we can use boyer-moore. */ - if (c != inverse && boyer_moore_ok) - { - /* Check if all equivalents belong to the same - group of characters. Note that the check of C - itself is done by the last iteration. */ - int this_char_base = -1; + if (NILP (trt)) + { + str = base_pat; + charlen = in_charlen; + } + else + { + /* Translate the character. */ + TRANSLATE (translated, trt, c); + charlen = CHAR_STRING (translated, str_base); + str = str_base; + + /* Check if C has any other case-equivalents. */ + TRANSLATE (inverse, inverse_trt, c); + /* If so, check if we can use boyer-moore. */ + if (c != inverse && boyer_moore_ok) + { + /* Check if all equivalents belong to the same + group of characters. Note that the check of C + itself is done by the last iteration. */ + int this_char_base = -1; + + while (boyer_moore_ok) + { + if (ASCII_CHAR_P (inverse)) + { + if (this_char_base > 0) + boyer_moore_ok = 0; + else + this_char_base = 0; + } + else if (CHAR_BYTE8_P (inverse)) + /* Boyer-moore search can't handle a + translation of an eight-bit + character. */ + boyer_moore_ok = 0; + else if (this_char_base < 0) + { + this_char_base = inverse & ~0x3F; + if (char_base < 0) + char_base = this_char_base; + else if (this_char_base != char_base) + boyer_moore_ok = 0; + } + else if ((inverse & ~0x3F) != this_char_base) + boyer_moore_ok = 0; + if (c == inverse) + break; + TRANSLATE (inverse, inverse_trt, inverse); + } + } + } - while (boyer_moore_ok) - { - if (ASCII_CHAR_P (inverse)) - { - if (this_char_base > 0) - boyer_moore_ok = 0; - else - this_char_base = 0; - } - else if (CHAR_BYTE8_P (inverse)) - /* Boyer-moore search can't handle a - translation of an eight-bit - character. */ - boyer_moore_ok = 0; - else if (this_char_base < 0) - { - this_char_base = inverse & ~0x3F; - if (char_base < 0) - char_base = this_char_base; - else if (this_char_base != char_base) - boyer_moore_ok = 0; - } - else if ((inverse & ~0x3F) != this_char_base) - boyer_moore_ok = 0; - if (c == inverse) - break; - TRANSLATE (inverse, inverse_trt, inverse); - } - } - } + /* Store this character into the translated pattern. */ + memcpy (pat, str, charlen); + pat += charlen; + base_pat += in_charlen; + len_byte -= in_charlen; + } - /* Store this character into the translated pattern. */ - memcpy (pat, str, charlen); - pat += charlen; - base_pat += in_charlen; - len_byte -= in_charlen; - } + /* If char_base is still negative we didn't find any translated + non-ASCII characters. */ + if (char_base < 0) + char_base = 0; + } + else + { + /* Unibyte buffer. */ + char_base = 0; + while (--len >= 0) + { + int c, translated, inverse; - /* If char_base is still negative we didn't find any translated - non-ASCII characters. */ - if (char_base < 0) - char_base = 0; - } - else - { - /* Unibyte buffer. */ - char_base = 0; - while (--len >= 0) - { - int c, translated, inverse; + /* If we got here and the RE flag is set, it's because we're + dealing with a regexp known to be trivial, so the backslash + just quotes the next character. */ + if (RE && *base_pat == '\\') + { + len--; + raw_pattern_size--; + base_pat++; + } + c = *base_pat++; + TRANSLATE (translated, trt, c); + *pat++ = translated; + /* Check that none of C's equivalents violates the + assumptions of boyer_moore. */ + TRANSLATE (inverse, inverse_trt, c); + while (1) + { + if (inverse >= 0200) + { + boyer_moore_ok = 0; + break; + } + if (c == inverse) + break; + TRANSLATE (inverse, inverse_trt, inverse); + } + } + } - /* If we got here and the RE flag is set, it's because we're - dealing with a regexp known to be trivial, so the backslash - just quotes the next character. */ - if (RE && *base_pat == '\\') - { - len--; - raw_pattern_size--; - base_pat++; - } - c = *base_pat++; - TRANSLATE (translated, trt, c); - *pat++ = translated; - /* Check that none of C's equivalents violates the - assumptions of boyer_moore. */ - TRANSLATE (inverse, inverse_trt, c); - while (1) - { - if (inverse >= 0200) - { - boyer_moore_ok = 0; - break; - } - if (c == inverse) - break; - TRANSLATE (inverse, inverse_trt, inverse); - } - } - } + len_byte = pat - patbuf; + pat = base_pat = patbuf; + + EMACS_INT result + = (boyer_moore_ok + ? boyer_moore (n, pat, len_byte, trt, inverse_trt, + pos_byte, lim_byte, + char_base) + : simple_search (n, pat, raw_pattern_size, len_byte, trt, + pos, pos_byte, lim, lim_byte)); + SAFE_FREE (); + return result; +} - len_byte = pat - patbuf; - pat = base_pat = patbuf; - - EMACS_INT result - = (boyer_moore_ok - ? boyer_moore (n, pat, len_byte, trt, inverse_trt, - pos_byte, lim_byte, - char_base) - : simple_search (n, pat, raw_pattern_size, len_byte, trt, - pos, pos_byte, lim, lim_byte)); - SAFE_FREE (); - return result; +static EMACS_INT +search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte, + ptrdiff_t lim, ptrdiff_t lim_byte, EMACS_INT n, + int RE, Lisp_Object trt, Lisp_Object inverse_trt, bool posix) +{ + if (running_asynch_code) + save_search_regs (); + + /* Searching 0 times means don't move. */ + /* Null string is found at starting position. */ + if (n == 0 || SCHARS (string) == 0) + { + set_search_regs (pos_byte, 0); + return pos; } + + if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp))) + pos = search_buffer_re (string, pos, pos_byte, lim, lim_byte, + n, trt, inverse_trt, posix); + else + pos = search_buffer_non_re (string, pos, pos_byte, lim, lim_byte, + n, RE, trt, inverse_trt, posix); + + return pos; } /* Do a simple string search N times for the string PAT, @@ -1548,12 +1567,10 @@ simple_search (EMACS_INT n, unsigned char *pat, while (this_len > 0) { - int charlen, buf_charlen; - int pat_ch, buf_ch; - - pat_ch = STRING_CHAR_AND_LENGTH (p, charlen); - buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte), - buf_charlen); + int charlen, pat_ch = string_char_and_length (p, &charlen); + int buf_charlen, buf_ch + = string_char_and_length (BYTE_POS_ADDR (this_pos_byte), + &buf_charlen); TRANSLATE (buf_ch, trt, buf_ch); if (buf_ch != pat_ch) @@ -1574,7 +1591,7 @@ simple_search (EMACS_INT n, unsigned char *pat, break; } - INC_BOTH (pos, pos_byte); + inc_both (&pos, &pos_byte); } n--; @@ -1636,8 +1653,8 @@ simple_search (EMACS_INT n, unsigned char *pat, { int pat_ch, buf_ch; - DEC_BOTH (this_pos, this_pos_byte); - PREV_CHAR_BOUNDARY (p, pat); + dec_both (&this_pos, &this_pos_byte); + p -= raw_prev_char_len (p); pat_ch = STRING_CHAR (p); buf_ch = STRING_CHAR (BYTE_POS_ADDR (this_pos_byte)); TRANSLATE (buf_ch, trt, buf_ch); @@ -1656,7 +1673,7 @@ simple_search (EMACS_INT n, unsigned char *pat, break; } - DEC_BOTH (pos, pos_byte); + dec_both (&pos, &pos_byte); } n++; @@ -2159,8 +2176,8 @@ set_search_regs (ptrdiff_t beg_byte, ptrdiff_t nbytes) the match position. */ if (search_regs.num_regs == 0) { - search_regs.start = xmalloc (2 * sizeof (regoff_t)); - search_regs.end = xmalloc (2 * sizeof (regoff_t)); + search_regs.start = xmalloc (2 * sizeof *search_regs.start); + search_regs.end = xmalloc (2 * sizeof *search_regs.end); search_regs.num_regs = 2; } @@ -2233,8 +2250,11 @@ DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4, This function is almost identical to `re-search-forward', except that by default it searches backward instead of forward, and the sign of COUNT also indicates exactly the opposite searching direction. +See `re-search-forward' for details. -See `re-search-forward' for details. */) +Note that searching backwards may give a shorter match than expected, +because REGEXP is still matched in the forward direction. See Info +anchor `(elisp) re-search-backward' for details. */) (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count) { return search_command (regexp, bound, noerror, count, -1, 1, 0); @@ -2273,7 +2293,7 @@ and `replace-match'. */) DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4, "sPosix search backward: ", - doc: /* Search backward from point for match for regular expression REGEXP. + doc: /* Search backward from point for match for REGEXP according to Posix rules. Find the longest match in accord with Posix regular expression rules. Set point to the beginning of the occurrence found, and return point. An optional second argument bounds the search; it is a buffer position. @@ -2301,7 +2321,7 @@ and `replace-match'. */) DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4, "sPosix search: ", - doc: /* Search forward from point for regular expression REGEXP. + doc: /* Search forward from point for REGEXP according to Posix rules. Find the longest match in accord with Posix regular expression rules. Set point to the end of the occurrence found, and return point. An optional second argument bounds the search; it is a buffer position. @@ -2380,37 +2400,35 @@ since only regular expressions have distinguished subexpressions. */) if (! NILP (string)) CHECK_STRING (string); + /* Most replacement texts don't contain any backslash directives in + the replacements. Check whether that's the case, which will + enable us to take the fast path later. */ + if (NILP (literal) + && !memchr (SSDATA (newtext), '\\', SBYTES (newtext))) + literal = Qt; + case_action = nochange; /* We tried an initialization */ /* but some C compilers blew it */ - if (search_regs.num_regs <= 0) + ptrdiff_t num_regs = search_regs.num_regs; + if (num_regs <= 0) error ("`replace-match' called before any match found"); - if (NILP (subexp)) - sub = 0; - else - { - CHECK_NUMBER (subexp); - if (! (0 <= XINT (subexp) && XINT (subexp) < search_regs.num_regs)) - args_out_of_range (subexp, make_number (search_regs.num_regs)); - sub = XINT (subexp); - } + sub = !NILP (subexp) ? check_integer_range (subexp, 0, num_regs - 1) : 0; + ptrdiff_t sub_start = search_regs.start[sub]; + ptrdiff_t sub_end = search_regs.end[sub]; + eassert (sub_start <= sub_end); - if (NILP (string)) - { - if (search_regs.start[sub] < BEGV - || search_regs.start[sub] > search_regs.end[sub] - || search_regs.end[sub] > ZV) - args_out_of_range (make_number (search_regs.start[sub]), - make_number (search_regs.end[sub])); - } - else + /* Check whether the text to replace is present in the buffer/string. */ + if (! (NILP (string) + ? BEGV <= sub_start && sub_end <= ZV + : 0 <= sub_start && sub_end <= SCHARS (string))) { - if (search_regs.start[sub] < 0 - || search_regs.start[sub] > search_regs.end[sub] - || search_regs.end[sub] > SCHARS (string)) - args_out_of_range (make_number (search_regs.start[sub]), - make_number (search_regs.end[sub])); + if (sub_start < 0) + xsignal2 (Qerror, + build_string ("replace-match subexpression does not exist"), + subexp); + args_out_of_range (make_fixnum (sub_start), make_fixnum (sub_end)); } if (NILP (fixedcase)) @@ -2418,8 +2436,8 @@ since only regular expressions have distinguished subexpressions. */) /* Decide how to casify by examining the matched text. */ ptrdiff_t last; - pos = search_regs.start[sub]; - last = search_regs.end[sub]; + pos = sub_start; + last = sub_end; if (NILP (string)) pos_byte = CHAR_TO_BYTE (pos); @@ -2441,10 +2459,11 @@ since only regular expressions have distinguished subexpressions. */) if (NILP (string)) { c = FETCH_CHAR_AS_MULTIBYTE (pos_byte); - INC_BOTH (pos, pos_byte); + inc_both (&pos, &pos_byte); } else - FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte); + c = fetch_string_char_as_multibyte_advance (string, + &pos, &pos_byte); if (lowercasep (c)) { @@ -2495,9 +2514,8 @@ since only regular expressions have distinguished subexpressions. */) { Lisp_Object before, after; - before = Fsubstring (string, make_number (0), - make_number (search_regs.start[sub])); - after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil); + before = Fsubstring (string, make_fixnum (0), make_fixnum (sub_start)); + after = Fsubstring (string, make_fixnum (sub_end), Qnil); /* Substitute parts of the match into NEWTEXT if desired. */ @@ -2518,20 +2536,20 @@ since only regular expressions have distinguished subexpressions. */) ptrdiff_t subend = 0; bool delbackslash = 0; - FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte); + c = fetch_string_char_advance (newtext, &pos, &pos_byte); if (c == '\\') { - FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte); + c = fetch_string_char_advance (newtext, &pos, &pos_byte); if (c == '&') { - substart = search_regs.start[sub]; - subend = search_regs.end[sub]; + substart = sub_start; + subend = sub_end; } else if (c >= '1' && c <= '9') { - if (c - '0' < search_regs.num_regs + if (c - '0' < num_regs && search_regs.start[c - '0'] >= 0) { substart = search_regs.start[c - '0']; @@ -2560,8 +2578,8 @@ since only regular expressions have distinguished subexpressions. */) middle = Qnil; accum = concat3 (accum, middle, Fsubstring (string, - make_number (substart), - make_number (subend))); + make_fixnum (substart), + make_fixnum (subend))); lastpos = pos; lastpos_byte = pos_byte; } @@ -2596,13 +2614,8 @@ since only regular expressions have distinguished subexpressions. */) return concat3 (before, newtext, after); } - /* Record point, then move (quietly) to the start of the match. */ - if (PT >= search_regs.end[sub]) - opoint = PT - ZV; - else if (PT > search_regs.start[sub]) - opoint = search_regs.end[sub] - ZV; - else - opoint = PT; + /* Record point. A nonpositive OPOINT is actually an offset from ZV. */ + opoint = PT <= sub_start ? PT : max (PT, sub_end) - ZV; /* If we want non-literal replacement, perform substitution on the replacement string. */ @@ -2635,7 +2648,8 @@ since only regular expressions have distinguished subexpressions. */) if (str_multibyte) { - FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte); + c = fetch_string_char_advance_no_check (newtext, + &pos, &pos_byte); if (!buf_multibyte) c = CHAR_TO_BYTE8 (c); } @@ -2644,7 +2658,7 @@ since only regular expressions have distinguished subexpressions. */) /* Note that we don't have to increment POS. */ c = SREF (newtext, pos_byte++); if (buf_multibyte) - MAKE_CHAR_MULTIBYTE (c); + c = make_char_multibyte (c); } /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED, @@ -2657,8 +2671,8 @@ since only regular expressions have distinguished subexpressions. */) if (str_multibyte) { - FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, - pos, pos_byte); + c = fetch_string_char_advance_no_check (newtext, + &pos, &pos_byte); if (!buf_multibyte && !ASCII_CHAR_P (c)) c = CHAR_TO_BYTE8 (c); } @@ -2666,12 +2680,12 @@ since only regular expressions have distinguished subexpressions. */) { c = SREF (newtext, pos_byte++); if (buf_multibyte) - MAKE_CHAR_MULTIBYTE (c); + c = make_char_multibyte (c); } if (c == '&') idx = sub; - else if (c >= '1' && c <= '9' && c - '0' < search_regs.num_regs) + else if ('1' <= c && c <= '9' && c - '0' < num_regs) { if (search_regs.start[c - '0'] >= 1) idx = c - '0'; @@ -2729,48 +2743,37 @@ since only regular expressions have distinguished subexpressions. */) xfree (substed); } - /* The functions below modify the buffer, so they could trigger - various modification hooks (see signal_before_change and - signal_after_change). If these hooks clobber the match data we - error out since otherwise this will result in confusing bugs. */ - ptrdiff_t sub_start = search_regs.start[sub]; - ptrdiff_t sub_end = search_regs.end[sub]; - unsigned num_regs = search_regs.num_regs; - newpoint = search_regs.start[sub] + SCHARS (newtext); + newpoint = sub_start + SCHARS (newtext); /* Replace the old text with the new in the cleanest possible way. */ - replace_range (search_regs.start[sub], search_regs.end[sub], - newtext, 1, 0, 1, 1); - /* Update saved data to match adjustment made by replace_range. */ - { - ptrdiff_t change = newpoint - sub_end; - if (sub_start >= sub_end) - sub_start += change; - sub_end += change; - } + replace_range (sub_start, sub_end, newtext, 1, 0, 1, true, true); if (case_action == all_caps) - Fupcase_region (make_number (search_regs.start[sub]), - make_number (newpoint), + Fupcase_region (make_fixnum (search_regs.start[sub]), + make_fixnum (newpoint), Qnil); else if (case_action == cap_initial) - Fupcase_initials_region (make_number (search_regs.start[sub]), - make_number (newpoint)); - - if (search_regs.start[sub] != sub_start - || search_regs.end[sub] != sub_end - || search_regs.num_regs != num_regs) + Fupcase_initials_region (make_fixnum (search_regs.start[sub]), + make_fixnum (newpoint), Qnil); + + /* The replace_range etc. functions can trigger modification hooks + (see signal_before_change and signal_after_change). Try to error + out if these hooks clobber the match data since clobbering can + result in confusing bugs. We used to check for changes in + search_regs start and end, but that fails if modification hooks + remove or add text earlier in the buffer, so just check num_regs + now. */ + if (search_regs.num_regs != num_regs) error ("Match data clobbered by buffer modification hooks"); - /* Put point back where it was in the text. */ - if (opoint <= 0) - TEMP_SET_PT (opoint + ZV); - else - TEMP_SET_PT (opoint); - - /* Now move point "officially" to the start of the inserted replacement. */ + /* Put point back where it was in the text, if possible. */ + TEMP_SET_PT (clip_to_bounds (BEGV, opoint + (opoint <= 0 ? ZV : 0), ZV)); + /* Now move point "officially" to the end of the inserted replacement. */ move_if_not_intangible (newpoint); + signal_after_change (sub_start, sub_end - sub_start, SCHARS (newtext)); + update_compositions (sub_start, newpoint, CHECK_BORDER); + return Qnil; } @@ -2779,16 +2782,16 @@ match_limit (Lisp_Object num, bool beginningp) { EMACS_INT n; - CHECK_NUMBER (num); - n = XINT (num); + CHECK_FIXNUM (num); + n = XFIXNUM (num); if (n < 0) - args_out_of_range (num, make_number (0)); + args_out_of_range (num, make_fixnum (0)); if (search_regs.num_regs <= 0) error ("No match data, because no search succeeded"); if (n >= search_regs.num_regs || search_regs.start[n] < 0) return Qnil; - return (make_number ((beginningp) ? search_regs.start[n] + return (make_fixnum ((beginningp) ? search_regs.start[n] : search_regs.end[n])); } @@ -2827,6 +2830,14 @@ All the elements are markers or nil (nil if the Nth pair didn't match) if the last match was on a buffer; integers or nil if a string was matched. Use `set-match-data' to reinstate the data in this list. +Note that non-matching optional groups at the end of the regexp are +elided instead of being represented with two `nil's each. For instance: + + (progn + (string-match "^\\(a\\)?\\(b\\)\\(c\\)?$" "b") + (match-data)) + => (0 1 nil nil 0 1) + If INTEGERS (the optional first argument) is non-nil, always use integers (rather than markers) to represent buffer positions. In this case, and if the last match was in a buffer, the buffer will get @@ -2878,11 +2889,11 @@ Return value is undefined if the last search failed. */) { data[2 * i] = Fmake_marker (); Fset_marker (data[2 * i], - make_number (start), + make_fixnum (start), last_thing_searched); data[2 * i + 1] = Fmake_marker (); Fset_marker (data[2 * i + 1], - make_number (search_regs.end[i]), + make_fixnum (search_regs.end[i]), last_thing_searched); } else @@ -2959,18 +2970,16 @@ If optional arg RESEAT is non-nil, make markers on LIST point nowhere. */) /* Allocate registers if they don't already exist. */ { - EMACS_INT length = XFASTINT (Flength (list)) / 2; + ptrdiff_t length = list_length (list) / 2; if (length > search_regs.num_regs) { ptrdiff_t num_regs = search_regs.num_regs; - if (PTRDIFF_MAX < length) - memory_full (SIZE_MAX); search_regs.start = xpalloc (search_regs.start, &num_regs, length - num_regs, - min (PTRDIFF_MAX, UINT_MAX), sizeof (regoff_t)); + min (PTRDIFF_MAX, UINT_MAX), sizeof *search_regs.start); search_regs.end = - xrealloc (search_regs.end, num_regs * sizeof (regoff_t)); + xrealloc (search_regs.end, num_regs * sizeof *search_regs.end); for (i = search_regs.num_regs; i < num_regs; i++) search_regs.start[i] = -1; @@ -3007,7 +3016,7 @@ If optional arg RESEAT is non-nil, make markers on LIST point nowhere. */) XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer); } - CHECK_NUMBER_COERCE_MARKER (marker); + CHECK_FIXNUM_COERCE_MARKER (marker); from = marker; if (!NILP (reseat) && MARKERP (m)) @@ -3024,16 +3033,13 @@ If optional arg RESEAT is non-nil, make markers on LIST point nowhere. */) if (MARKERP (marker) && XMARKER (marker)->buffer == 0) XSETFASTINT (marker, 0); - CHECK_NUMBER_COERCE_MARKER (marker); - if ((XINT (from) < 0 - ? TYPE_MINIMUM (regoff_t) <= XINT (from) - : XINT (from) <= TYPE_MAXIMUM (regoff_t)) - && (XINT (marker) < 0 - ? TYPE_MINIMUM (regoff_t) <= XINT (marker) - : XINT (marker) <= TYPE_MAXIMUM (regoff_t))) + CHECK_FIXNUM_COERCE_MARKER (marker); + if (PTRDIFF_MIN <= XFIXNUM (from) && XFIXNUM (from) <= PTRDIFF_MAX + && PTRDIFF_MIN <= XFIXNUM (marker) + && XFIXNUM (marker) <= PTRDIFF_MAX) { - search_regs.start[i] = XINT (from); - search_regs.end[i] = XINT (marker); + search_regs.start[i] = XFIXNUM (from); + search_regs.end[i] = XFIXNUM (marker); } else { @@ -3056,29 +3062,36 @@ If optional arg RESEAT is non-nil, make markers on LIST point nowhere. */) return Qnil; } -/* If true the match data have been saved in saved_search_regs - during the execution of a sentinel or filter. */ -/* static bool search_regs_saved; */ -/* static struct re_registers saved_search_regs; */ -/* static Lisp_Object saved_last_thing_searched; */ +DEFUN ("match-data--translate", Fmatch_data__translate, Smatch_data__translate, + 1, 1, 0, + doc: /* Add N to all positions in the match data. Internal. */) + (Lisp_Object n) +{ + CHECK_FIXNUM (n); + EMACS_INT delta = XFIXNUM (n); + if (!NILP (last_thing_searched)) + for (ptrdiff_t i = 0; i < search_regs.num_regs; i++) + if (search_regs.start[i] >= 0) + { + search_regs.start[i] = max (0, search_regs.start[i] + delta); + search_regs.end[i] = max (0, search_regs.end[i] + delta); + } + return Qnil; +} /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data if asynchronous code (filter or sentinel) is running. */ static void save_search_regs (void) { - if (!search_regs_saved) + if (saved_search_regs.num_regs == 0) { - saved_search_regs.num_regs = search_regs.num_regs; - saved_search_regs.start = search_regs.start; - saved_search_regs.end = search_regs.end; + saved_search_regs = search_regs; saved_last_thing_searched = last_thing_searched; last_thing_searched = Qnil; search_regs.num_regs = 0; search_regs.start = 0; search_regs.end = 0; - - search_regs_saved = 1; } } @@ -3086,19 +3099,17 @@ save_search_regs (void) void restore_search_regs (void) { - if (search_regs_saved) + if (saved_search_regs.num_regs != 0) { if (search_regs.num_regs > 0) { xfree (search_regs.start); xfree (search_regs.end); } - search_regs.num_regs = saved_search_regs.num_regs; - search_regs.start = saved_search_regs.start; - search_regs.end = saved_search_regs.end; + search_regs = saved_search_regs; last_thing_searched = saved_last_thing_searched; saved_last_thing_searched = Qnil; - search_regs_saved = 0; + saved_search_regs.num_regs = 0; } } @@ -3170,18 +3181,20 @@ DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0, } Lisp_Object result - = make_specified_string (temp, - SCHARS (string) + backslashes_added, - out - temp, - STRING_MULTIBYTE (string)); + = (backslashes_added > 0 + ? make_specified_string (temp, + SCHARS (string) + backslashes_added, + out - temp, + STRING_MULTIBYTE (string)) + : string); SAFE_FREE (); return result; } /* Like find_newline, but doesn't use the cache, and only searches forward. */ -static ptrdiff_t +ptrdiff_t find_newline1 (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t end, - ptrdiff_t end_byte, ptrdiff_t count, ptrdiff_t *shortage, + ptrdiff_t end_byte, ptrdiff_t count, ptrdiff_t *counted, ptrdiff_t *bytepos, bool allow_quit) { if (count > 0) @@ -3197,8 +3210,8 @@ find_newline1 (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t end, if (end_byte == -1) end_byte = CHAR_TO_BYTE (end); - if (shortage != 0) - *shortage = 0; + if (counted) + *counted = count; if (count > 0) while (start != end) @@ -3255,8 +3268,8 @@ find_newline1 (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t end, } } - if (shortage) - *shortage = count; + if (counted) + *counted -= count; if (bytepos) { *bytepos = start_byte == -1 ? CHAR_TO_BYTE (start) : start_byte; @@ -3277,7 +3290,7 @@ the buffer. If the buffer doesn't have a cache, the value is nil. */) (Lisp_Object buffer) { struct buffer *buf, *old = NULL; - ptrdiff_t shortage, nl_count_cache, nl_count_buf; + ptrdiff_t nl_count_cache, nl_count_buf; Lisp_Object cache_newlines, buf_newlines, val; ptrdiff_t from, found, i; @@ -3303,79 +3316,63 @@ the buffer. If the buffer doesn't have a cache, the value is nil. */) /* How many newlines are there according to the cache? */ find_newline (BEGV, BEGV_BYTE, ZV, ZV_BYTE, - TYPE_MAXIMUM (ptrdiff_t), &shortage, NULL, true); - nl_count_cache = TYPE_MAXIMUM (ptrdiff_t) - shortage; + TYPE_MAXIMUM (ptrdiff_t), &nl_count_cache, NULL, true); /* Create vector and populate it. */ - cache_newlines = make_uninit_vector (nl_count_cache); + cache_newlines = make_vector (nl_count_cache, make_fixnum (-1)); if (nl_count_cache) { for (from = BEGV, found = from, i = 0; from < ZV; from = found, i++) { - ptrdiff_t from_byte = CHAR_TO_BYTE (from); + ptrdiff_t from_byte = CHAR_TO_BYTE (from), counted; - found = find_newline (from, from_byte, 0, -1, 1, &shortage, + found = find_newline (from, from_byte, 0, -1, 1, &counted, NULL, true); - if (shortage != 0 || i >= nl_count_cache) + if (counted == 0 || i >= nl_count_cache) break; - ASET (cache_newlines, i, make_number (found - 1)); + ASET (cache_newlines, i, make_fixnum (found - 1)); } - /* Fill the rest of slots with an invalid position. */ - for ( ; i < nl_count_cache; i++) - ASET (cache_newlines, i, make_number (-1)); } /* Now do the same, but without using the cache. */ find_newline1 (BEGV, BEGV_BYTE, ZV, ZV_BYTE, - TYPE_MAXIMUM (ptrdiff_t), &shortage, NULL, true); - nl_count_buf = TYPE_MAXIMUM (ptrdiff_t) - shortage; - buf_newlines = make_uninit_vector (nl_count_buf); + TYPE_MAXIMUM (ptrdiff_t), &nl_count_buf, NULL, true); + buf_newlines = make_vector (nl_count_buf, make_fixnum (-1)); if (nl_count_buf) { for (from = BEGV, found = from, i = 0; from < ZV; from = found, i++) { - ptrdiff_t from_byte = CHAR_TO_BYTE (from); + ptrdiff_t from_byte = CHAR_TO_BYTE (from), counted; - found = find_newline1 (from, from_byte, 0, -1, 1, &shortage, + found = find_newline1 (from, from_byte, 0, -1, 1, &counted, NULL, true); - if (shortage != 0 || i >= nl_count_buf) + if (counted == 0 || i >= nl_count_buf) break; - ASET (buf_newlines, i, make_number (found - 1)); + ASET (buf_newlines, i, make_fixnum (found - 1)); } - for ( ; i < nl_count_buf; i++) - ASET (buf_newlines, i, make_number (-1)); } /* Construct the value and return it. */ - val = make_uninit_vector (2); - ASET (val, 0, cache_newlines); - ASET (val, 1, buf_newlines); + val = CALLN (Fvector, cache_newlines, buf_newlines); if (old != NULL) set_buffer_internal_1 (old); return val; } + +static void syms_of_search_for_pdumper (void); + void syms_of_search (void) { - register int i; - - for (i = 0; i < REGEXP_CACHE_SIZE; ++i) + for (int i = 0; i < REGEXP_CACHE_SIZE; ++i) { - searchbufs[i].buf.allocated = 100; - searchbufs[i].buf.buffer = xmalloc (100); - searchbufs[i].buf.fastmap = searchbufs[i].fastmap; - searchbufs[i].regexp = Qnil; - searchbufs[i].f_whitespace_regexp = Qnil; - searchbufs[i].syntax_table = Qnil; staticpro (&searchbufs[i].regexp); staticpro (&searchbufs[i].f_whitespace_regexp); staticpro (&searchbufs[i].syntax_table); - searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]); } - searchbuf_head = &searchbufs[0]; /* Error condition used for failing searches. */ DEFSYM (Qsearch_failed, "search-failed"); @@ -3388,33 +3385,31 @@ syms_of_search (void) DEFSYM (Qinvalid_regexp, "invalid-regexp"); Fput (Qsearch_failed, Qerror_conditions, - listn (CONSTYPE_PURE, 2, Qsearch_failed, Qerror)); + pure_list (Qsearch_failed, Qerror)); Fput (Qsearch_failed, Qerror_message, build_pure_c_string ("Search failed")); Fput (Quser_search_failed, Qerror_conditions, - listn (CONSTYPE_PURE, 4, - Quser_search_failed, Quser_error, Qsearch_failed, Qerror)); + pure_list (Quser_search_failed, Quser_error, Qsearch_failed, Qerror)); Fput (Quser_search_failed, Qerror_message, build_pure_c_string ("Search failed")); Fput (Qinvalid_regexp, Qerror_conditions, - listn (CONSTYPE_PURE, 2, Qinvalid_regexp, Qerror)); + pure_list (Qinvalid_regexp, Qerror)); Fput (Qinvalid_regexp, Qerror_message, build_pure_c_string ("Invalid regexp")); - last_thing_searched = Qnil; - staticpro (&last_thing_searched); - - saved_last_thing_searched = Qnil; - staticpro (&saved_last_thing_searched); + re_match_object = Qnil; + staticpro (&re_match_object); DEFVAR_LISP ("search-spaces-regexp", Vsearch_spaces_regexp, doc: /* Regexp to substitute for bunches of spaces in regexp search. Some commands use this for user-specified regexps. Spaces that occur inside character classes or repetition operators or other such regexp constructs are not replaced with this. -A value of nil (which is the normal value) means treat spaces literally. */); +A value of nil (which is the normal value) means treat spaces +literally. Note that a value with capturing groups can change the +numbering of existing capture groups in unexpected ways. */); Vsearch_spaces_regexp = Qnil; DEFSYM (Qinhibit_changing_match_data, "inhibit-changing-match-data"); @@ -3441,6 +3436,26 @@ is to bind it with `let' around a small expression. */); defsubr (&Smatch_end); defsubr (&Smatch_data); defsubr (&Sset_match_data); + defsubr (&Smatch_data__translate); defsubr (&Sregexp_quote); defsubr (&Snewline_cache_check); + + pdumper_do_now_and_after_load (syms_of_search_for_pdumper); +} + +static void +syms_of_search_for_pdumper (void) +{ + for (int i = 0; i < REGEXP_CACHE_SIZE; ++i) + { + searchbufs[i].buf.allocated = 100; + searchbufs[i].buf.buffer = xmalloc (100); + searchbufs[i].buf.fastmap = searchbufs[i].fastmap; + searchbufs[i].regexp = Qnil; + searchbufs[i].f_whitespace_regexp = Qnil; + searchbufs[i].busy = false; + searchbufs[i].syntax_table = Qnil; + searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]); + } + searchbuf_head = &searchbufs[0]; } |