diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/dired.c | 4 | ||||
-rw-r--r-- | src/regex.c | 76 | ||||
-rw-r--r-- | src/regex.h | 4 | ||||
-rw-r--r-- | src/search.c | 36 |
4 files changed, 103 insertions, 17 deletions
diff --git a/src/dired.c b/src/dired.c index dba575ce4c2..006f74c834d 100644 --- a/src/dired.c +++ b/src/dired.c @@ -259,9 +259,11 @@ directory_files_internal (Lisp_Object directory, Lisp_Object full, QUIT; bool wanted = (NILP (match) - || re_search (bufp, SSDATA (name), len, 0, len, 0) >= 0); + || (re_match_object = name, + re_search (bufp, SSDATA (name), len, 0, len, 0) >= 0)); immediate_quit = 0; + re_match_object = Qnil; /* Stop protecting name from GC. */ if (wanted) { diff --git a/src/regex.c b/src/regex.c index 164eb4612ae..1346ef401cb 100644 --- a/src/regex.c +++ b/src/regex.c @@ -152,6 +152,8 @@ /* Converts the pointer to the char to BEG-based offset from the start. */ # define PTR_TO_OFFSET(d) POS_AS_IN_BUFFER (POINTER_TO_OFFSET (d)) +/* Strings are 0-indexed, buffers are 1-indexed; we pun on the boolean + result to get the right base index. */ # define POS_AS_IN_BUFFER(p) ((p) + (NILP (re_match_object) || BUFFERP (re_match_object))) # define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte) @@ -1436,11 +1438,62 @@ typedef struct #define NEXT_FAILURE_HANDLE(h) fail_stack.stack[(h) - 3].integer #define TOP_FAILURE_HANDLE() fail_stack.frame +#ifdef emacs +#define STR_BASE_PTR(obj) \ + (NILP (obj) ? current_buffer->text->beg : \ + STRINGP (obj) ? SDATA (obj) : \ + NULL) +#else +#define STR_BASE_PTR(obj) NULL +#endif #define ENSURE_FAIL_STACK(space) \ while (REMAINING_AVAIL_SLOTS <= space) { \ + re_char* orig_base = STR_BASE_PTR (re_match_object); \ + ptrdiff_t string1_off, end1_off, end_match_1_off; \ + ptrdiff_t string2_off, end2_off, end_match_2_off; \ + ptrdiff_t d_off, dend_off, dfail_off; \ + if (orig_base) \ + { \ + if (string1) \ + { \ + string1_off = string1 - orig_base; \ + end1_off = end1 - orig_base; \ + end_match_1_off = end_match_1 - orig_base; \ + } \ + if (string2) \ + { \ + string2_off = string2 - orig_base; \ + end2_off = end2 - orig_base; \ + end_match_2_off = end_match_2 - orig_base; \ + } \ + d_off = d - orig_base; \ + dend_off = dend - orig_base; \ + dfail_off = dfail - orig_base; \ + } \ if (!GROW_FAIL_STACK (fail_stack)) \ - return -2; \ + return -2; \ + /* GROW_FAIL_STACK may call malloc and relocate the string */ \ + /* pointers. */ \ + re_char* new_base = STR_BASE_PTR (re_match_object); \ + if (new_base && new_base != orig_base) \ + { \ + if (string1) \ + { \ + string1 = new_base + string1_off; \ + end1 = new_base + end1_off; \ + end_match_1 = new_base + end_match_1_off; \ + } \ + if (string2) \ + { \ + string2 = new_base + string2_off; \ + end2 = new_base + end2_off; \ + end_match_2 = new_base + end_match_2_off; \ + } \ + d = new_base + d_off; \ + dend = new_base + dend_off; \ + dfail = new_base + dfail_off; \ + } \ DEBUG_PRINT ("\n Doubled stack; size now: %zd\n", (fail_stack).size);\ DEBUG_PRINT (" slots available: %zd\n", REMAINING_AVAIL_SLOTS);\ } @@ -4443,6 +4496,16 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1, && !bufp->can_be_null) return -1; + /* re_match_2_internal may allocate, causing a relocation of the + lisp text object that we're searching. */ + ptrdiff_t offset1, offset2; + re_char *orig_base = STR_BASE_PTR (re_match_object); + if (orig_base) + { + if (string1) offset1 = string1 - orig_base; + if (string2) offset2 = string2 - orig_base; + } + val = re_match_2_internal (bufp, string1, size1, string2, size2, startpos, regs, stop); @@ -4452,6 +4515,13 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1, if (val == -2) return -2; + re_char *new_base = STR_BASE_PTR (re_match_object); + if (new_base && new_base != orig_base) + { + if (string1) string1 = offset1 + new_base; + if (string2) string2 = offset2 + new_base; + } + advance: if (!range) break; @@ -4887,8 +4957,8 @@ WEAK_ALIAS (__re_match, re_match) #endif /* not emacs */ #ifdef emacs -/* In Emacs, this is the string or buffer in which we - are matching. It is used for looking up syntax properties. */ +/* In Emacs, this is the string or buffer in which we are matching. + See the declaration in regex.h for details. */ Lisp_Object re_match_object; #endif diff --git a/src/regex.h b/src/regex.h index 51f4424ce94..61c771c045f 100644 --- a/src/regex.h +++ b/src/regex.h @@ -169,7 +169,9 @@ extern reg_syntax_t re_syntax_options; #ifdef emacs # include "lisp.h" /* In Emacs, this is the string or buffer in which we are matching. - It is used for looking up syntax properties. + It is used for looking up syntax properties, and also to recompute + pointers in case the object is relocated as a side effect of + calling malloc (if it calls r_alloc_sbrk in ralloc.c). If the value is a Lisp string object, we are matching text in that string; if it's nil, we are matching text in the current buffer; if diff --git a/src/search.c b/src/search.c index dc7e2d88603..ec5a1d7733f 100644 --- a/src/search.c +++ b/src/search.c @@ -287,8 +287,10 @@ looking_at_1 (Lisp_Object string, bool posix) immediate_quit = 1; QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */ - /* Get pointers and sizes of the two strings - that make up the visible portion of the buffer. */ + /* Get pointers and sizes of the two strings that make up the + visible portion of the buffer. Note that we can use pointers + here, unlike in search_buffer, because we only call re_match_2 + once, after which we never use the pointers again. */ p1 = BEGV_ADDR; s1 = GPT_BYTE - BEGV_BYTE; @@ -407,6 +409,7 @@ string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start, (NILP (Vinhibit_changing_match_data) ? &search_regs : NULL)); immediate_quit = 0; + re_match_object = Qnil; /* Stop protecting string from GC. */ /* Set last_thing_searched only when match data is changed. */ if (NILP (Vinhibit_changing_match_data)) @@ -477,6 +480,7 @@ fast_string_match_internal (Lisp_Object regexp, Lisp_Object string, SBYTES (string), 0, SBYTES (string), 0); immediate_quit = 0; + re_match_object = Qnil; /* Stop protecting string from GC. */ return val; } @@ -564,6 +568,7 @@ fast_looking_at (Lisp_Object regexp, ptrdiff_t pos, ptrdiff_t pos_byte, len = re_match_2 (buf, (char *) p1, s1, (char *) p2, s2, pos_byte, NULL, limit_byte); immediate_quit = 0; + re_match_object = Qnil; /* Stop protecting string from GC. */ return len; } @@ -1178,8 +1183,8 @@ search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte, if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp))) { - unsigned char *p1, *p2; - ptrdiff_t s1, s2; + unsigned char *base; + ptrdiff_t off1, off2, s1, s2; struct re_pattern_buffer *bufp; bufp = compile_pattern (string, @@ -1193,16 +1198,19 @@ search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte, can take too long. */ QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */ - /* Get pointers and sizes of the two strings - that make up the visible portion of the buffer. */ + /* Get offsets and sizes of the two strings that make up the + visible portion of the buffer. We compute offsets instead of + pointers because re_search_2 may call malloc and therefore + change the buffer text address. */ - p1 = BEGV_ADDR; + base = current_buffer->text->beg; + off1 = BEGV_ADDR - base; s1 = GPT_BYTE - BEGV_BYTE; - p2 = GAP_END_ADDR; + off2 = GAP_END_ADDR - base; s2 = ZV_BYTE - GPT_BYTE; if (s1 < 0) { - p2 = p1; + off2 = off1; s2 = ZV_BYTE - BEGV_BYTE; s1 = 0; } @@ -1217,7 +1225,9 @@ search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte, { ptrdiff_t val; - val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2, + val = re_search_2 (bufp, + (char*) (base + off1), s1, + (char*) (base + off2), s2, pos_byte - BEGV_BYTE, lim_byte - pos_byte, (NILP (Vinhibit_changing_match_data) ? &search_regs : &search_regs_1), @@ -1262,8 +1272,10 @@ search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte, { ptrdiff_t val; - val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2, - pos_byte - BEGV_BYTE, lim_byte - pos_byte, + val = re_search_2 (bufp, + (char*) (base + off1), s1, + (char*) (base + off2), s2, + pos_byte - BEGV_BYTE, lim_byte - pos_byte, (NILP (Vinhibit_changing_match_data) ? &search_regs : &search_regs_1), lim_byte - BEGV_BYTE); |