diff options
Diffstat (limited to 'src/regex-emacs.c')
-rw-r--r-- | src/regex-emacs.c | 115 |
1 files changed, 61 insertions, 54 deletions
diff --git a/src/regex-emacs.c b/src/regex-emacs.c index 5e23fc94e4f..971a5f63749 100644 --- a/src/regex-emacs.c +++ b/src/regex-emacs.c @@ -58,7 +58,7 @@ #define RE_STRING_CHAR(p, multibyte) \ (multibyte ? STRING_CHAR (p) : *(p)) #define RE_STRING_CHAR_AND_LENGTH(p, len, multibyte) \ - (multibyte ? STRING_CHAR_AND_LENGTH (p, len) : ((len) = 1, *(p))) + (multibyte ? string_char_and_length (p, &(len)) : ((len) = 1, *(p))) #define RE_CHAR_TO_MULTIBYTE(c) UNIBYTE_TO_CHAR (c) @@ -89,7 +89,7 @@ #define GET_CHAR_AFTER(c, p, len) \ do { \ if (target_multibyte) \ - (c) = STRING_CHAR_AND_LENGTH (p, len); \ + (c) = string_char_and_length (p, &(len)); \ else \ { \ (c) = *p; \ @@ -929,7 +929,7 @@ typedef struct ? 0 \ : ((fail_stack).stack \ = REGEX_REALLOCATE ((fail_stack).stack, \ - (fail_stack).size * sizeof (fail_stack_elt_t), \ + (fail_stack).avail * sizeof (fail_stack_elt_t), \ min (emacs_re_max_failures * TYPICAL_FAILURE_SIZE, \ ((fail_stack).size * FAIL_STACK_GROWTH_FACTOR)) \ * sizeof (fail_stack_elt_t)), \ @@ -969,7 +969,11 @@ typedef struct #define ENSURE_FAIL_STACK(space) \ while (REMAINING_AVAIL_SLOTS <= space) { \ if (!GROW_FAIL_STACK (fail_stack)) \ - return -2; \ + { \ + unbind_to (count, Qnil); \ + SAFE_FREE (); \ + return -2; \ + } \ DEBUG_PRINT ("\n Doubled stack; size now: %td\n", fail_stack.size); \ DEBUG_PRINT (" slots available: %td\n", REMAINING_AVAIL_SLOTS);\ } @@ -979,6 +983,8 @@ while (REMAINING_AVAIL_SLOTS <= space) { \ do { \ char *destination; \ intptr_t n = num; \ + eassert (0 < n && n < num_regs); \ + eassert (REG_UNSET (regstart[n]) <= REG_UNSET (regend[n])); \ ENSURE_FAIL_STACK(3); \ DEBUG_PRINT (" Push reg %"PRIdPTR" (spanning %p -> %p)\n", \ n, regstart[n], regend[n]); \ @@ -1017,8 +1023,10 @@ do { \ } \ else \ { \ + eassert (0 < pfreg && pfreg < num_regs); \ regend[pfreg] = POP_FAILURE_POINTER (); \ regstart[pfreg] = POP_FAILURE_POINTER (); \ + eassert (REG_UNSET (regstart[pfreg]) <= REG_UNSET (regend[pfreg])); \ DEBUG_PRINT (" Pop reg %ld (spanning %p -> %p)\n", \ pfreg, regstart[pfreg], regend[pfreg]); \ } \ @@ -1757,6 +1765,7 @@ regex_compile (re_char *pattern, ptrdiff_t size, /* Initialize the compile stack. */ compile_stack.stack = xmalloc (INIT_COMPILE_STACK_SIZE * sizeof *compile_stack.stack); + __lsan_ignore_object (compile_stack.stack); compile_stack.size = INIT_COMPILE_STACK_SIZE; compile_stack.avail = 0; @@ -2113,17 +2122,20 @@ regex_compile (re_char *pattern, ptrdiff_t size, if (CHAR_BYTE8_P (c1)) c = BYTE8_TO_CHAR (128); } - if (CHAR_BYTE8_P (c)) - { - c = CHAR_TO_BYTE8 (c); - c1 = CHAR_TO_BYTE8 (c1); - for (; c <= c1; c++) - SET_LIST_BIT (c); - } - else if (multibyte) - SETUP_MULTIBYTE_RANGE (range_table_work, c, c1); - else - SETUP_UNIBYTE_RANGE (range_table_work, c, c1); + if (c <= c1) + { + if (CHAR_BYTE8_P (c)) + { + c = CHAR_TO_BYTE8 (c); + c1 = CHAR_TO_BYTE8 (c1); + for (; c <= c1; c++) + SET_LIST_BIT (c); + } + else if (multibyte) + SETUP_MULTIBYTE_RANGE (range_table_work, c, c1); + else + SETUP_UNIBYTE_RANGE (range_table_work, c, c1); + } } } @@ -3164,10 +3176,6 @@ re_search (struct re_pattern_buffer *bufp, const char *string, ptrdiff_t size, regs, size); } -/* Head address of virtual concatenation of string. */ -#define HEAD_ADDR_VSTRING(P) \ - (((P) >= size1 ? string2 : string1)) - /* Address of POS in the concatenation of virtual string. */ #define POS_ADDR_VSTRING(POS) \ (((POS) >= size1 ? string2 - size1 : string1) + (POS)) @@ -3297,7 +3305,7 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, ptrdiff_t size1, { int buf_charlen; - buf_ch = STRING_CHAR_AND_LENGTH (d, buf_charlen); + buf_ch = string_char_and_length (d, &buf_charlen); buf_ch = RE_TRANSLATE (translate, buf_ch); if (fastmap[CHAR_LEADING_CODE (buf_ch)]) break; @@ -3327,7 +3335,7 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, ptrdiff_t size1, { int buf_charlen; - buf_ch = STRING_CHAR_AND_LENGTH (d, buf_charlen); + buf_ch = string_char_and_length (d, &buf_charlen); if (fastmap[CHAR_LEADING_CODE (buf_ch)]) break; range -= buf_charlen; @@ -3410,16 +3418,12 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, ptrdiff_t size1, if (multibyte) { re_char *p = POS_ADDR_VSTRING (startpos) + 1; - re_char *p0 = p; - re_char *phead = HEAD_ADDR_VSTRING (startpos); + int len = raw_prev_char_len (p); - /* Find the head of multibyte form. */ - PREV_CHAR_BOUNDARY (p, phead); - range += p0 - 1 - p; + range += len - 1; if (range > 0) break; - - startpos -= p0 - 1 - p; + startpos -= len - 1; } } } @@ -3868,6 +3872,10 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string2, ptrdiff_t size2, ptrdiff_t pos, struct re_registers *regs, ptrdiff_t stop) { + eassume (0 <= size1); + eassume (0 <= size2); + eassume (0 <= pos && pos <= stop && stop <= size1 + size2); + /* General temporaries. */ int mcnt; @@ -3923,8 +3931,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, attempt) by a subexpression part of the pattern, that is, the regnum-th regstart pointer points to where in the pattern we began matching and the regnum-th regend points to right after where we - stopped matching the regnum-th subexpression. (The zeroth register - keeps track of what the whole pattern matches.) */ + stopped matching the regnum-th subexpression. */ re_char **regstart UNINIT, **regend UNINIT; /* The following record the register info as found in the above @@ -3973,29 +3980,22 @@ re_match_2_internal (struct re_pattern_buffer *bufp, /* Do not bother to initialize all the register variables if there are no groups in the pattern, as it takes a fair amount of time. If there are groups, we include space for register 0 (the whole - pattern), even though we never use it, since it simplifies the - array indexing. We should fix this. */ - if (bufp->re_nsub) + pattern) in REGSTART[0], even though we never use it, to avoid + the undefined behavior of subtracting 1 from REGSTART. */ + ptrdiff_t re_nsub = num_regs - 1; + if (0 < re_nsub) { - regstart = SAFE_ALLOCA (num_regs * 4 * sizeof *regstart); + regstart = SAFE_ALLOCA ((re_nsub * 4 + 1) * sizeof *regstart); regend = regstart + num_regs; - best_regstart = regend + num_regs; - best_regend = best_regstart + num_regs; - } + best_regstart = regend + re_nsub; + best_regend = best_regstart + re_nsub; - /* The starting position is bogus. */ - if (pos < 0 || pos > size1 + size2) - { - unbind_to (count, Qnil); - SAFE_FREE (); - return -1; + /* Initialize subexpression text positions to unset, to mark ones + that no start_memory/stop_memory has been seen for. */ + for (re_char **apos = regstart + 1; apos < best_regstart + 1; apos++) + *apos = NULL; } - /* Initialize subexpression text positions to -1 to mark ones that no - start_memory/stop_memory has been seen for. */ - for (ptrdiff_t reg = 1; reg < num_regs; reg++) - regstart[reg] = regend[reg] = NULL; - /* We move 'string1' into 'string2' if the latter's empty -- but not if 'string1' is null. */ if (size2 == 0 && string1 != NULL) @@ -4130,6 +4130,8 @@ re_match_2_internal (struct re_pattern_buffer *bufp, { regstart[reg] = best_regstart[reg]; regend[reg] = best_regend[reg]; + eassert (REG_UNSET (regstart[reg]) + <= REG_UNSET (regend[reg])); } } } /* d != end_match_2 */ @@ -4177,7 +4179,9 @@ re_match_2_internal (struct re_pattern_buffer *bufp, for (ptrdiff_t reg = 1; reg < num_regs; reg++) { - if (REG_UNSET (regstart[reg]) || REG_UNSET (regend[reg])) + eassert (REG_UNSET (regstart[reg]) + <= REG_UNSET (regend[reg])); + if (REG_UNSET (regend[reg])) regs->start[reg] = regs->end[reg] = -1; else { @@ -4238,13 +4242,13 @@ re_match_2_internal (struct re_pattern_buffer *bufp, PREFETCH (); if (multibyte) - pat_ch = STRING_CHAR_AND_LENGTH (p, pat_charlen); + pat_ch = string_char_and_length (p, &pat_charlen); else { pat_ch = RE_CHAR_TO_MULTIBYTE (*p); pat_charlen = 1; } - buf_ch = STRING_CHAR_AND_LENGTH (d, buf_charlen); + buf_ch = string_char_and_length (d, &buf_charlen); if (TRANSLATE (buf_ch) != pat_ch) { @@ -4266,7 +4270,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, PREFETCH (); if (multibyte) { - pat_ch = STRING_CHAR_AND_LENGTH (p, pat_charlen); + pat_ch = string_char_and_length (p, &pat_charlen); pat_ch = RE_CHAR_TO_UNIBYTE (pat_ch); } else @@ -4377,12 +4381,12 @@ re_match_2_internal (struct re_pattern_buffer *bufp, registers data structure) under the register number. */ case start_memory: DEBUG_PRINT ("EXECUTING start_memory %d:\n", *p); + eassert (0 < *p && *p < num_regs); /* In case we need to undo this operation (via backtracking). */ PUSH_FAILURE_REG (*p); regstart[*p] = d; - regend[*p] = NULL; /* probably unnecessary. -sm */ DEBUG_PRINT (" regstart: %td\n", POINTER_TO_OFFSET (regstart[*p])); /* Move past the register number and inner group count. */ @@ -4395,6 +4399,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, case stop_memory: DEBUG_PRINT ("EXECUTING stop_memory %d:\n", *p); + eassert (0 < *p && *p < num_regs); eassert (!REG_UNSET (regstart[*p])); /* Strictly speaking, there should be code such as: @@ -4427,7 +4432,9 @@ re_match_2_internal (struct re_pattern_buffer *bufp, DEBUG_PRINT ("EXECUTING duplicate %d.\n", regno); /* Can't back reference a group which we've never matched. */ - if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno])) + eassert (0 < regno && regno < num_regs); + eassert (REG_UNSET (regstart[regno]) <= REG_UNSET (regend[regno])); + if (REG_UNSET (regend[regno])) goto fail; /* Where in input to try to start matching. */ |