summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- src/emacs.c 22
-rw-r--r-- src/regex.c 73
-rw-r--r-- src/regex.h 7
3 files changed, 62 insertions, 40 deletions
diff --git a/src/emacs.c b/src/emacs.c
index ae29e9ad29b..28b395c4fb4 100644
--- a/src/emacs.c
+++ b/src/emacs.c
@@ -831,14 +831,16 @@ main (int argc, char **argv)
rlim_t lim = rlim.rlim_cur;
/* Approximate the amount regex.c needs per unit of
- re_max_failures, then add 33% to cover the size of the
+ emacs_re_max_failures, then add 33% to cover the size of the
smaller stacks that regex.c successively allocates and
discards on its way to the maximum. */
- int ratio = 20 * sizeof (char *);
- ratio += ratio / 3;
+ int min_ratio = 20 * sizeof (char *);
+ int ratio = min_ratio + min_ratio / 3;
- /* Extra space to cover what we're likely to use for other reasons. */
- int extra = 200000;
+ /* Extra space to cover what we're likely to use for other
+ reasons. For example, a typical GC might take 30K stack
+ frames. */
+ int extra = (30 * 1000) * 50;
bool try_to_grow_stack = true;
#ifndef CANNOT_DUMP
@@ -847,7 +849,7 @@ main (int argc, char **argv)
if (try_to_grow_stack)
{
- rlim_t newlim = re_max_failures * ratio + extra;
+ rlim_t newlim = emacs_re_max_failures * ratio + extra;
/* Round the new limit to a page boundary; this is needed
for Darwin kernel 15.4.0 (see Bug#23622) and perhaps
@@ -869,9 +871,11 @@ main (int argc, char **argv)
lim = newlim;
}
}
-
- /* Don't let regex.c overflow the stack. */
- re_max_failures = lim < extra ? 0 : min (lim - extra, SIZE_MAX) / ratio;
+ /* If the stack is big enough, let regex.c use more of it before
+ falling back to heap allocation.  Divide by RATIO before
+ multiplying by MIN_RATIO: MIN_RATIO / RATIO is an integer
+ division that always yields 0.  Treat LIM < EXTRA as no spare
+ stack so the unsigned subtraction cannot wrap around, and clamp
+ to PTRDIFF_MAX because emacs_re_safe_alloca is a ptrdiff_t.  */
+ emacs_re_safe_alloca = max
+ (min (lim < extra ? 0 : lim - extra, PTRDIFF_MAX) / ratio * min_ratio,
+ MAX_ALLOCA);
}
#endif /* HAVE_SETRLIMIT and RLIMIT_STACK and not CYGWIN */
diff --git a/src/regex.c b/src/regex.c
index 8aa54331fc7..db3f0c16a2d 100644
--- a/src/regex.c
+++ b/src/regex.c
@@ -431,9 +431,12 @@ init_syntax_once (void)
/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we
use `alloca' instead of `malloc'. This is because using malloc in
- re_search* or re_match* could cause memory leaks when C-g is used in
- Emacs; also, malloc is slower and causes storage fragmentation. On
- the other hand, malloc is more portable, and easier to debug.
+ re_search* or re_match* could cause memory leaks when C-g is used
+ in Emacs (note that SAFE_ALLOCA could also call malloc, but does so
+ via `record_xmalloc' which uses `unwind_protect' to ensure the
+ memory is freed even in case of non-local exits); also, malloc is
+ slower and causes storage fragmentation. On the other hand, malloc
+ is more portable, and easier to debug.
Because we sometimes use alloca, some routines have to be macros,
not functions -- `alloca'-allocated space disappears at the end of the
@@ -448,7 +451,13 @@ init_syntax_once (void)
#else /* not REGEX_MALLOC */
# ifdef emacs
-# define REGEX_USE_SAFE_ALLOCA USE_SAFE_ALLOCA
+/* This may be adjusted in main(), if the stack is successfully grown. */
+ptrdiff_t emacs_re_safe_alloca = MAX_ALLOCA;
+/* Like USE_SAFE_ALLOCA, but use emacs_re_safe_alloca. */
+# define REGEX_USE_SAFE_ALLOCA \
+ ptrdiff_t sa_avail = emacs_re_safe_alloca; \
+ ptrdiff_t sa_count = SPECPDL_INDEX (); bool sa_must_free = false
+
# define REGEX_SAFE_FREE() SAFE_FREE ()
# define REGEX_ALLOCATE SAFE_ALLOCA
# else
@@ -1196,24 +1205,28 @@ static const char *re_error_msgid[] =
gettext_noop ("Range striding over charsets") /* REG_ERANGEX */
};
-/* Avoiding alloca during matching, to placate r_alloc. */
-
-/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
- searching and matching functions should not call alloca. On some
- systems, alloca is implemented in terms of malloc, and if we're
- using the relocating allocator routines, then malloc could cause a
- relocation, which might (if the strings being searched are in the
- ralloc heap) shift the data out from underneath the regexp
- routines.
-
- Here's another reason to avoid allocation: Emacs
- processes input from X in a signal handler; processing X input may
- call malloc; if input arrives while a matching routine is calling
- malloc, then we're scrod. But Emacs can't just block input while
- calling matching routines; then we don't notice interrupts when
- they come in. So, Emacs blocks input around all regexp calls
- except the matching calls, which it leaves unprotected, in the
- faith that they will not malloc. */
+/* Whether to allocate memory during matching. */
+
+/* Define MATCH_MAY_ALLOCATE to allow the searching and matching
+ functions to allocate memory for the failure stack and registers.
+ It should normally be defined, because otherwise the searching and
+ matching routines will have much smaller memory resources at their
+ disposal, and therefore might fail to handle complex regexps.
+ Therefore undefine MATCH_MAY_ALLOCATE only in the following
+ exceptional situations:
+
+ . When running on a system where memory is at a premium.
+ . When alloca cannot be used at all, perhaps due to bugs in
+ its implementation, or its being unavailable, or due to a
+ very small stack size. This requires defining REGEX_MALLOC
+ to use malloc instead, which in turn could lead to memory
+ leaks if search is interrupted by a signal. (For these
+ reasons, defining REGEX_MALLOC when building Emacs
+ automatically undefines MATCH_MAY_ALLOCATE, but outside
+ Emacs you may not care about memory leaks.) If you want to
+ prevent the memory leaks, undefine MATCH_MAY_ALLOCATE.
+ . When code that calls the searching and matching functions
+ cannot allow memory allocation, for whatever reasons. */
/* Normally, this is fine. */
#define MATCH_MAY_ALLOCATE
@@ -1250,9 +1263,9 @@ static const char *re_error_msgid[] =
whose default stack limit is 2mb. In order for a larger
value to work reliably, you have to try to make it accord
with the process stack limit. */
-size_t re_max_failures = 40000;
+size_t emacs_re_max_failures = 40000;
# else
-size_t re_max_failures = 4000;
+size_t emacs_re_max_failures = 4000;
# endif
union fail_stack_elt
@@ -1305,7 +1318,7 @@ typedef struct
/* Double the size of FAIL_STACK, up to a limit
- which allows approximately `re_max_failures' items.
+ which allows approximately `emacs_re_max_failures' items.
Return 1 if succeeds, and 0 if either ran out of memory
allocating space for it or it was already too large.
@@ -1320,19 +1333,19 @@ typedef struct
#define FAIL_STACK_GROWTH_FACTOR 4
#define GROW_FAIL_STACK(fail_stack) \
- (((fail_stack).size >= re_max_failures * TYPICAL_FAILURE_SIZE) \
+ (((fail_stack).size >= emacs_re_max_failures * TYPICAL_FAILURE_SIZE) \
? 0 \
: ((fail_stack).stack \
= REGEX_REALLOCATE_STACK ((fail_stack).stack, \
(fail_stack).size * sizeof (fail_stack_elt_t), \
- min (re_max_failures * TYPICAL_FAILURE_SIZE, \
+ min (emacs_re_max_failures * TYPICAL_FAILURE_SIZE, \
((fail_stack).size * FAIL_STACK_GROWTH_FACTOR)) \
* sizeof (fail_stack_elt_t)), \
\
(fail_stack).stack == NULL \
? 0 \
: ((fail_stack).size \
- = (min (re_max_failures * TYPICAL_FAILURE_SIZE, \
+ = (min (emacs_re_max_failures * TYPICAL_FAILURE_SIZE, \
((fail_stack).size * FAIL_STACK_GROWTH_FACTOR))), \
1)))
@@ -3641,9 +3654,9 @@ regex_compile (const_re_char *pattern, size_t size,
{
int num_regs = bufp->re_nsub + 1;
- if (fail_stack.size < re_max_failures * TYPICAL_FAILURE_SIZE)
+ if (fail_stack.size < emacs_re_max_failures * TYPICAL_FAILURE_SIZE)
{
- fail_stack.size = re_max_failures * TYPICAL_FAILURE_SIZE;
+ fail_stack.size = emacs_re_max_failures * TYPICAL_FAILURE_SIZE;
/* Resize the stack buffer to match the new fail_stack.size. */
fail_stack.stack = realloc (fail_stack.stack,
fail_stack.size * sizeof *fail_stack.stack);
}
diff --git a/src/regex.h b/src/regex.h
index 34c9929f93d..1d439de259c 100644
--- a/src/regex.h
+++ b/src/regex.h
@@ -186,7 +186,12 @@ typedef unsigned long reg_syntax_t;
#endif
/* Roughly the maximum number of failure points on the stack. */
-extern size_t re_max_failures;
+extern size_t emacs_re_max_failures;
+
+#ifdef emacs
+/* Amount of memory that we can safely stack allocate. */
+extern ptrdiff_t emacs_re_safe_alloca;
+#endif
/* Define combinations of the above bits for the standard possibilities.