diff options
author | Michal Nazarewicz <mina86@mina86.com> | 2016-07-17 03:09:38 +0200 |
---|---|---|
committer | Michal Nazarewicz <mina86@mina86.com> | 2016-08-02 15:39:10 +0200 |
commit | 4538a5e37e8dacde4b3e828d832c4c558a146912 (patch) | |
tree | 43a158bf0635a01bf5946730ac439fd0b3b8f606 /src/regex.h | |
parent | e7257061317c604492d20f26f312b9e925aa1860 (diff) | |
download | emacs-4538a5e37e8dacde4b3e828d832c4c558a146912.tar.gz emacs-4538a5e37e8dacde4b3e828d832c4c558a146912.tar.bz2 emacs-4538a5e37e8dacde4b3e828d832c4c558a146912.zip |
Refactor regex character class parsing in [:name:]
The re_wctype function is used in three separate places, and in each
of them it is surrounded by nearly identical code that extracts the
name from [:name:]. Furthermore, re_wctype requires a NUL-terminated
string, so the name of the character class has to be copied to a
temporary buffer.
The code duplication and unnecessary memory copying can be avoided by
pushing the responsibility of parsing the whole [:name:] sequence to
the function.
Furthermore, because the function now does the parsing itself, it knows
the length of the character class name and can take advantage of that
information to skip some string comparisons and to use a
constant-length memcmp instead of strcmp, which has to check for NUL
bytes.
* src/regex.c (re_wctype): Delete function. Replace it with:
(re_wctype_parse): New function which parses a whole [:name:] string
and returns a RECC_* constant or -1 if the string is not of [:name:]
format.
(regex_compile): Use re_wctype_parse.
* src/syntax.c (skip_chars): Use re_wctype_parse.
Diffstat (limited to 'src/regex.h')
-rw-r--r-- | src/regex.h | 14 |
1 files changed, 1 insertions, 13 deletions
diff --git a/src/regex.h b/src/regex.h index 817167a07ca..01b659addbb 100644 --- a/src/regex.h +++ b/src/regex.h @@ -585,25 +585,13 @@ extern void regfree (regex_t *__preg); /* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */ # include <wchar.h> # include <wctype.h> -#endif -#if WIDE_CHAR_SUPPORT -/* The GNU C library provides support for user-defined character classes - and the functions from ISO C amendment 1. */ -# ifdef CHARCLASS_NAME_MAX -# define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX -# else -/* This shouldn't happen but some implementation might still have this - problem. Use a reasonable default value. */ -# define CHAR_CLASS_MAX_LENGTH 256 -# endif typedef wctype_t re_wctype_t; typedef wchar_t re_wchar_t; # define re_wctype wctype # define re_iswctype iswctype # define re_wctype_to_bit(cc) 0 #else -# define CHAR_CLASS_MAX_LENGTH 9 /* Namely, `multibyte'. */ # ifndef emacs # define btowc(c) c # endif @@ -621,7 +609,7 @@ typedef enum { RECC_ERROR = 0, } re_wctype_t; extern char re_iswctype (int ch, re_wctype_t cc); -extern re_wctype_t re_wctype (const unsigned char* str); +extern re_wctype_t re_wctype_parse (const unsigned char **strp, unsigned limit); typedef int re_wchar_t; |