Refactor regex character class parsing in [:name:]

The re_wctype function is used in three separate places, and in each of them it is surrounded by almost identical code that extracts the name from [:name:]. Furthermore, re_wctype requires a NUL-terminated string, so the name of the character class has to be copied to a temporary buffer.

The code duplication and the unnecessary memory copying can both be avoided by pushing the responsibility for parsing the whole [:name:] sequence into the function.

Furthermore, since the function now has access to the length of the character class name (because it is doing the parsing), it can use that information to skip some string comparisons and to use a constant-length memcmp instead of strcmp, which has to take care of NUL bytes.

* src/regex.c (re_wctype): Delete function. Replace it with:
(re_wctype_parse): New function which parses a whole [:name:] string and returns a RECC_* constant, or -1 if the string is not of [:name:] format.
(regex_compile): Use re_wctype_parse.
* src/syntax.c (skip_chars): Use re_wctype_parse.
author: Michal Nazarewicz <mina86@mina86.com> 2016-07-17 03:09:38 +0200
committer: Michal Nazarewicz <mina86@mina86.com> 2016-08-02 15:39:10 +0200
commit: 4538a5e37e8dacde4b3e828d832c4c558a146912 (patch)
tree: 43a158bf0635a01bf5946730ac439fd0b3b8f606 /src/regex.h
parent: e7257061317c604492d20f26f312b9e925aa1860 (diff)
download: emacs-4538a5e37e8dacde4b3e828d832c4c558a146912.tar.gz
emacs-4538a5e37e8dacde4b3e828d832c4c558a146912.tar.bz2
emacs-4538a5e37e8dacde4b3e828d832c4c558a146912.zip
1 files changed, 1 insertions, 13 deletions
diff --git a/src/regex.h b/src/regex.h
index 817167a07ca..01b659addbb 100644
--- a/src/regex.h
+++ b/src/regex.h
@@ -585,25 +585,13 @@ extern void regfree (regex_t *__preg);
 /* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>.  */
 # include <wchar.h>
 # include <wctype.h>
-#endif
 
-#if WIDE_CHAR_SUPPORT
-/* The GNU C library provides support for user-defined character classes
-   and the functions from ISO C amendment 1.  */
-# ifdef CHARCLASS_NAME_MAX
-#  define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
-# else
-/* This shouldn't happen but some implementation might still have this
-   problem.  Use a reasonable default value.  */
-#  define CHAR_CLASS_MAX_LENGTH 256
-# endif
 typedef wctype_t re_wctype_t;
 typedef wchar_t re_wchar_t;
 # define re_wctype wctype
 # define re_iswctype iswctype
 # define re_wctype_to_bit(cc) 0
 #else
-# define CHAR_CLASS_MAX_LENGTH  9 /* Namely, `multibyte'.  */
 # ifndef emacs
 #  define btowc(c) c
 # endif
@@ -621,7 +609,7 @@ typedef enum { RECC_ERROR = 0,
 } re_wctype_t;
 
 extern char re_iswctype (int ch,    re_wctype_t cc);
-extern re_wctype_t re_wctype (const unsigned char* str);
+extern re_wctype_t re_wctype_parse (const unsigned char **strp, unsigned limit);
 
 typedef int re_wchar_t;
author	Michal Nazarewicz <mina86@mina86.com>	2016-07-17 03:09:38 +0200
committer	Michal Nazarewicz <mina86@mina86.com>	2016-08-02 15:39:10 +0200
commit	4538a5e37e8dacde4b3e828d832c4c558a146912 (patch)
tree	43a158bf0635a01bf5946730ac439fd0b3b8f606 /src/regex.h
parent	e7257061317c604492d20f26f312b9e925aa1860 (diff)
download	emacs-4538a5e37e8dacde4b3e828d832c4c558a146912.tar.gz emacs-4538a5e37e8dacde4b3e828d832c4c558a146912.tar.bz2 emacs-4538a5e37e8dacde4b3e828d832c4c558a146912.zip