diff options
Diffstat (limited to 'src/regex-emacs.h')
-rw-r--r-- | src/regex-emacs.h | 197 |
1 files changed, 197 insertions, 0 deletions
diff --git a/src/regex-emacs.h b/src/regex-emacs.h new file mode 100644 index 00000000000..a849cbea054 --- /dev/null +++ b/src/regex-emacs.h @@ -0,0 +1,197 @@ +/* Emacs regular expression API + + Copyright (C) 1985, 1989-1993, 1995, 2000-2018 Free Software + Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. */ + +#ifndef EMACS_REGEX_H +#define EMACS_REGEX_H 1 + +#include <stddef.h> + +/* This is the structure we store register match data in. + Declare this before including lisp.h, since lisp.h (via thread.h) + uses struct re_registers. */ +struct re_registers +{ + unsigned num_regs; + ptrdiff_t *start; + ptrdiff_t *end; +}; + +#include "lisp.h" + +/* The string or buffer being matched. + It is used for looking up syntax properties. + + If the value is a Lisp string object, match text in that string; if + it's nil, match text in the current buffer; if it's t, match text + in a C string. + + This value is effectively another parameter to re_search_2 and + re_match_2. No calls into Lisp or thread switches are allowed + before setting re_match_object and calling into the regex search + and match functions. These functions capture the current value of + re_match_object into gl_state on entry. + + TODO: turn into an actual function parameter. */ +extern Lisp_Object re_match_object; + +/* Roughly the maximum number of failure points on the stack. */ +extern size_t emacs_re_max_failures; + +/* Amount of memory that we can safely stack allocate. */ +extern ptrdiff_t emacs_re_safe_alloca; + +/* This data structure represents a compiled pattern. Before calling + the pattern compiler, the fields 'buffer', 'allocated', 'fastmap', + and 'translate' can be set. After the pattern has been + compiled, the 're_nsub' field is available. All other fields are + private to the regex routines. */ + +struct re_pattern_buffer +{ + /* Space that holds the compiled pattern. It is declared as + 'unsigned char *' because its elements are + sometimes used as array indexes. */ + unsigned char *buffer; + + /* Number of bytes to which 'buffer' points. */ + size_t allocated; + + /* Number of bytes actually used in 'buffer'. */ + size_t used; + + /* Charset of unibyte characters at compiling time. */ + int charset_unibyte; + + /* Pointer to a fastmap, if any, otherwise zero. re_search uses + the fastmap, if there is one, to skip over impossible + starting points for matches. */ + char *fastmap; + + /* Either a translate table to apply to all characters before + comparing them, or zero for no translation. The translation + applies to a pattern when it is compiled and to a string + when it is matched. */ + Lisp_Object translate; + + /* Number of subexpressions found by the compiler. */ + size_t re_nsub; + + /* True if and only if this pattern can match the empty string. + Well, in truth it's used only in 're_search_2', to see + whether or not we should use the fastmap, so we don't set + this absolutely perfectly; see 're_compile_fastmap'. */ + unsigned can_be_null : 1; + + /* If REGS_UNALLOCATED, allocate space in the 'regs' structure + for 'max (RE_NREGS, re_nsub + 1)' groups. + If REGS_REALLOCATE, reallocate space if necessary. + If REGS_FIXED, use what's there. */ + unsigned regs_allocated : 2; + + /* Set to false when 'regex_compile' compiles a pattern; set to true + by 're_compile_fastmap' if it updates the fastmap. */ + unsigned fastmap_accurate : 1; + + /* If true, the compilation of the pattern had to look up the syntax table, + so the compiled pattern is valid for the current syntax table only. */ + unsigned used_syntax : 1; + + /* If true, multi-byte form in the regexp pattern should be + recognized as a multibyte character. */ + unsigned multibyte : 1; + + /* If true, multi-byte form in the target of match should be + recognized as a multibyte character. */ + unsigned target_multibyte : 1; +}; + +/* Declarations for routines. */ + +/* Compile the regular expression PATTERN, with length LENGTH + and syntax given by the global 're_syntax_options', into the buffer + BUFFER. Return NULL if successful, and an error string if not. */ +extern const char *re_compile_pattern (const char *pattern, size_t length, + bool posix_backtracking, + const char *whitespace_regexp, + struct re_pattern_buffer *buffer); + + +/* Search in the string STRING (with length LENGTH) for the pattern + compiled into BUFFER. Start searching at position START, for RANGE + characters. Return the starting position of the match, -1 for no + match, or -2 for an internal error. Also return register + information in REGS (if REGS is non-null). */ +extern ptrdiff_t re_search (struct re_pattern_buffer *buffer, + const char *string, size_t length, + ptrdiff_t start, ptrdiff_t range, + struct re_registers *regs); + + +/* Like 're_search', but search in the concatenation of STRING1 and + STRING2. Also, stop searching at index START + STOP. */ +extern ptrdiff_t re_search_2 (struct re_pattern_buffer *buffer, + const char *string1, size_t length1, + const char *string2, size_t length2, + ptrdiff_t start, ptrdiff_t range, + struct re_registers *regs, + ptrdiff_t stop); + + +/* Like 're_search_2', but return how many characters in STRING the regexp + in BUFFER matched, starting at position START. */ +extern ptrdiff_t re_match_2 (struct re_pattern_buffer *buffer, + const char *string1, size_t length1, + const char *string2, size_t length2, + ptrdiff_t start, struct re_registers *regs, + ptrdiff_t stop); + + +/* Set REGS to hold NUM_REGS registers, storing them in STARTS and + ENDS. Subsequent matches using BUFFER and REGS will use this memory + for recording register information. STARTS and ENDS must be + allocated with malloc, and must each be at least 'NUM_REGS * sizeof + (ptrdiff_t)' bytes long. + + If NUM_REGS == 0, then subsequent matches should allocate their own + register data. + + Unless this function is called, the first search or match using + PATTERN_BUFFER will allocate its own register data, without + freeing the old data. */ +extern void re_set_registers (struct re_pattern_buffer *buffer, + struct re_registers *regs, + unsigned num_regs, + ptrdiff_t *starts, ptrdiff_t *ends); + +/* Character classes. */ +typedef enum { RECC_ERROR = 0, + RECC_ALNUM, RECC_ALPHA, RECC_WORD, + RECC_GRAPH, RECC_PRINT, + RECC_LOWER, RECC_UPPER, + RECC_PUNCT, RECC_CNTRL, + RECC_DIGIT, RECC_XDIGIT, + RECC_BLANK, RECC_SPACE, + RECC_MULTIBYTE, RECC_NONASCII, + RECC_ASCII, RECC_UNIBYTE +} re_wctype_t; + +extern bool re_iswctype (int ch, re_wctype_t cc); +extern re_wctype_t re_wctype_parse (const unsigned char **strp, + unsigned limit); + +#endif /* EMACS_REGEX_H */ |