#include <sys/types.h>
#include <stdlib.h>
#include <string.h>
#include "regex.h"
#include <ctype.h>
Go to the source code of this file.
Classes | |
| struct | compile_stack_elt_t |
| struct | compile_stack_type |
| union | fail_stack_elt |
| struct | fail_stack_type |
| union | register_info_type |
Defines | |
| #define | _GNU_SOURCE |
| #define | assert(e) |
| #define | AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2) |
| #define | AT_STRINGS_END(d) ((d) == end2) |
| #define | bcmp(s1, s2, n) memcmp ((s1), (s2), (n)) |
| #define | bcopy(s, d, n) memcpy ((d), (s), (n)) |
| #define | BUF_PUSH(c) |
| #define | BUF_PUSH_2(c1, c2) |
| #define | BUF_PUSH_3(c1, c2, c3) |
| #define | BYTEWIDTH 8 |
| #define | bzero(s, n) memset ((s), 0, (n)) |
| #define | CHAR_CLASS_MAX_LENGTH 6 |
| #define | CHAR_SET_SIZE 256 |
| #define | COMPILE_STACK_EMPTY (compile_stack.avail == 0) |
| #define | COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size) |
| #define | COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) |
| #define | DEBUG_POP(item_addr) |
| #define | DEBUG_PRINT1(x) |
| #define | DEBUG_PRINT2(x1, x2) |
| #define | DEBUG_PRINT3(x1, x2, x3) |
| #define | DEBUG_PRINT4(x1, x2, x3, x4) |
| #define | DEBUG_PRINT_COMPILED_PATTERN(p, s, e) |
| #define | DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) |
| #define | DEBUG_PUSH(item) |
| #define | DEBUG_STATEMENT(e) |
| #define | DOUBLE_FAIL_STACK(fail_stack) |
| #define | EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something) |
| #define | EXTEND_BUFFER() |
| #define | EXTRACT_NUMBER(destination, source) |
| #define | EXTRACT_NUMBER_AND_INCR(destination, source) |
| #define | FAIL_STACK_EMPTY() (fail_stack.avail == 0) |
| #define | FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size) |
| #define | FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0) |
| #define | false 0 |
| #define | FIRST_STRING_P(ptr) (size1 && string1 <= (ptr) && (ptr) <= string1 + size1) |
| #define | FREE_STACK_RETURN(value) return (free (compile_stack.stack), value) |
| #define | FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL |
| #define | FREE_VARIABLES() |
| #define | GET_BUFFER_SPACE(n) |
| #define | GET_UNSIGNED_NUMBER(num) |
| #define | gettext(msgid) (msgid) |
| #define | gettext_noop(String) String |
| #define | HAVE_STRING_H |
| #define | INIT_BUF_SIZE 32 |
| #define | INIT_COMPILE_STACK_SIZE 32 |
| #define | INIT_FAIL_STACK() |
| #define | INIT_FAILURE_ALLOC 5 |
| #define | INSERT_JUMP(op, loc, to) insert_op1 (op, loc, (int) ((to) - (loc) - 3), b) |
| #define | INSERT_JUMP2(op, loc, to, arg) insert_op2 (op, loc, (int) ((to) - (loc) - 3), arg, b) |
| #define | IS_ACTIVE(R) ((R).bits.is_active) |
| #define | IS_CHAR_CLASS(string) |
| #define | ISALNUM(c) (ISASCII (c) && isalnum (c)) |
| #define | ISALPHA(c) (ISASCII (c) && isalpha (c)) |
| #define | ISASCII(c) 1 |
| #define | ISBLANK(c) ((c) == ' ' || (c) == '\t') |
| #define | ISCNTRL(c) (ISASCII (c) && iscntrl (c)) |
| #define | ISDIGIT(c) (ISASCII (c) && isdigit (c)) |
| #define | ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c)) |
| #define | ISLOWER(c) (ISASCII (c) && islower (c)) |
| #define | ISPRINT(c) (ISASCII (c) && isprint (c)) |
| #define | ISPUNCT(c) (ISASCII (c) && ispunct (c)) |
| #define | ISSPACE(c) (ISASCII (c) && isspace (c)) |
| #define | ISUPPER(c) (ISASCII (c) && isupper (c)) |
| #define | ISXDIGIT(c) (ISASCII (c) && isxdigit (c)) |
| #define | MATCH_MAY_ALLOCATE |
| #define | MATCH_NULL_UNSET_VALUE 3 |
| #define | MATCHED_SOMETHING(R) ((R).bits.matched_something) |
| #define | MATCHING_IN_FIRST_STRING (dend == end_match_1) |
| #define | MAX(a, b) ((a) > (b) ? (a) : (b)) |
| #define | MAX_BUF_SIZE (1L << 16) |
| #define | MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS) |
| #define | MAX_REGNUM 255 |
| #define | MIN(a, b) ((a) < (b) ? (a) : (b)) |
| #define | NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH) |
| #define | NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1) |
| #define | NULL (void *)0 |
| #define | NUM_FAILURE_ITEMS |
| #define | NUM_NONREG_ITEMS 4 |
| #define | NUM_REG_ITEMS 3 |
| #define | PATFETCH(c) |
| #define | PATFETCH_RAW(c) |
| #define | PATUNFETCH p-- |
| #define | POINTER_TO_OFFSET(ptr) |
| #define | POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail] |
| #define | POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer |
| #define | POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info) |
| #define | POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer |
| #define | PREFETCH() |
| #define | PUSH_FAILURE_ELT(item) fail_stack.stack[fail_stack.avail++] = (item) |
| #define | PUSH_FAILURE_INT(item) fail_stack.stack[fail_stack.avail++].integer = (item) |
| #define | PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) |
| #define | PUSH_FAILURE_POINTER(item) fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item) |
| #define | PUSH_PATTERN_OP(POINTER, FAIL_STACK) |
| #define | REALLOC(p, s) realloc ((p), (s)) |
| #define | REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p) |
| #define | REG_UNSET(e) ((e) == REG_UNSET_VALUE) |
| #define | REG_UNSET_VALUE (&reg_unset_dummy) |
| #define | REGEX_ALLOCATE alloca |
| #define | REGEX_ALLOCATE_STACK alloca |
| #define | REGEX_FREE(arg) ((void)0) |
| #define | REGEX_FREE_STACK(arg) |
| #define | REGEX_REALLOCATE(source, osize, nsize) |
| #define | REGEX_REALLOCATE_STACK(source, osize, nsize) REGEX_REALLOCATE (source, osize, nsize) |
| #define | REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t))) |
| #define | REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail) |
| #define | RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack) |
| #define | RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t))) |
| #define | RETALLOC_IF(addr, n, t) if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t) |
| #define | SET_LIST_BIT(c) |
| #define | SET_REGS_MATCHED() |
| #define | SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128) |
| #define | STORE_JUMP(op, loc, to) store_op1 (op, loc, (int) ((to) - (loc) - 3)) |
| #define | STORE_JUMP2(op, loc, to, arg) store_op2 (op, loc, (int) ((to) - (loc) - 3), arg) |
| #define | STORE_NUMBER(destination, number) |
| #define | STORE_NUMBER_AND_INCR(destination, number) |
| #define | STREQ(s1, s2) ((strcmp (s1, s2) == 0)) |
| #define | SWITCH_ENUM_CAST(x) (x) |
| #define | Sword 1 |
| #define | SYNTAX(c) re_syntax_table[c] |
| #define | TALLOC(n, t) ((t *) malloc ((n) * sizeof (t))) |
| #define | TRANSLATE(d) (translate ? (char) translate[(unsigned char) (d)] : (d)) |
| #define | true 1 |
| #define | WORDCHAR_P(d) |
Typedefs | |
| typedef char | boolean |
| typedef union fail_stack_elt | fail_stack_elt_t |
| typedef long | pattern_offset_t |
| typedef unsigned | regnum_t |
Enumerations | |
| enum | re_opcode_t { no_op = 0, succeed, exactn, anychar, charset, charset_not, start_memory, stop_memory, duplicate, begline, endline, begbuf, endbuf, jump, jump_past_alt, on_failure_jump, on_failure_keep_string_jump, pop_failure_jump, maybe_pop_jump, dummy_failure_jump, push_dummy_failure, succeed_n, jump_n, set_number_at, wordchar, notwordchar, wordbeg, wordend, wordbound, notwordbound } |
Functions | |
| static int bcmp_translate | _RE_ARGS ((const char *s1, const char *s2, int len, char *translate)) |
| static boolean alt_match_null_string_p | _RE_ARGS ((unsigned char *p, unsigned char *end, register_info_type *reg_info)) |
| static boolean group_match_null_string_p | _RE_ARGS ((unsigned char **p, unsigned char *end, register_info_type *reg_info)) |
| static boolean group_in_compile_stack | _RE_ARGS ((compile_stack_type compile_stack, regnum_t regnum)) |
| static reg_errcode_t compile_range | _RE_ARGS ((const char **p_ptr, const char *pend, char *translate, reg_syntax_t syntax, unsigned char *b)) |
| static boolean at_endline_loc_p | _RE_ARGS ((const char *p, const char *pend, reg_syntax_t syntax)) |
| static boolean at_begline_loc_p | _RE_ARGS ((const char *pattern, const char *p, reg_syntax_t syntax)) |
| static void insert_op2 | _RE_ARGS ((re_opcode_t op, unsigned char *loc, int arg1, int arg2, unsigned char *end)) |
| static void insert_op1 | _RE_ARGS ((re_opcode_t op, unsigned char *loc, int arg, unsigned char *end)) |
| static void store_op2 | _RE_ARGS ((re_opcode_t op, unsigned char *loc, int arg1, int arg2)) |
| static void store_op1 | _RE_ARGS ((re_opcode_t op, unsigned char *loc, int arg)) |
| static reg_errcode_t regex_compile | _RE_ARGS ((const char *pattern, size_t size, reg_syntax_t syntax, struct re_pattern_buffer *bufp)) |
| static boolean | alt_match_null_string_p (unsigned char *p, unsigned char *end, register_info_type *reg_info) |
| static boolean | at_begline_loc_p (char *pattern, char *p, reg_syntax_t syntax) const |
| static boolean | at_endline_loc_p (char *p, char *pend, reg_syntax_t syntax) const |
| static int | bcmp_translate (char *s1, char *s2, int len, RE_TRANSLATE_TYPE translate) const |
| static boolean | common_op_match_null_string_p (unsigned char **p, unsigned char *end, register_info_type *reg_info) |
| static reg_errcode_t | compile_range (char **p_ptr, char *pend, RE_TRANSLATE_TYPE translate, reg_syntax_t syntax, unsigned char *b) const |
| static boolean | group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum) |
| static boolean | group_match_null_string_p (unsigned char **p, unsigned char *end, register_info_type *reg_info) |
| static void | init_syntax_once () |
| static void | insert_op1 (re_opcode_t op, unsigned char *loc, int arg, unsigned char *end) |
| static void | insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, unsigned char *end) |
| int | re_compile_fastmap (struct re_pattern_buffer *bufp) |
| const char * | re_compile_pattern (char *pattern, size_t length, struct re_pattern_buffer *bufp) const |
| int | re_match (struct re_pattern_buffer *bufp, const char *string, int size, int pos, struct re_registers *regs) |
| int | re_match_2 (struct re_pattern_buffer *bufp, const char *string1, int size1, const char *string2, int size2, int pos, struct re_registers *regs, int stop) |
| static int | re_match_2_internal (struct re_pattern_buffer *bufp, const char *string1, int size1, const char *string2, int size2, int pos, struct re_registers *regs, int stop) |
| static int | re_match_2_internal () |
| int | re_search (struct re_pattern_buffer *bufp, const char *string, int size, int startpos, int range, struct re_registers *regs) |
| int | re_search_2 (struct re_pattern_buffer *bufp, const char *string1, int size1, const char *string2, int size2, int startpos, int range, struct re_registers *regs, int stop) |
| void | re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs, unsigned num_regs, regoff_t *starts, regoff_t *ends) |
| reg_syntax_t | re_set_syntax (reg_syntax_t syntax) |
| int | regcomp (regex_t *preg, const char *pattern, int cflags) |
| size_t | regerror (int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size) |
| static reg_errcode_t | regex_compile (char *pattern, size_t size, reg_syntax_t syntax, struct re_pattern_buffer *bufp) const |
| int | regexec (regex_t *preg, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags) const |
| void | regfree (regex_t *preg) |
| static void | store_op1 (re_opcode_t op, unsigned char *loc, int arg) |
| static void | store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2) |
Variables | |
| static const char * | re_error_msgid [] |
| int | re_max_failures = 20000 |
| reg_syntax_t | re_syntax_options |
| static char | re_syntax_table [CHAR_SET_SIZE] |
| static char | reg_unset_dummy |
| #define AT_STRINGS_BEG | ( | d | ) | ((d) == (size1 ? string1 : string2) || !size2) |
| #define BUF_PUSH | ( | c | ) |
do { \ GET_BUFFER_SPACE (1); \ *b++ = (unsigned char) (c); \ } while (0)
| #define BUF_PUSH_2 | ( | c1, | |||
| c2 | ) |
do { \ GET_BUFFER_SPACE (2); \ *b++ = (unsigned char) (c1); \ *b++ = (unsigned char) (c2); \ } while (0)
| #define BUF_PUSH_3 | ( | c1, | |||
| c2, | |||||
| c3 | ) |
do { \ GET_BUFFER_SPACE (3); \ *b++ = (unsigned char) (c1); \ *b++ = (unsigned char) (c2); \ *b++ = (unsigned char) (c3); \ } while (0)
| #define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size) |
| #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) |
| #define DOUBLE_FAIL_STACK | ( | fail_stack | ) |
((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS) \ ? 0 \ : ((fail_stack).stack = (fail_stack_elt_t *) \ REGEX_REALLOCATE_STACK ((fail_stack).stack, \ (fail_stack).size * sizeof (fail_stack_elt_t), \ ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \ \ (fail_stack).stack == NULL \ ? 0 \ : ((fail_stack).size <<= 1, \ 1)))
| #define EVER_MATCHED_SOMETHING | ( | R | ) | ((R).bits.ever_matched_something) |
| #define EXTEND_BUFFER | ( | ) |
do { \ unsigned char *old_buffer = bufp->buffer; \ if (bufp->allocated == MAX_BUF_SIZE) \ return REG_ESIZE; \ bufp->allocated <<= 1; \ if (bufp->allocated > MAX_BUF_SIZE) \ bufp->allocated = MAX_BUF_SIZE; \ bufp->buffer = (unsigned char *) REALLOC (bufp->buffer, bufp->allocated);\ if (bufp->buffer == NULL) \ return REG_ESPACE; \ /* If the buffer moved, move all the pointers into it. */ \ if (old_buffer != bufp->buffer) \ { \ b = (b - old_buffer) + bufp->buffer; \ begalt = (begalt - old_buffer) + bufp->buffer; \ if (fixup_alt_jump) \ fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\ if (laststart) \ laststart = (laststart - old_buffer) + bufp->buffer; \ if (pending_exact) \ pending_exact = (pending_exact - old_buffer) + bufp->buffer; \ } \ } while (0)
| #define EXTRACT_NUMBER | ( | destination, | |||
| source | ) |
do { \ (destination) = *(source) & 0377; \ (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \ } while (0)
| #define EXTRACT_NUMBER_AND_INCR | ( | destination, | |||
| source | ) |
do { \ EXTRACT_NUMBER (destination, source); \ (source) += 2; \ } while (0)
| #define FAIL_STACK_FULL | ( | ) | (fail_stack.avail == fail_stack.size) |
| #define FAIL_STACK_PTR_EMPTY | ( | ) | (fail_stack_ptr->avail == 0) |
| #define FIRST_STRING_P | ( | ptr | ) | (size1 && string1 <= (ptr) && (ptr) <= string1 + size1) |
| #define FREE_STACK_RETURN | ( | value | ) | return (free (compile_stack.stack), value) |
| #define FREE_VAR | ( | var | ) | if (var) REGEX_FREE (var); var = NULL |
| #define FREE_VARIABLES | ( | ) |
| #define GET_BUFFER_SPACE | ( | n | ) |
while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated) \ EXTEND_BUFFER ()
| #define GET_UNSIGNED_NUMBER | ( | num | ) |
| #define INIT_FAIL_STACK | ( | ) |
do { \ fail_stack.stack = (fail_stack_elt_t *) \ REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \ \ if (fail_stack.stack == NULL) \ return -2; \ \ fail_stack.size = INIT_FAILURE_ALLOC; \ fail_stack.avail = 0; \ } while (0)
| #define INSERT_JUMP | ( | op, | |||
| loc, | |||||
| to | ) | insert_op1 (op, loc, (int) ((to) - (loc) - 3), b) |
| #define INSERT_JUMP2 | ( | op, | |||
| loc, | |||||
| to, | |||||
| arg | ) | insert_op2 (op, loc, (int) ((to) - (loc) - 3), arg, b) |
| #define IS_CHAR_CLASS | ( | string | ) |
(STREQ (string, "alpha") || STREQ (string, "upper") \ || STREQ (string, "lower") || STREQ (string, "digit") \ || STREQ (string, "alnum") || STREQ (string, "xdigit") \ || STREQ (string, "space") || STREQ (string, "print") \ || STREQ (string, "punct") || STREQ (string, "graph") \ || STREQ (string, "cntrl") || STREQ (string, "blank"))
| #define ISGRAPH | ( | c | ) | (ISASCII (c) && isprint (c) && !isspace (c)) |
| #define MATCHED_SOMETHING | ( | R | ) | ((R).bits.matched_something) |
| #define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS) |
| #define NUM_FAILURE_ITEMS |
(((0 \
? 0 : highest_active_reg - lowest_active_reg + 1) \
* NUM_REG_ITEMS) \
+ NUM_NONREG_ITEMS)
| #define PATFETCH | ( | c | ) |
| #define PATFETCH_RAW | ( | c | ) |
| #define POINTER_TO_OFFSET | ( | ptr | ) |
(FIRST_STRING_P (ptr) \ ? ((regoff_t) ((ptr) - string1)) \ : ((regoff_t) ((ptr) - string2 + size1)))
| #define POP_FAILURE_ELT | ( | ) | fail_stack.stack[--fail_stack.avail] |
| #define POP_FAILURE_INT | ( | ) | fail_stack.stack[--fail_stack.avail].integer |
| #define POP_FAILURE_POINT | ( | str, | |||
| pat, | |||||
| low_reg, | |||||
| high_reg, | |||||
| regstart, | |||||
| regend, | |||||
| reg_info | ) |
| #define POP_FAILURE_POINTER | ( | ) | fail_stack.stack[--fail_stack.avail].pointer |
| #define PREFETCH | ( | ) |
| #define PUSH_FAILURE_ELT | ( | item | ) | fail_stack.stack[fail_stack.avail++] = (item) |
| #define PUSH_FAILURE_INT | ( | item | ) | fail_stack.stack[fail_stack.avail++].integer = (item) |
| #define PUSH_FAILURE_POINT | ( | pattern_place, | |||
| string_place, | |||||
| failure_code | ) |
| #define PUSH_FAILURE_POINTER | ( | item | ) | fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item) |
| #define PUSH_PATTERN_OP | ( | POINTER, | |||
| FAIL_STACK | ) |
((FAIL_STACK_FULL () \ && !DOUBLE_FAIL_STACK (FAIL_STACK)) \ ? 0 \ : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER, \ 1))
| #define REG_MATCH_NULL_STRING_P | ( | R | ) | ((R).bits.match_null_string_p) |
| #define REG_UNSET_VALUE (&reg_unset_dummy) |
| #define REGEX_REALLOCATE | ( | source, | |||
| osize, | |||||
| nsize | ) |
| #define REGEX_REALLOCATE_STACK | ( | source, | |||
| osize, | |||||
| nsize | ) | REGEX_REALLOCATE (source, osize, nsize) |
| #define REGEX_TALLOC | ( | n, | |||
| t | ) | ((t *) REGEX_ALLOCATE ((n) * sizeof (t))) |
| #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail) |
| #define RESET_FAIL_STACK | ( | ) | REGEX_FREE_STACK (fail_stack.stack) |
| #define RETALLOC | ( | addr, | |||
| n, | |||||
| t | ) | ((addr) = (t *) realloc (addr, (n) * sizeof (t))) |
| #define RETALLOC_IF | ( | addr, | |||
| n, | |||||
| t | ) | if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t) |
| #define SET_LIST_BIT | ( | c | ) |
| #define SET_REGS_MATCHED | ( | ) |
do \ { \ if (!set_regs_matched_done) \ { \ active_reg_t r; \ set_regs_matched_done = 1; \ for (r = lowest_active_reg; r <= highest_active_reg; r++) \ { \ MATCHED_SOMETHING (reg_info[r]) \ = EVER_MATCHED_SOMETHING (reg_info[r]) \ = 1; \ } \ } \ } \ while (0)
| #define SIGN_EXTEND_CHAR | ( | c | ) | ((((unsigned char) (c)) ^ 128) - 128) |
| #define STORE_JUMP | ( | op, | |||
| loc, | |||||
| to | ) | store_op1 (op, loc, (int) ((to) - (loc) - 3)) |
| #define STORE_JUMP2 | ( | op, | |||
| loc, | |||||
| to, | |||||
| arg | ) | store_op2 (op, loc, (int) ((to) - (loc) - 3), arg) |
| #define STORE_NUMBER | ( | destination, | |||
| number | ) |
| #define STORE_NUMBER_AND_INCR | ( | destination, | |||
| number | ) |
do { \ STORE_NUMBER (destination, number); \ (destination) += 2; \ } while (0)
| #define SYNTAX | ( | c | ) | re_syntax_table[c] |
| #define TRANSLATE | ( | d | ) | (translate ? (char) translate[(unsigned char) (d)] : (d)) |
| #define WORDCHAR_P | ( | d | ) |
| typedef union fail_stack_elt fail_stack_elt_t |
| typedef long pattern_offset_t |
| enum re_opcode_t |
Definition at line 353 of file regex.c.
00354 { 00355 no_op = 0, 00356 00357 /* Succeed right away--no more backtracking. */ 00358 succeed, 00359 00360 /* Followed by one byte giving n, then by n literal bytes. */ 00361 exactn, 00362 00363 /* Matches any (more or less) character. */ 00364 anychar, 00365 00366 /* Matches any one char belonging to specified set. First 00367 following byte is number of bitmap bytes. Then come bytes 00368 for a bitmap saying which chars are in. Bits in each byte 00369 are ordered low-bit-first. A character is in the set if its 00370 bit is 1. A character too large to have a bit in the map is 00371 automatically not in the set. */ 00372 charset, 00373 00374 /* Same parameters as charset, but match any character that is 00375 not one of those specified. */ 00376 charset_not, 00377 00378 /* Start remembering the text that is matched, for storing in a 00379 register. Followed by one byte with the register number, in 00380 the range 0 to one less than the pattern buffer's re_nsub 00381 field. Then followed by one byte with the number of groups 00382 inner to this one. (This last has to be part of the 00383 start_memory only because we need it in the on_failure_jump 00384 of re_match_2.) */ 00385 start_memory, 00386 00387 /* Stop remembering the text that is matched and store it in a 00388 memory register. Followed by one byte with the register 00389 number, in the range 0 to one less than `re_nsub' in the 00390 pattern buffer, and one byte with the number of inner groups, 00391 just like `start_memory'. (We need the number of inner 00392 groups here because we don't have any easy way of finding the 00393 corresponding start_memory when we're at a stop_memory.) */ 00394 stop_memory, 00395 00396 /* Match a duplicate of something remembered. Followed by one 00397 byte containing the register number. */ 00398 duplicate, 00399 00400 /* Fail unless at beginning of line. */ 00401 begline, 00402 00403 /* Fail unless at end of line. 
*/ 00404 endline, 00405 00406 /* Succeeds if at beginning of buffer (if emacs) or at beginning 00407 of string to be matched (if not). */ 00408 begbuf, 00409 00410 /* Analogously, for end of buffer/string. */ 00411 endbuf, 00412 00413 /* Followed by two byte relative address to which to jump. */ 00414 jump, 00415 00416 /* Same as jump, but marks the end of an alternative. */ 00417 jump_past_alt, 00418 00419 /* Followed by two-byte relative address of place to resume at 00420 in case of failure. */ 00421 on_failure_jump, 00422 00423 /* Like on_failure_jump, but pushes a placeholder instead of the 00424 current string position when executed. */ 00425 on_failure_keep_string_jump, 00426 00427 /* Throw away latest failure point and then jump to following 00428 two-byte relative address. */ 00429 pop_failure_jump, 00430 00431 /* Change to pop_failure_jump if know won't have to backtrack to 00432 match; otherwise change to jump. This is used to jump 00433 back to the beginning of a repeat. If what follows this jump 00434 clearly won't match what the repeat does, such that we can be 00435 sure that there is no use backtracking out of repetitions 00436 already matched, then we change it to a pop_failure_jump. 00437 Followed by two-byte address. */ 00438 maybe_pop_jump, 00439 00440 /* Jump to following two-byte address, and push a dummy failure 00441 point. This failure point will be thrown away if an attempt 00442 is made to use it for a failure. A `+' construct makes this 00443 before the first repeat. Also used as an intermediary kind 00444 of jump when compiling an alternative. */ 00445 dummy_failure_jump, 00446 00447 /* Push a dummy failure point and continue. Used at the end of 00448 alternatives. */ 00449 push_dummy_failure, 00450 00451 /* Followed by two-byte relative address and two-byte number n. 00452 After matching N times, jump to the address upon failure. */ 00453 succeed_n, 00454 00455 /* Followed by two-byte relative address, and two-byte number n. 
00456 Jump to the address N times, then fail. */ 00457 jump_n, 00458 00459 /* Set the following two-byte relative address to the 00460 subsequent two-byte number. The address *includes* the two 00461 bytes of number. */ 00462 set_number_at, 00463 00464 wordchar, /* Matches any word-constituent character. */ 00465 notwordchar, /* Matches any char that is not a word-constituent. */ 00466 00467 wordbeg, /* Succeeds if at word beginning. */ 00468 wordend, /* Succeeds if at word end. */ 00469 00470 wordbound, /* Succeeds if at a word boundary. */ 00471 notwordbound /* Succeeds if not at a word boundary. */ 00472 00473 #ifdef emacs 00474 ,before_dot, /* Succeeds if before point. */ 00475 at_dot, /* Succeeds if at point. */ 00476 after_dot, /* Succeeds if after point. */ 00477 00478 /* Matches any character whose syntax is specified. Followed by 00479 a byte which contains a syntax code, e.g., Sword. */ 00480 syntaxspec, 00481 00482 /* Matches any character whose syntax is not that specified. */ 00483 notsyntaxspec 00484 #endif /* emacs */ 00485 } re_opcode_t;
| static int bcmp_translate _RE_ARGS | ( | (const char *s1, const char *s2, int len, char *translate) | ) | [static] |
| static boolean alt_match_null_string_p _RE_ARGS | ( | (unsigned char *p, unsigned char *end, register_info_type *reg_info) | ) | [static] |
| static boolean common_op_match_null_string_p _RE_ARGS | ( | (unsigned char **p, unsigned char *end, register_info_type *reg_info) | ) | [static] |
| static boolean group_in_compile_stack _RE_ARGS | ( | (compile_stack_type compile_stack, regnum_t regnum) | ) | [static] |
| static reg_errcode_t compile_range _RE_ARGS | ( | (const char **p_ptr, const char *pend, char *translate, reg_syntax_t syntax, unsigned char *b) | ) | [static] |
| static boolean at_endline_loc_p _RE_ARGS | ( | (const char *p, const char *pend, reg_syntax_t syntax) | ) | [static] |
| static boolean at_begline_loc_p _RE_ARGS | ( | (const char *pattern, const char *p, reg_syntax_t syntax) | ) | [static] |
| static void insert_op2 _RE_ARGS | ( | (re_opcode_t op, unsigned char *loc, int arg1, int arg2, unsigned char *end) | ) | [static] |
| static void insert_op1 _RE_ARGS | ( | (re_opcode_t op, unsigned char *loc, int arg, unsigned char *end) | ) | [static] |
| static void store_op2 _RE_ARGS | ( | (re_opcode_t op, unsigned char *loc, int arg1, int arg2) | ) | [static] |
| static void store_op1 _RE_ARGS | ( | (re_opcode_t op, unsigned char *loc, int arg) | ) | [static] |
| static reg_errcode_t regex_compile _RE_ARGS | ( | (const char *pattern, size_t size, reg_syntax_t syntax, struct re_pattern_buffer *bufp) | ) | [static] |
| static boolean alt_match_null_string_p | ( | unsigned char * | p, | |
| unsigned char * | end, | |||
| register_info_type * | reg_info | |||
| ) | [static] |
Definition at line 5231 of file regex.c.
05231 : 05232 It expects P to be the first byte of a single alternative and END one 05233 byte past the last. The alternative can contain groups. */ 05234 05235 static boolean 05236 alt_match_null_string_p (p, end, reg_info) 05237 unsigned char *p, *end; 05238 register_info_type *reg_info; 05239 { 05240 int mcnt; 05241 unsigned char *p1 = p; 05242 05243 while (p1 < end) 05244 { 05245 /* Skip over opcodes that can match nothing, and break when we get 05246 to one that can't. */ 05247 05248 switch ((re_opcode_t) *p1) 05249 { 05250 /* It's a loop. */ 05251 case on_failure_jump: 05252 p1++; 05253 EXTRACT_NUMBER_AND_INCR (mcnt, p1); 05254 p1 += mcnt; 05255 break; 05256 05257 default: 05258 if (!common_op_match_null_string_p (&p1, end, reg_info)) 05259 return false; }
| static boolean at_begline_loc_p | ( | char * | pattern, | |
| char * | p, | |||
| reg_syntax_t | syntax | |||
| ) | const [static] |
| static boolean at_endline_loc_p | ( | char * | p, | |
| char * | pend, | |||
| reg_syntax_t | syntax | |||
| ) | const [static] |
Definition at line 2963 of file regex.c.
02971 { 02972 const char *next = p; 02973 boolean next_backslash = *next == '\\'; 02974 const char *next_next = p + 1 < pend ? p + 1 : 0; 02975 02976 return 02977 /* Before a subexpression? */ 02978 (syntax & RE_NO_BK_PARENS ? *next == ')'
| static int bcmp_translate | ( | char * | s1, | |
| char * | s2, | |||
| int | len, | |||
| RE_TRANSLATE_TYPE | translate | |||
| ) | const [static] |
| static boolean common_op_match_null_string_p | ( | unsigned char ** | p, | |
| unsigned char * | end, | |||
| register_info_type * | reg_info | |||
| ) | [static] |
Definition at line 5268 of file regex.c.
05276 { 05277 int mcnt; 05278 boolean ret; 05279 int reg_no; 05280 unsigned char *p1 = *p; 05281 05282 switch ((re_opcode_t) *p1++) 05283 { 05284 case no_op: 05285 case begline: 05286 case endline: 05287 case begbuf: 05288 case endbuf: 05289 case wordbeg: 05290 case wordend: 05291 case wordbound: 05292 case notwordbound: 05293 #ifdef emacs 05294 case before_dot: 05295 case at_dot: 05296 case after_dot: 05297 #endif 05298 break; 05299 05300 case start_memory: 05301 reg_no = *p1; 05302 assert (reg_no > 0 && reg_no <= MAX_REGNUM); 05303 ret = group_match_null_string_p (&p1, end, reg_info); 05304 05305 /* Have to set this here in case we're checking a group which 05306 contains a group and a back reference to it. */ 05307 05308 if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE) 05309 REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret; 05310 05311 if (!ret) 05312 return false; 05313 break; 05314 05315 /* If this is an optimized succeed_n for zero times, make the jump. */ 05316 case jump: 05317 EXTRACT_NUMBER_AND_INCR (mcnt, p1); 05318 if (mcnt >= 0) 05319 p1 += mcnt; 05320 else 05321 return false; 05322 break; 05323 05324 case succeed_n: 05325 /* Get to the number of times to succeed. */ 05326 p1 += 2; 05327 EXTRACT_NUMBER_AND_INCR (mcnt, p1); 05328 05329 if (mcnt == 0) 05330 { 05331 p1 -= 4; 05332 EXTRACT_NUMBER_AND_INCR (mcnt, p1); 05333 p1 += mcnt; 05334 } 05335 else 05336 return false; 05337 break; 05338 05339 case duplicate: 05340 if (!REG_MATCH_NULL_STRING_P (reg_info[*p1])) 05341 return false; 05342 break; 05343 05344 case set_number_at: 05345 p1 += 4; 05346 05347 default: 05348 /* All other opcodes mean we cannot match the empty string. */ 05349 return false;
| static reg_errcode_t compile_range | ( | char ** | p_ptr, | |
| char * | pend, | |||
| RE_TRANSLATE_TYPE | translate, | |||
| reg_syntax_t | syntax, | |||
| unsigned char * | b | |||
| ) | const [static] |
Definition at line 3013 of file regex.c.
03023 { 03024 unsigned this_char; 03025 03026 const char *p = *p_ptr; 03027 unsigned int range_start, range_end; 03028 03029 if (p == pend) 03030 return REG_ERANGE; 03031 03032 /* Even though the pattern is a signed `char *', we need to fetch 03033 with unsigned char *'s; if the high bit of the pattern character 03034 is set, the range endpoints will be negative if we fetch using a 03035 signed char *. 03036 03037 We also want to fetch the endpoints without translating them; the 03038 appropriate translation is done in the bit-setting loop below. */ 03039 /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *. */ 03040 range_start = ((const unsigned char *) p)[-2]; 03041 range_end = ((const unsigned char *) p)[0]; 03042 03043 /* Have to increment the pointer into the pattern string, so the 03044 caller isn't still at the ending character. */ 03045 (*p_ptr)++; 03046 03047 /* If the start is after the end, the range is empty. */ 03048 if (range_start > range_end) 03049 return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; 03050 03051 /* Here we see why `this_char' has to be larger than an `unsigned 03052 char' -- the range is inclusive, so if `range_end' == 0xff 03053 (assuming 8-bit characters), we would otherwise go into an infinite 03054 loop, since all characters <= 0xff. */ 03055 for (this_char = range_start; this_char <= range_end; this_char++) 03056 {
| static boolean group_in_compile_stack | ( | compile_stack_type | compile_stack, | |
| regnum_t | regnum | |||
| ) | [static] |
| static boolean group_match_null_string_p | ( | unsigned char ** | p, | |
| unsigned char * | end, | |||
| register_info_type * | reg_info | |||
| ) | [static] |
Definition at line 5122 of file regex.c.
05130 { 05131 int mcnt; 05132 /* Point to after the args to the start_memory. */ 05133 unsigned char *p1 = *p + 2; 05134 05135 while (p1 < end) 05136 { 05137 /* Skip over opcodes that can match nothing, and return true or 05138 false, as appropriate, when we get to one that can't, or to the 05139 matching stop_memory. */ 05140 05141 switch ((re_opcode_t) *p1) 05142 { 05143 /* Could be either a loop or a series of alternatives. */ 05144 case on_failure_jump: 05145 p1++; 05146 EXTRACT_NUMBER_AND_INCR (mcnt, p1); 05147 05148 /* If the next operation is not a jump backwards in the 05149 pattern. */ 05150 05151 if (mcnt >= 0) 05152 { 05153 /* Go through the on_failure_jumps of the alternatives, 05154 seeing if any of the alternatives cannot match nothing. 05155 The last alternative starts with only a jump, 05156 whereas the rest start with on_failure_jump and end 05157 with a jump, e.g., here is the pattern for `a|b|c': 05158 05159 /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6 05160 /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3 05161 /exactn/1/c 05162 05163 So, we have to first go through the first (n-1) 05164 alternatives and then deal with the last one separately. */ 05165 05166 05167 /* Deal with the first (n-1) alternatives, which start 05168 with an on_failure_jump (see above) that jumps to right 05169 past a jump_past_alt. */ 05170 05171 while ((re_opcode_t) p1[mcnt-3] == jump_past_alt) 05172 { 05173 /* `mcnt' holds how many bytes long the alternative 05174 is, including the ending `jump_past_alt' and 05175 its number. */ 05176 05177 if (!alt_match_null_string_p (p1, p1 + mcnt - 3, 05178 reg_info)) 05179 return false; 05180 05181 /* Move to right after this alternative, including the 05182 jump_past_alt. */ 05183 p1 += mcnt; 05184 05185 /* Break if it's the beginning of an n-th alternative 05186 that doesn't begin with an on_failure_jump. 
*/ 05187 if ((re_opcode_t) *p1 != on_failure_jump) 05188 break; 05189 05190 /* Still have to check that it's not an n-th 05191 alternative that starts with an on_failure_jump. */ 05192 p1++; 05193 EXTRACT_NUMBER_AND_INCR (mcnt, p1); 05194 if ((re_opcode_t) p1[mcnt-3] != jump_past_alt) 05195 { 05196 /* Get to the beginning of the n-th alternative. */ 05197 p1 -= 3; 05198 break; 05199 } 05200 } 05201 05202 /* Deal with the last alternative: go back and get number 05203 of the `jump_past_alt' just before it. `mcnt' contains 05204 the length of the alternative. */ 05205 EXTRACT_NUMBER (mcnt, p1 - 2); 05206 05207 if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info)) 05208 return false; 05209 05210 p1 += mcnt; /* Get past the n-th alternative. */ 05211 } /* if mcnt > 0 */ 05212 break; 05213 05214 05215 case stop_memory: 05216 assert (p1[1] == **p); 05217 *p = p1 + 2; 05218 return true; 05219 05220 05221 default: 05222 if (!common_op_match_null_string_p (&p1, end, reg_info)) 05223 return false;
| static void init_syntax_once | ( | ) | [static] |
Definition at line 146 of file regex.c.
00147 { 00148 register int c; 00149 static int done = 0; 00150 00151 if (done) 00152 return; 00153 00154 bzero (re_syntax_table, sizeof re_syntax_table); 00155 00156 for (c = 'a'; c <= 'z'; c++) 00157 re_syntax_table[c] = Sword; 00158 00159 for (c = 'A'; c <= 'Z'; c++) 00160 re_syntax_table[c] = Sword; 00161 00162 for (c = '0'; c <= '9'; c++) 00163 re_syntax_table[c] = Sword; 00164 00165 re_syntax_table['_'] = Sword; 00166 00167 done = 1; 00168 }
| static void insert_op1 | ( | re_opcode_t | op, | |
| unsigned char * | loc, | |||
| int | arg, | |||
| unsigned char * | end | |||
| ) | [static] |
| static void insert_op2 | ( | re_opcode_t | op, | |
| unsigned char * | loc, | |||
| int | arg1, | |||
| int | arg2, | |||
| unsigned char * | end | |||
| ) | [static] |
| int re_compile_fastmap | ( | struct re_pattern_buffer * | bufp | ) |
Definition at line 3072 of file regex.c.
03079 { 03080 int j, k; 03081 #ifdef MATCH_MAY_ALLOCATE 03082 fail_stack_type fail_stack; 03083 #endif 03084 #ifndef REGEX_MALLOC 03085 char *destination; 03086 #endif 03087 /* We don't push any register information onto the failure stack. */ 03088 //sword unsigned num_regs = 0; 03089 03090 register char *fastmap = bufp->fastmap; 03091 unsigned char *pattern = bufp->buffer; 03092 unsigned char *p = pattern; 03093 register unsigned char *pend = pattern + bufp->used; 03094 03095 #ifdef REL_ALLOC 03096 /* This holds the pointer to the failure stack, when 03097 it is allocated relocatably. */ 03098 fail_stack_elt_t *failure_stack_ptr; 03099 #endif 03100 03101 /* Assume that each path through the pattern can be null until 03102 proven otherwise. We set this false at the bottom of switch 03103 statement, to which we get only if a particular path doesn't 03104 match the empty string. */ 03105 boolean path_can_be_null = true; 03106 03107 /* We aren't doing a `succeed_n' to begin with. */ 03108 boolean succeed_n_p = false; 03109 03110 assert (fastmap != NULL && p != NULL); 03111 03112 INIT_FAIL_STACK (); 03113 bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */ 03114 bufp->fastmap_accurate = 1; /* It will be when we're done. */ 03115 bufp->can_be_null = 0; 03116 03117 while (1) 03118 { 03119 if (p == pend || *p == succeed) 03120 { 03121 /* We have reached the (effective) end of pattern. */ 03122 if (!FAIL_STACK_EMPTY ()) 03123 { 03124 bufp->can_be_null |= path_can_be_null; 03125 03126 /* Reset for next path. */ 03127 path_can_be_null = true; 03128 03129 p = fail_stack.stack[--fail_stack.avail].pointer; 03130 03131 continue; 03132 } 03133 else 03134 break; 03135 } 03136 03137 /* We should never be about to go beyond the end of the pattern. 
*/ 03138 assert (p < pend); 03139 03140 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++)) 03141 { 03142 03143 /* I guess the idea here is to simply not bother with a fastmap 03144 if a backreference is used, since it's too hard to figure out 03145 the fastmap for the corresponding group. Setting 03146 `can_be_null' stops `re_search_2' from using the fastmap, so 03147 that is all we do. */ 03148 case duplicate: 03149 bufp->can_be_null = 1; 03150 goto done; 03151 03152 03153 /* Following are the cases which match a character. These end 03154 with `break'. */ 03155 03156 case exactn: 03157 fastmap[p[1]] = 1; 03158 break; 03159 03160 03161 case charset: 03162 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) 03163 if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) 03164 fastmap[j] = 1; 03165 break; 03166 03167 03168 case charset_not: 03169 /* Chars beyond end of map must be allowed. */ 03170 for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++) 03171 fastmap[j] = 1; 03172 03173 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) 03174 if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) 03175 fastmap[j] = 1; 03176 break; 03177 03178 03179 case wordchar: 03180 for (j = 0; j < (1 << BYTEWIDTH); j++) 03181 if (SYNTAX (j) == Sword) 03182 fastmap[j] = 1; 03183 break; 03184 03185 03186 case notwordchar: 03187 for (j = 0; j < (1 << BYTEWIDTH); j++) 03188 if (SYNTAX (j) != Sword) 03189 fastmap[j] = 1; 03190 break; 03191 03192 03193 case anychar: 03194 { 03195 int fastmap_newline = fastmap['\n']; 03196 03197 /* `.' matches anything ... */ 03198 for (j = 0; j < (1 << BYTEWIDTH); j++) 03199 fastmap[j] = 1; 03200 03201 /* ... except perhaps newline. */ 03202 if (!(bufp->syntax & RE_DOT_NEWLINE)) 03203 fastmap['\n'] = fastmap_newline; 03204 03205 /* Return if we have already set `can_be_null'; if we have, 03206 then the fastmap is irrelevant. Something's wrong here. */ 03207 else if (bufp->can_be_null) 03208 goto done; 03209 03210 /* Otherwise, have to check alternative paths. 
*/ 03211 break; 03212 } 03213 03214 #ifdef emacs 03215 case syntaxspec: 03216 k = *p++; 03217 for (j = 0; j < (1 << BYTEWIDTH); j++) 03218 if (SYNTAX (j) == (enum syntaxcode) k) 03219 fastmap[j] = 1; 03220 break; 03221 03222 03223 case notsyntaxspec: 03224 k = *p++; 03225 for (j = 0; j < (1 << BYTEWIDTH); j++) 03226 if (SYNTAX (j) != (enum syntaxcode) k) 03227 fastmap[j] = 1; 03228 break; 03229 03230 03231 /* All cases after this match the empty string. These end with 03232 `continue'. */ 03233 03234 03235 case before_dot: 03236 case at_dot: 03237 case after_dot: 03238 continue; 03239 #endif /* emacs */ 03240 03241 03242 case no_op: 03243 case begline: 03244 case endline: 03245 case begbuf: 03246 case endbuf: 03247 case wordbound: 03248 case notwordbound: 03249 case wordbeg: 03250 case wordend: 03251 case push_dummy_failure: 03252 continue; 03253 03254 03255 case jump_n: 03256 case pop_failure_jump: 03257 case maybe_pop_jump: 03258 case jump: 03259 case jump_past_alt: 03260 case dummy_failure_jump: 03261 EXTRACT_NUMBER_AND_INCR (j, p); 03262 p += j; 03263 if (j > 0) 03264 continue; 03265 03266 /* Jump backward implies we just went through the body of a 03267 loop and matched nothing. Opcode jumped to should be 03268 `on_failure_jump' or `succeed_n'. Just treat it like an 03269 ordinary jump. For a * loop, it has pushed its failure 03270 point already; if so, discard that as redundant. */ 03271 if ((re_opcode_t) *p != on_failure_jump 03272 && (re_opcode_t) *p != succeed_n) 03273 continue; 03274 03275 p++; 03276 EXTRACT_NUMBER_AND_INCR (j, p); 03277 p += j; 03278 03279 /* If what's on the stack is where we are now, pop it. 
*/ 03280 if (!FAIL_STACK_EMPTY () 03281 && fail_stack.stack[fail_stack.avail - 1].pointer == p) 03282 fail_stack.avail--; 03283 03284 continue; 03285 03286 03287 case on_failure_jump: 03288 case on_failure_keep_string_jump: 03289 handle_on_failure_jump: 03290 EXTRACT_NUMBER_AND_INCR (j, p); 03291 03292 /* For some patterns, e.g., `(a?)?', `p+j' here points to the 03293 end of the pattern. We don't want to push such a point, 03294 since when we restore it above, entering the switch will 03295 increment `p' past the end of the pattern. We don't need 03296 to push such a point since we obviously won't find any more 03297 fastmap entries beyond `pend'. Such a pattern can match 03298 the null string, though. */ 03299 if (p + j < pend) 03300 { 03301 if (!PUSH_PATTERN_OP (p + j, fail_stack)) 03302 { 03303 RESET_FAIL_STACK (); 03304 return -2; 03305 } 03306 } 03307 else 03308 bufp->can_be_null = 1; 03309 03310 if (succeed_n_p) 03311 { 03312 EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */ 03313 succeed_n_p = false; 03314 } 03315 03316 continue; 03317 03318 03319 case succeed_n: 03320 /* Get to the number of times to succeed. */ 03321 p += 2; 03322 03323 /* Increment p past the n for when k != 0. */ 03324 EXTRACT_NUMBER_AND_INCR (k, p); 03325 if (k == 0) 03326 { 03327 p -= 4; 03328 succeed_n_p = true; /* Spaghetti code alert. */ 03329 goto handle_on_failure_jump; 03330 } 03331 continue; 03332 03333 03334 case set_number_at: 03335 p += 4; 03336 continue; 03337 03338 03339 case start_memory: 03340 case stop_memory: 03341 p += 2; 03342 continue; 03343 03344 03345 default: 03346 abort (); /* We have listed all the cases. */ 03347 } /* switch *p++ */ 03348 03349 /* Getting here means we have found the possible starting 03350 characters for one path of the pattern -- and that the empty 03351 string does not match. We need not follow this path further. 03352 Instead, look at the next alternative (remembered on the 03353 stack), or quit if no more. 
The test at the top of the loop 03354 does these things. */ 03355 path_can_be_null = false; 03356 p = pend; 03357 } /* while p */ 03358 03359 /* Set `can_be_null' for the last path (also the first path, if the 03360 pattern is empty). */ 03361 bufp->can_be_null |= path_can_be_null;
| const char* re_compile_pattern | ( | char * | pattern, | |
| size_t | length, | |||
| struct re_pattern_buffer * | bufp | |||
| ) | const |
Definition at line 5383 of file regex.c.
05392 { 05393 reg_errcode_t ret; 05394 05395 /* GNU code is written to assume at least RE_NREGS registers will be set 05396 (and at least one extra will be -1). */ 05397 bufp->regs_allocated = REGS_UNALLOCATED; 05398 05399 /* And GNU code determines whether or not to get register information 05400 by passing null for the REGS argument to re_match, etc., not by 05401 setting no_sub. */ 05402 bufp->no_sub = 0; 05403 05404 /* Match anchors at newline. */ 05405 bufp->newline_anchor = 1; 05406 05407 ret = regex_compile (pattern, length, re_syntax_options, bufp);
| int re_match | ( | struct re_pattern_buffer * | bufp, | |
| const char * | string, | |||
| int | size, | |||
| int | pos, | |||
| struct re_registers * | regs | |||
| ) |
Definition at line 3655 of file regex.c.
03665 { 03666 int result = re_match_2_internal (bufp, NULL, 0, string, size, 03667 pos, regs, size); 03668 #ifndef REGEX_MALLOC 03669 #ifdef C_ALLOCA
| int re_match_2 | ( | struct re_pattern_buffer * | bufp, | |
| const char * | string1, | |||
| int | size1, | |||
| const char * | string2, | |||
| int | size2, | |||
| int | pos, | |||
| struct re_registers * | regs, | |||
| int | stop | |||
| ) |
Definition at line 3698 of file regex.c.
03710 { 03711 int result = re_match_2_internal (bufp, string1, size1, string2, size2, 03712 pos, regs, stop); 03713 #ifndef REGEX_MALLOC 03714 #ifdef C_ALLOCA
| static int re_match_2_internal | ( | struct re_pattern_buffer * | bufp, | |
| const char * | string1, | |||
| int | size1, | |||
| const char * | string2, | |||
| int | size2, | |||
| int | pos, | |||
| struct re_registers * | regs, | |||
| int | stop | |||
| ) | [static] |
Definition at line 3719 of file regex.c.
03731 { 03732 /* General temporaries. */ 03733 int mcnt; 03734 unsigned char *p1; 03735 03736 /* Just past the end of the corresponding string. */ 03737 const char *end1, *end2; 03738 03739 /* Pointers into string1 and string2, just past the last characters in 03740 each to consider matching. */ 03741 const char *end_match_1, *end_match_2; 03742 03743 /* Where we are in the data, and the end of the current string. */ 03744 const char *d, *dend; 03745 03746 /* Where we are in the pattern, and the end of the pattern. */ 03747 unsigned char *p = bufp->buffer; 03748 register unsigned char *pend = p + bufp->used; 03749 03750 /* Mark the opcode just after a start_memory, so we can test for an 03751 empty subpattern when we get to the stop_memory. */ 03752 unsigned char *just_past_start_mem = 0; 03753 03754 /* We use this to map every character in the string. */ 03755 RE_TRANSLATE_TYPE translate = bufp->translate; 03756 03757 /* Failure point stack. Each place that can handle a failure further 03758 down the line pushes a failure point on this stack. It consists of 03759 restart, regend, and reg_info for all registers corresponding to 03760 the subexpressions we're currently inside, plus the number of such 03761 registers, and, finally, two char *'s. The first char * is where 03762 to resume scanning the pattern; the second one is where to resume 03763 scanning the strings. If the latter is zero, the failure point is 03764 a ``dummy''; if a failure happens and the failure point is a dummy, 03765 it gets discarded and the next next one is tried. */ 03766 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ 03767 fail_stack_type fail_stack; 03768 #endif 03769 #ifdef DEBUG 03770 static unsigned failure_id = 0; 03771 unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0; 03772 #endif 03773 03774 #ifdef REL_ALLOC 03775 /* This holds the pointer to the failure stack, when 03776 it is allocated relocatably. 
*/ 03777 fail_stack_elt_t *failure_stack_ptr; 03778 #endif 03779 03780 /* We fill all the registers internally, independent of what we 03781 return, for use in backreferences. The number here includes 03782 an element for register zero. */ 03783 size_t num_regs = bufp->re_nsub + 1; 03784 03785 /* The currently active registers. */ 03786 active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG; 03787 active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG; 03788 03789 /* Information on the contents of registers. These are pointers into 03790 the input strings; they record just what was matched (on this 03791 attempt) by a subexpression part of the pattern, that is, the 03792 regnum-th regstart pointer points to where in the pattern we began 03793 matching and the regnum-th regend points to right after where we 03794 stopped matching the regnum-th subexpression. (The zeroth register 03795 keeps track of what the whole pattern matches.) */ 03796 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ 03797 const char **regstart, **regend; 03798 #endif 03799 03800 /* If a group that's operated upon by a repetition operator fails to 03801 match anything, then the register for its start will need to be 03802 restored because it will have been set to wherever in the string we 03803 are when we last see its open-group operator. Similarly for a 03804 register's end. */ 03805 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ 03806 const char **old_regstart, **old_regend; 03807 #endif 03808 03809 /* The is_active field of reg_info helps us keep track of which (possibly 03810 nested) subexpressions we are currently in. The matched_something 03811 field of reg_info[reg_num] helps us tell whether or not we have 03812 matched any of the pattern so far this time through the reg_num-th 03813 subexpression. These two fields get reset each time through any 03814 loop their register is in. */ 03815 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. 
*/ 03816 register_info_type *reg_info; 03817 #endif 03818 03819 /* The following record the register info as found in the above 03820 variables when we find a match better than any we've seen before. 03821 This happens as we backtrack through the failure points, which in 03822 turn happens only if we have not yet matched the entire string. */ 03823 unsigned best_regs_set = false; 03824 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ 03825 const char **best_regstart, **best_regend; 03826 #endif 03827 03828 /* Logically, this is `best_regend[0]'. But we don't want to have to 03829 allocate space for that if we're not allocating space for anything 03830 else (see below). Also, we never need info about register 0 for 03831 any of the other register vectors, and it seems rather a kludge to 03832 treat `best_regend' differently than the rest. So we keep track of 03833 the end of the best match so far in a separate variable. We 03834 initialize this to NULL so that when we backtrack the first time 03835 and need to test it, it's not garbage. */ 03836 const char *match_end = NULL; 03837 03838 /* This helps SET_REGS_MATCHED avoid doing redundant work. */ 03839 int set_regs_matched_done = 0; 03840 03841 /* Used when we pop values we don't care about. */ 03842 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ 03843 const char **reg_dummy; 03844 register_info_type *reg_info_dummy; 03845 #endif 03846 03847 #ifdef DEBUG 03848 /* Counts the total number of registers pushed. */ 03849 unsigned num_regs_pushed = 0; 03850 #endif 03851 03852 DEBUG_PRINT1 ("\n\nEntering re_match_2.\n"); 03853 03854 INIT_FAIL_STACK (); 03855 03856 #ifdef MATCH_MAY_ALLOCATE 03857 /* Do not bother to initialize all the register variables if there are 03858 no groups in the pattern, as it takes a fair amount of time. If 03859 there are groups, we include space for register 0 (the whole 03860 pattern), even though we never use it, since it simplifies the 03861 array indexing. 
We should fix this. */ 03862 if (bufp->re_nsub) 03863 { 03864 regstart = REGEX_TALLOC (num_regs, const char *); 03865 regend = REGEX_TALLOC (num_regs, const char *); 03866 old_regstart = REGEX_TALLOC (num_regs, const char *); 03867 old_regend = REGEX_TALLOC (num_regs, const char *); 03868 best_regstart = REGEX_TALLOC (num_regs, const char *); 03869 best_regend = REGEX_TALLOC (num_regs, const char *); 03870 reg_info = REGEX_TALLOC (num_regs, register_info_type); 03871 reg_dummy = REGEX_TALLOC (num_regs, const char *); 03872 reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type); 03873 03874 if (!(regstart && regend && old_regstart && old_regend && reg_info 03875 && best_regstart && best_regend && reg_dummy && reg_info_dummy)) 03876 { 03877 FREE_VARIABLES (); 03878 return -2; 03879 } 03880 } 03881 else 03882 { 03883 /* We must initialize all our variables to NULL, so that 03884 `FREE_VARIABLES' doesn't try to free them. */ 03885 regstart = regend = old_regstart = old_regend = best_regstart 03886 = best_regend = reg_dummy = NULL; 03887 reg_info = reg_info_dummy = (register_info_type *) NULL; 03888 } 03889 #endif /* MATCH_MAY_ALLOCATE */ 03890 03891 /* The starting position is bogus. */ 03892 if (pos < 0 || pos > size1 + size2) 03893 { 03894 FREE_VARIABLES (); 03895 return -1; 03896 } 03897 03898 /* Initialize subexpression text positions to -1 to mark ones that no 03899 start_memory/stop_memory has been seen for. Also initialize the 03900 register information struct. */ 03901 for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++) 03902 { 03903 regstart[mcnt] = regend[mcnt] 03904 = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE; 03905 03906 REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE; 03907 IS_ACTIVE (reg_info[mcnt]) = 0; 03908 MATCHED_SOMETHING (reg_info[mcnt]) = 0; 03909 EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0; 03910 } 03911 03912 /* We move `string1' into `string2' if the latter's empty -- but not if 03913 `string1' is null. 
*/ 03914 if (size2 == 0 && string1 != NULL) 03915 { 03916 string2 = string1; 03917 size2 = size1; 03918 string1 = 0; 03919 size1 = 0; 03920 } 03921 end1 = string1 + size1; 03922 end2 = string2 + size2; 03923 03924 /* Compute where to stop matching, within the two strings. */ 03925 if (stop <= size1) 03926 { 03927 end_match_1 = string1 + stop; 03928 end_match_2 = string2; 03929 } 03930 else 03931 { 03932 end_match_1 = end1; 03933 end_match_2 = string2 + stop - size1; 03934 } 03935 03936 /* `p' scans through the pattern as `d' scans through the data. 03937 `dend' is the end of the input string that `d' points within. `d' 03938 is advanced into the following input string whenever necessary, but 03939 this happens before fetching; therefore, at the beginning of the 03940 loop, `d' can be pointing at the end of a string, but it cannot 03941 equal `string2'. */ 03942 if (size1 > 0 && pos <= size1) 03943 { 03944 d = string1 + pos; 03945 dend = end_match_1; 03946 } 03947 else 03948 { 03949 d = string2 + pos - size1; 03950 dend = end_match_2; 03951 } 03952 03953 DEBUG_PRINT1 ("The compiled pattern is:\n"); 03954 DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend); 03955 DEBUG_PRINT1 ("The string to match is: `"); 03956 DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2); 03957 DEBUG_PRINT1 ("'\n"); 03958 03959 /* This loops over pattern commands. It exits by returning from the 03960 function if the match is complete, or it drops through if the match 03961 fails at this starting point in the input data. */ 03962 for (;;) 03963 { 03964 #ifdef _LIBC 03965 DEBUG_PRINT2 ("\n%p: ", p); 03966 #else 03967 DEBUG_PRINT2 ("\n0x%x: ", p); 03968 #endif 03969 03970 if (p == pend) 03971 { /* End of pattern means we might have succeeded. */ 03972 DEBUG_PRINT1 ("end of pattern ... "); 03973 03974 /* If we haven't matched the entire string, and we want the 03975 longest match, try backtracking. 
*/ 03976 if (d != end_match_2) 03977 { 03978 /* 1 if this match ends in the same string (string1 or string2) 03979 as the best previous match. */ 03980 boolean same_str_p = (FIRST_STRING_P (match_end) 03981 == MATCHING_IN_FIRST_STRING); 03982 /* 1 if this match is the best seen so far. */ 03983 boolean best_match_p; 03984 03985 /* AIX compiler got confused when this was combined 03986 with the previous declaration. */ 03987 if (same_str_p) 03988 best_match_p = d > match_end; 03989 else 03990 best_match_p = !MATCHING_IN_FIRST_STRING; 03991 03992 DEBUG_PRINT1 ("backtracking.\n"); 03993 03994 if (!FAIL_STACK_EMPTY ()) 03995 { /* More failure points to try. */ 03996 03997 /* If exceeds best match so far, save it. */ 03998 if (!best_regs_set || best_match_p) 03999 { 04000 best_regs_set = true; 04001 match_end = d; 04002 04003 DEBUG_PRINT1 ("\nSAVING match as best so far.\n"); 04004 04005 for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++) 04006 { 04007 best_regstart[mcnt] = regstart[mcnt]; 04008 best_regend[mcnt] = regend[mcnt]; 04009 } 04010 } 04011 goto fail; 04012 } 04013 04014 /* If no failure points, don't restore garbage. And if 04015 last match is real best match, don't restore second 04016 best one. */ 04017 else if (best_regs_set && !best_match_p) 04018 { 04019 restore_best_regs: 04020 /* Restore best match. It may happen that `dend == 04021 end_match_1' while the restored d is in string2. 04022 For example, the pattern `x.*y.*z' against the 04023 strings `x-' and `y-z-', if the two strings are 04024 not consecutive in memory. */ 04025 DEBUG_PRINT1 ("Restoring best registers.\n"); 04026 04027 d = match_end; 04028 dend = ((d >= string1 && d <= end1) 04029 ? 
end_match_1 : end_match_2); 04030 04031 for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++) 04032 { 04033 regstart[mcnt] = best_regstart[mcnt]; 04034 regend[mcnt] = best_regend[mcnt]; 04035 } 04036 } 04037 } /* d != end_match_2 */ 04038 04039 succeed_label: 04040 DEBUG_PRINT1 ("Accepting match.\n"); 04041 04042 /* If caller wants register contents data back, do it. */ 04043 if (regs && !bufp->no_sub) 04044 { 04045 /* Have the register data arrays been allocated? */ 04046 if (bufp->regs_allocated == REGS_UNALLOCATED) 04047 { /* No. So allocate them with malloc. We need one 04048 extra element beyond `num_regs' for the `-1' marker 04049 GNU code uses. */ 04050 regs->num_regs = MAX (RE_NREGS, num_regs + 1); 04051 regs->start = TALLOC (regs->num_regs, regoff_t); 04052 regs->end = TALLOC (regs->num_regs, regoff_t); 04053 if (regs->start == NULL || regs->end == NULL) 04054 { 04055 FREE_VARIABLES (); 04056 return -2; 04057 } 04058 bufp->regs_allocated = REGS_REALLOCATE; 04059 } 04060 else if (bufp->regs_allocated == REGS_REALLOCATE) 04061 { /* Yes. If we need more elements than were already 04062 allocated, reallocate them. If we need fewer, just 04063 leave it alone. */ 04064 if (regs->num_regs < num_regs + 1) 04065 { 04066 regs->num_regs = num_regs + 1; 04067 RETALLOC (regs->start, regs->num_regs, regoff_t); 04068 RETALLOC (regs->end, regs->num_regs, regoff_t); 04069 if (regs->start == NULL || regs->end == NULL) 04070 { 04071 FREE_VARIABLES (); 04072 return -2; 04073 } 04074 } 04075 } 04076 else 04077 { 04078 /* These braces fend off a "empty body in an else-statement" 04079 warning under GCC when assert expands to nothing. */ 04080 assert (bufp->regs_allocated == REGS_FIXED); 04081 } 04082 04083 /* Convert the pointer data in `regstart' and `regend' to 04084 indices. Register zero has to be set differently, 04085 since we haven't kept track of any info for it. 
*/ 04086 if (regs->num_regs > 0) 04087 { 04088 regs->start[0] = pos; 04089 regs->end[0] = (MATCHING_IN_FIRST_STRING 04090 ? ((regoff_t) (d - string1)) 04091 : ((regoff_t) (d - string2 + size1))); 04092 } 04093 04094 /* Go through the first `min (num_regs, regs->num_regs)' 04095 registers, since that is all we initialized. */ 04096 for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs); 04097 mcnt++) 04098 { 04099 if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt])) 04100 regs->start[mcnt] = regs->end[mcnt] = -1; 04101 else 04102 { 04103 regs->start[mcnt] 04104 = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]); 04105 regs->end[mcnt] 04106 = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]); 04107 } 04108 } 04109 04110 /* If the regs structure we return has more elements than 04111 were in the pattern, set the extra elements to -1. If 04112 we (re)allocated the registers, this is the case, 04113 because we always allocate enough to have at least one 04114 -1 at the end. */ 04115 for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++) 04116 regs->start[mcnt] = regs->end[mcnt] = -1; 04117 } /* regs && !bufp->no_sub */ 04118 04119 DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", 04120 nfailure_points_pushed, nfailure_points_popped, 04121 nfailure_points_pushed - nfailure_points_popped); 04122 DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed); 04123 04124 mcnt = d - pos - (MATCHING_IN_FIRST_STRING 04125 ? string1 04126 : string2 - size1); 04127 04128 DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt); 04129 04130 FREE_VARIABLES (); 04131 return mcnt; 04132 } 04133 04134 /* Otherwise match next pattern command. */ 04135 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++)) 04136 { 04137 /* Ignore these. Used to ignore the n of succeed_n's which 04138 currently have n == 0. 
*/ 04139 case no_op: 04140 DEBUG_PRINT1 ("EXECUTING no_op.\n"); 04141 break; 04142 04143 case succeed: 04144 DEBUG_PRINT1 ("EXECUTING succeed.\n"); 04145 goto succeed_label; 04146 04147 /* Match the next n pattern characters exactly. The following 04148 byte in the pattern defines n, and the n bytes after that 04149 are the characters to match. */ 04150 case exactn: 04151 mcnt = *p++; 04152 DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt); 04153 04154 /* This is written out as an if-else so we don't waste time 04155 testing `translate' inside the loop. */ 04156 if (translate) 04157 { 04158 do 04159 { 04160 PREFETCH (); 04161 if ((unsigned char) translate[(unsigned char) *d++] 04162 != (unsigned char) *p++) 04163 goto fail; 04164 } 04165 while (--mcnt); 04166 } 04167 else 04168 { 04169 do 04170 { 04171 PREFETCH (); 04172 if (*d++ != (char) *p++) goto fail; 04173 } 04174 while (--mcnt); 04175 } 04176 SET_REGS_MATCHED (); 04177 break; 04178 04179 04180 /* Match any character except possibly a newline or a null. */ 04181 case anychar: 04182 DEBUG_PRINT1 ("EXECUTING anychar.\n"); 04183 04184 PREFETCH (); 04185 04186 if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n') 04187 || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000')) 04188 goto fail; 04189 04190 SET_REGS_MATCHED (); 04191 DEBUG_PRINT2 (" Matched `%d'.\n", *d); 04192 d++; 04193 break; 04194 04195 04196 case charset: 04197 case charset_not: 04198 { 04199 register unsigned char c; 04200 boolean not = (re_opcode_t) *(p - 1) == charset_not; 04201 04202 DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : ""); 04203 04204 PREFETCH (); 04205 c = TRANSLATE (*d); /* The character to match. */ 04206 04207 /* Cast to `unsigned' instead of `unsigned char' in case the 04208 bit list is a full 32 bytes long. 
*/ 04209 if (c < (unsigned) (*p * BYTEWIDTH) 04210 && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) 04211 not = !not; 04212 04213 p += 1 + *p; 04214 04215 if (!not) goto fail; 04216 04217 SET_REGS_MATCHED (); 04218 d++; 04219 break; 04220 } 04221 04222 04223 /* The beginning of a group is represented by start_memory. 04224 The arguments are the register number in the next byte, and the 04225 number of groups inner to this one in the next. The text 04226 matched within the group is recorded (in the internal 04227 registers data structure) under the register number. */ 04228 case start_memory: 04229 DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]); 04230 04231 /* Find out if this group can match the empty string. */ 04232 p1 = p; /* To send to group_match_null_string_p. */ 04233 04234 if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE) 04235 REG_MATCH_NULL_STRING_P (reg_info[*p]) 04236 = group_match_null_string_p (&p1, pend, reg_info); 04237 04238 /* Save the position in the string where we were the last time 04239 we were at this open-group operator in case the group is 04240 operated upon by a repetition operator, e.g., with `(a*)*b' 04241 against `ab'; then we want to ignore where we are now in 04242 the string in case this attempt to match fails. */ 04243 old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) 04244 ? REG_UNSET (regstart[*p]) ? d : regstart[*p] 04245 : regstart[*p]; 04246 DEBUG_PRINT2 (" old_regstart: %d\n", 04247 POINTER_TO_OFFSET (old_regstart[*p])); 04248 04249 regstart[*p] = d; 04250 DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p])); 04251 04252 IS_ACTIVE (reg_info[*p]) = 1; 04253 MATCHED_SOMETHING (reg_info[*p]) = 0; 04254 04255 /* Clear this whenever we change the register activity status. */ 04256 set_regs_matched_done = 0; 04257 04258 /* This is the new highest active register. 
*/ 04259 highest_active_reg = *p; 04260 04261 /* If nothing was active before, this is the new lowest active 04262 register. */ 04263 if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) 04264 lowest_active_reg = *p; 04265 04266 /* Move past the register number and inner group count. */ 04267 p += 2; 04268 just_past_start_mem = p; 04269 04270 break; 04271 04272 04273 /* The stop_memory opcode represents the end of a group. Its 04274 arguments are the same as start_memory's: the register 04275 number, and the number of inner groups. */ 04276 case stop_memory: 04277 DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]); 04278 04279 /* We need to save the string position the last time we were at 04280 this close-group operator in case the group is operated 04281 upon by a repetition operator, e.g., with `((a*)*(b*)*)*' 04282 against `aba'; then we want to ignore where we are now in 04283 the string in case this attempt to match fails. */ 04284 old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) 04285 ? REG_UNSET (regend[*p]) ? d : regend[*p] 04286 : regend[*p]; 04287 DEBUG_PRINT2 (" old_regend: %d\n", 04288 POINTER_TO_OFFSET (old_regend[*p])); 04289 04290 regend[*p] = d; 04291 DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p])); 04292 04293 /* This register isn't active anymore. */ 04294 IS_ACTIVE (reg_info[*p]) = 0; 04295 04296 /* Clear this whenever we change the register activity status. */ 04297 set_regs_matched_done = 0; 04298 04299 /* If this was the only register active, nothing is active 04300 anymore. */ 04301 if (lowest_active_reg == highest_active_reg) 04302 { 04303 lowest_active_reg = NO_LOWEST_ACTIVE_REG; 04304 highest_active_reg = NO_HIGHEST_ACTIVE_REG; 04305 } 04306 else 04307 { /* We must scan for the new highest active register, since 04308 it isn't necessarily one less than now: consider 04309 (a(b)c(d(e)f)g). When group 3 ends, after the f), the 04310 new highest active register is 1. 
*/ 04311 unsigned char r = *p - 1; 04312 while (r > 0 && !IS_ACTIVE (reg_info[r])) 04313 r--; 04314 04315 /* If we end up at register zero, that means that we saved 04316 the registers as the result of an `on_failure_jump', not 04317 a `start_memory', and we jumped to past the innermost 04318 `stop_memory'. For example, in ((.)*) we save 04319 registers 1 and 2 as a result of the *, but when we pop 04320 back to the second ), we are at the stop_memory 1. 04321 Thus, nothing is active. */ 04322 if (r == 0) 04323 { 04324 lowest_active_reg = NO_LOWEST_ACTIVE_REG; 04325 highest_active_reg = NO_HIGHEST_ACTIVE_REG; 04326 } 04327 else 04328 highest_active_reg = r; 04329 } 04330 04331 /* If just failed to match something this time around with a 04332 group that's operated on by a repetition operator, try to 04333 force exit from the ``loop'', and restore the register 04334 information for this group that we had before trying this 04335 last match. */ 04336 if ((!MATCHED_SOMETHING (reg_info[*p]) 04337 || just_past_start_mem == p - 1) 04338 && (p + 2) < pend) 04339 { 04340 boolean is_a_jump_n = false; 04341 04342 p1 = p + 2; 04343 mcnt = 0; 04344 switch ((re_opcode_t) *p1++) 04345 { 04346 case jump_n: 04347 is_a_jump_n = true; 04348 case pop_failure_jump: 04349 case maybe_pop_jump: 04350 case jump: 04351 case dummy_failure_jump: 04352 EXTRACT_NUMBER_AND_INCR (mcnt, p1); 04353 if (is_a_jump_n) 04354 p1 += 2; 04355 break; 04356 04357 default: 04358 /* do nothing */ ; 04359 } 04360 p1 += mcnt; 04361 04362 /* If the next operation is a jump backwards in the pattern 04363 to an on_failure_jump right before the start_memory 04364 corresponding to this stop_memory, exit from the loop 04365 by forcing a failure after pushing on the stack the 04366 on_failure_jump's jump in the pattern, and d. 
*/ 04367 if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump 04368 && (re_opcode_t) p1[3] == start_memory && p1[4] == *p) 04369 { 04370 /* If this group ever matched anything, then restore 04371 what its registers were before trying this last 04372 failed match, e.g., with `(a*)*b' against `ab' for 04373 regstart[1], and, e.g., with `((a*)*(b*)*)*' 04374 against `aba' for regend[3]. 04375 04376 Also restore the registers for inner groups for, 04377 e.g., `((a*)(b*))*' against `aba' (register 3 would 04378 otherwise get trashed). */ 04379 04380 if (EVER_MATCHED_SOMETHING (reg_info[*p])) 04381 { 04382 unsigned r; 04383 04384 EVER_MATCHED_SOMETHING (reg_info[*p]) = 0; 04385 04386 /* Restore this and inner groups' (if any) registers. */ 04387 for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1); 04388 r++) 04389 { 04390 regstart[r] = old_regstart[r]; 04391 04392 /* xx why this test? */ 04393 if (old_regend[r] >= regstart[r]) 04394 regend[r] = old_regend[r]; 04395 } 04396 } 04397 p1++; 04398 EXTRACT_NUMBER_AND_INCR (mcnt, p1); 04399 PUSH_FAILURE_POINT (p1 + mcnt, d, -2); 04400 04401 goto fail; 04402 } 04403 } 04404 04405 /* Move past the register number and the inner group count. */ 04406 p += 2; 04407 break; 04408 04409 04410 /* <digit> has been turned into a `duplicate' command which is 04411 followed by the numeric value of <digit> as the register number. */ 04412 case duplicate: 04413 { 04414 register const char *d2, *dend2; 04415 int regno = *p++; /* Get which register to match against. */ 04416 DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno); 04417 04418 /* Can't back reference a group which we've never matched. */ 04419 if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno])) 04420 goto fail; 04421 04422 /* Where in input to try to start matching. 
*/ 04423 d2 = regstart[regno]; 04424 04425 /* Where to stop matching; if both the place to start and 04426 the place to stop matching are in the same string, then 04427 set to the place to stop, otherwise, for now have to use 04428 the end of the first string. */ 04429 04430 dend2 = ((FIRST_STRING_P (regstart[regno]) 04431 == FIRST_STRING_P (regend[regno])) 04432 ? regend[regno] : end_match_1); 04433 for (;;) 04434 { 04435 /* If necessary, advance to next segment in register 04436 contents. */ 04437 while (d2 == dend2) 04438 { 04439 if (dend2 == end_match_2) break; 04440 if (dend2 == regend[regno]) break; 04441 04442 /* End of string1 => advance to string2. */ 04443 d2 = string2; 04444 dend2 = regend[regno]; 04445 } 04446 /* At end of register contents => success */ 04447 if (d2 == dend2) break; 04448 04449 /* If necessary, advance to next segment in data. */ 04450 PREFETCH (); 04451 04452 /* How many characters left in this segment to match. */ 04453 mcnt = dend - d; 04454 04455 /* Want how many consecutive characters we can match in 04456 one shot, so, if necessary, adjust the count. */ 04457 if (mcnt > dend2 - d2) 04458 mcnt = dend2 - d2; 04459 04460 /* Compare that many; failure if mismatch, else move 04461 past them. */ 04462 if (translate 04463 ? bcmp_translate (d, d2, mcnt, translate) 04464 : bcmp (d, d2, mcnt)) 04465 goto fail; 04466 d += mcnt, d2 += mcnt; 04467 04468 /* Do this because we've match some characters. */ 04469 SET_REGS_MATCHED (); 04470 } 04471 } 04472 break; 04473 04474 04475 /* begline matches the empty string at the beginning of the string 04476 (unless `not_bol' is set in `bufp'), and, if 04477 `newline_anchor' is set, after newlines. */ 04478 case begline: 04479 DEBUG_PRINT1 ("EXECUTING begline.\n"); 04480 04481 if (AT_STRINGS_BEG (d)) 04482 { 04483 if (!bufp->not_bol) break; 04484 } 04485 else if (d[-1] == '\n' && bufp->newline_anchor) 04486 { 04487 break; 04488 } 04489 /* In all other cases, we fail. 
*/ 04490 goto fail; 04491 04492 04493 /* endline is the dual of begline. */ 04494 case endline: 04495 DEBUG_PRINT1 ("EXECUTING endline.\n"); 04496 04497 if (AT_STRINGS_END (d)) 04498 { 04499 if (!bufp->not_eol) break; 04500 } 04501 04502 /* We have to ``prefetch'' the next character. */ 04503 else if ((d == end1 ? *string2 : *d) == '\n' 04504 && bufp->newline_anchor) 04505 { 04506 break; 04507 } 04508 goto fail; 04509 04510 04511 /* Match at the very beginning of the data. */ 04512 case begbuf: 04513 DEBUG_PRINT1 ("EXECUTING begbuf.\n"); 04514 if (AT_STRINGS_BEG (d)) 04515 break; 04516 goto fail; 04517 04518 04519 /* Match at the very end of the data. */ 04520 case endbuf: 04521 DEBUG_PRINT1 ("EXECUTING endbuf.\n"); 04522 if (AT_STRINGS_END (d)) 04523 break; 04524 goto fail; 04525 04526 04527 /* on_failure_keep_string_jump is used to optimize `.*\n'. It 04528 pushes NULL as the value for the string on the stack. Then 04529 `pop_failure_point' will keep the current value for the 04530 string, instead of restoring it. To see why, consider 04531 matching `foo\nbar' against `.*\n'. The .* matches the foo; 04532 then the . fails against the \n. But the next thing we want 04533 to do is match the \n against the \n; if we restored the 04534 string value, we would be back at the foo. 04535 04536 Because this is used only in specific cases, we don't need to 04537 check all the things that `on_failure_jump' does, to make 04538 sure the right things get saved on the stack. Hence we don't 04539 share its code. The only reason to push anything on the 04540 stack at all is that otherwise we would have to change 04541 `anychar's code to do something besides goto fail in this 04542 case; that seems worse than this. 
*/ 04543 case on_failure_keep_string_jump: 04544 DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump"); 04545 04546 EXTRACT_NUMBER_AND_INCR (mcnt, p); 04547 #ifdef _LIBC 04548 DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt); 04549 #else 04550 DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt); 04551 #endif 04552 04553 PUSH_FAILURE_POINT (p + mcnt, NULL, -2); 04554 break; 04555 04556 04557 /* Uses of on_failure_jump: 04558 04559 Each alternative starts with an on_failure_jump that points 04560 to the beginning of the next alternative. Each alternative 04561 except the last ends with a jump that in effect jumps past 04562 the rest of the alternatives. (They really jump to the 04563 ending jump of the following alternative, because tensioning 04564 these jumps is a hassle.) 04565 04566 Repeats start with an on_failure_jump that points past both 04567 the repetition text and either the following jump or 04568 pop_failure_jump back to this on_failure_jump. */ 04569 case on_failure_jump: 04570 on_failure: 04571 DEBUG_PRINT1 ("EXECUTING on_failure_jump"); 04572 04573 EXTRACT_NUMBER_AND_INCR (mcnt, p); 04574 #ifdef _LIBC 04575 DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt); 04576 #else 04577 DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt); 04578 #endif 04579 04580 /* If this on_failure_jump comes right before a group (i.e., 04581 the original * applied to a group), save the information 04582 for that group and all inner ones, so that if we fail back 04583 to this point, the group's information will be correct. 04584 For example, in \(a*\)*\1, we need the preceding group, 04585 and in \(zz\(a*\)b*\)\2, we need the inner group. */ 04586 04587 /* We can't use `p' to check ahead because we push 04588 a failure point to `p + mcnt' after we do this. */ 04589 p1 = p; 04590 04591 /* We need to skip no_op's before we look for the 04592 start_memory in case this on_failure_jump is happening as 04593 the result of a completed succeed_n, as in \(a\)\{1,3\}b\1 04594 against aba. 
*/ 04595 while (p1 < pend && (re_opcode_t) *p1 == no_op) 04596 p1++; 04597 04598 if (p1 < pend && (re_opcode_t) *p1 == start_memory) 04599 { 04600 /* We have a new highest active register now. This will 04601 get reset at the start_memory we are about to get to, 04602 but we will have saved all the registers relevant to 04603 this repetition op, as described above. */ 04604 highest_active_reg = *(p1 + 1) + *(p1 + 2); 04605 if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) 04606 lowest_active_reg = *(p1 + 1); 04607 } 04608 04609 DEBUG_PRINT1 (":\n"); 04610 PUSH_FAILURE_POINT (p + mcnt, d, -2); 04611 break; 04612 04613 04614 /* A smart repeat ends with `maybe_pop_jump'. 04615 We change it to either `pop_failure_jump' or `jump'. */ 04616 case maybe_pop_jump: 04617 EXTRACT_NUMBER_AND_INCR (mcnt, p); 04618 DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt); 04619 { 04620 register unsigned char *p2 = p; 04621 04622 /* Compare the beginning of the repeat with what in the 04623 pattern follows its end. If we can establish that there 04624 is nothing that they would both match, i.e., that we 04625 would have to backtrack because of (as in, e.g., `a*a') 04626 then we can change to pop_failure_jump, because we'll 04627 never have to backtrack. 04628 04629 This is not true in the case of alternatives: in 04630 `(a|ab)*' we do need to backtrack to the `ab' alternative 04631 (e.g., if the string was `ab'). But instead of trying to 04632 detect that here, the alternative has put on a dummy 04633 failure point which is what we will end up popping. */ 04634 04635 /* Skip over open/close-group commands. 04636 If what follows this loop is a ...+ construct, 04637 look at what begins its body, since we will have to 04638 match at least one of that. 
*/ 04639 while (1) 04640 { 04641 if (p2 + 2 < pend 04642 && ((re_opcode_t) *p2 == stop_memory 04643 || (re_opcode_t) *p2 == start_memory)) 04644 p2 += 3; 04645 else if (p2 + 6 < pend 04646 && (re_opcode_t) *p2 == dummy_failure_jump) 04647 p2 += 6; 04648 else 04649 break; 04650 } 04651 04652 p1 = p + mcnt; 04653 /* p1[0] ... p1[2] are the `on_failure_jump' corresponding 04654 to the `maybe_finalize_jump' of this case. Examine what 04655 follows. */ 04656 04657 /* If we're at the end of the pattern, we can change. */ 04658 if (p2 == pend) 04659 { 04660 /* Consider what happens when matching ":\(.*\)" 04661 against ":/". I don't really understand this code 04662 yet. */ 04663 p[-3] = (unsigned char) pop_failure_jump; 04664 DEBUG_PRINT1 04665 (" End of pattern: change to `pop_failure_jump'.\n"); 04666 } 04667 04668 else if ((re_opcode_t) *p2 == exactn 04669 || (bufp->newline_anchor && (re_opcode_t) *p2 == endline)) 04670 { 04671 register unsigned char c 04672 = *p2 == (unsigned char) endline ? '\n' : p2[2]; 04673 04674 if ((re_opcode_t) p1[3] == exactn && p1[5] != c) 04675 { 04676 p[-3] = (unsigned char) pop_failure_jump; 04677 DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n", 04678 c, p1[5]); 04679 } 04680 04681 else if ((re_opcode_t) p1[3] == charset 04682 || (re_opcode_t) p1[3] == charset_not) 04683 { 04684 int not = (re_opcode_t) p1[3] == charset_not; 04685 04686 if (c < (unsigned char) (p1[4] * BYTEWIDTH) 04687 && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) 04688 not = !not; 04689 04690 /* `not' is equal to 1 if c would match, which means 04691 that we can't change to pop_failure_jump. */ 04692 if (!not) 04693 { 04694 p[-3] = (unsigned char) pop_failure_jump; 04695 DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); 04696 } 04697 } 04698 } 04699 else if ((re_opcode_t) *p2 == charset) 04700 { 04701 #ifdef DEBUG 04702 register unsigned char c 04703 = *p2 == (unsigned char) endline ? 
'\n' : p2[2]; 04704 #endif 04705 04706 #if 0 04707 if ((re_opcode_t) p1[3] == exactn 04708 && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5] 04709 && (p2[2 + p1[5] / BYTEWIDTH] 04710 & (1 << (p1[5] % BYTEWIDTH))))) 04711 #else 04712 if ((re_opcode_t) p1[3] == exactn 04713 && ! ((int) p2[1] * BYTEWIDTH > (int) p1[4] 04714 && (p2[2 + p1[4] / BYTEWIDTH] 04715 & (1 << (p1[4] % BYTEWIDTH))))) 04716 #endif 04717 { 04718 p[-3] = (unsigned char) pop_failure_jump; 04719 DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n", 04720 c, p1[5]); 04721 } 04722 04723 else if ((re_opcode_t) p1[3] == charset_not) 04724 { 04725 int idx; 04726 /* We win if the charset_not inside the loop 04727 lists every character listed in the charset after. */ 04728 for (idx = 0; idx < (int) p2[1]; idx++) 04729 if (! (p2[2 + idx] == 0 04730 || (idx < (int) p1[4] 04731 && ((p2[2 + idx] & ~ p1[5 + idx]) == 0)))) 04732 break; 04733 04734 if (idx == p2[1]) 04735 { 04736 p[-3] = (unsigned char) pop_failure_jump; 04737 DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); 04738 } 04739 } 04740 else if ((re_opcode_t) p1[3] == charset) 04741 { 04742 int idx; 04743 /* We win if the charset inside the loop 04744 has no overlap with the one after the loop. */ 04745 for (idx = 0; 04746 idx < (int) p2[1] && idx < (int) p1[4]; 04747 idx++) 04748 if ((p2[2 + idx] & p1[5 + idx]) != 0) 04749 break; 04750 04751 if (idx == p2[1] || idx == p1[4]) 04752 { 04753 p[-3] = (unsigned char) pop_failure_jump; 04754 DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); 04755 } 04756 } 04757 } 04758 } 04759 p -= 2; /* Point at relative address again. */ 04760 if ((re_opcode_t) p[-1] != pop_failure_jump) 04761 { 04762 p[-1] = (unsigned char) jump; 04763 DEBUG_PRINT1 (" Match => jump.\n"); 04764 goto unconditional_jump; 04765 } 04766 /* Note fall through. */ 04767 04768 04769 /* The end of a simple repeat has a pop_failure_jump back to 04770 its matching on_failure_jump, where the latter will push a 04771 failure point. 
The pop_failure_jump takes off failure 04772 points put on by this pop_failure_jump's matching 04773 on_failure_jump; we got through the pattern to here from the 04774 matching on_failure_jump, so didn't fail. */ 04775 case pop_failure_jump: 04776 { 04777 /* We need to pass separate storage for the lowest and 04778 highest registers, even though we don't care about the 04779 actual values. Otherwise, we will restore only one 04780 register from the stack, since lowest will == highest in 04781 `pop_failure_point'. */ 04782 active_reg_t dummy_low_reg, dummy_high_reg; 04783 unsigned char *pdummy; 04784 const char *sdummy; 04785 04786 DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n"); 04787 POP_FAILURE_POINT (sdummy, pdummy, 04788 dummy_low_reg, dummy_high_reg, 04789 reg_dummy, reg_dummy, reg_info_dummy); 04790 } 04791 /* Note fall through. */ 04792 04793 unconditional_jump: 04794 #ifdef _LIBC 04795 DEBUG_PRINT2 ("\n%p: ", p); 04796 #else 04797 DEBUG_PRINT2 ("\n0x%x: ", p); 04798 #endif 04799 /* Note fall through. */ 04800 04801 /* Unconditionally jump (without popping any failure points). */ 04802 case jump: 04803 EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */ 04804 DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt); 04805 p += mcnt; /* Do the jump. */ 04806 #ifdef _LIBC 04807 DEBUG_PRINT2 ("(to %p).\n", p); 04808 #else 04809 DEBUG_PRINT2 ("(to 0x%x).\n", p); 04810 #endif 04811 break; 04812 04813 04814 /* We need this opcode so we can detect where alternatives end 04815 in `group_match_null_string_p' et al. */ 04816 case jump_past_alt: 04817 DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n"); 04818 goto unconditional_jump; 04819 04820 04821 /* Normally, the on_failure_jump pushes a failure point, which 04822 then gets popped at pop_failure_jump. We will end up at 04823 pop_failure_jump, also, and with a pattern of, say, `a+', we 04824 are skipping over the on_failure_jump, so we have to push 04825 something meaningless for pop_failure_jump to pop. 
*/ 04826 case dummy_failure_jump: 04827 DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n"); 04828 /* It doesn't matter what we push for the string here. What 04829 the code at `fail' tests is the value for the pattern. */ 04830 PUSH_FAILURE_POINT (0, 0, -2); 04831 goto unconditional_jump; 04832 04833 04834 /* At the end of an alternative, we need to push a dummy failure 04835 point in case we are followed by a `pop_failure_jump', because 04836 we don't want the failure point for the alternative to be 04837 popped. For example, matching `(a|ab)*' against `aab' 04838 requires that we match the `ab' alternative. */ 04839 case push_dummy_failure: 04840 DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n"); 04841 /* See comments just above at `dummy_failure_jump' about the 04842 two zeroes. */ 04843 PUSH_FAILURE_POINT (0, 0, -2); 04844 break; 04845 04846 /* Have to succeed matching what follows at least n times. 04847 After that, handle like `on_failure_jump'. */ 04848 case succeed_n: 04849 EXTRACT_NUMBER (mcnt, p + 2); 04850 DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt); 04851 04852 assert (mcnt >= 0); 04853 /* Originally, this is how many times we HAVE to succeed. */ 04854 if (mcnt > 0) 04855 { 04856 mcnt--; 04857 p += 2; 04858 STORE_NUMBER_AND_INCR (p, mcnt); 04859 #ifdef _LIBC 04860 DEBUG_PRINT3 (" Setting %p to %d.\n", p - 2, mcnt); 04861 #else 04862 DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p - 2, mcnt); 04863 #endif 04864 } 04865 else if (mcnt == 0) 04866 { 04867 #ifdef _LIBC 04868 DEBUG_PRINT2 (" Setting two bytes from %p to no_op.\n", p+2); 04869 #else 04870 DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n", p+2); 04871 #endif 04872 p[2] = (unsigned char) no_op; 04873 p[3] = (unsigned char) no_op; 04874 goto on_failure; 04875 } 04876 break; 04877 04878 case jump_n: 04879 EXTRACT_NUMBER (mcnt, p + 2); 04880 DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt); 04881 04882 /* Originally, this is how many times we CAN jump. 
*/ 04883 if (mcnt) 04884 { 04885 mcnt--; 04886 STORE_NUMBER (p + 2, mcnt); 04887 #ifdef _LIBC 04888 DEBUG_PRINT3 (" Setting %p to %d.\n", p + 2, mcnt); 04889 #else 04890 DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p + 2, mcnt); 04891 #endif 04892 goto unconditional_jump; 04893 } 04894 /* If don't have to jump any more, skip over the rest of command. */ 04895 else 04896 p += 4; 04897 break; 04898 04899 case set_number_at: 04900 { 04901 DEBUG_PRINT1 ("EXECUTING set_number_at.\n"); 04902 04903 EXTRACT_NUMBER_AND_INCR (mcnt, p); 04904 p1 = p + mcnt; 04905 EXTRACT_NUMBER_AND_INCR (mcnt, p); 04906 #ifdef _LIBC 04907 DEBUG_PRINT3 (" Setting %p to %d.\n", p1, mcnt); 04908 #else 04909 DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt); 04910 #endif 04911 STORE_NUMBER (p1, mcnt); 04912 break; 04913 } 04914 04915 #if 0 04916 /* The DEC Alpha C compiler 3.x generates incorrect code for the 04917 test WORDCHAR_P (d - 1) != WORDCHAR_P (d) in the expansion of 04918 AT_WORD_BOUNDARY, so this code is disabled. Expanding the 04919 macro and introducing temporary variables works around the bug. 
*/ 04920 04921 case wordbound: 04922 DEBUG_PRINT1 ("EXECUTING wordbound.\n"); 04923 if (AT_WORD_BOUNDARY (d)) 04924 break; 04925 goto fail; 04926 04927 case notwordbound: 04928 DEBUG_PRINT1 ("EXECUTING notwordbound.\n"); 04929 if (AT_WORD_BOUNDARY (d)) 04930 goto fail; 04931 break; 04932 #else 04933 case wordbound: 04934 { 04935 boolean prevchar, thischar; 04936 04937 DEBUG_PRINT1 ("EXECUTING wordbound.\n"); 04938 if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)) 04939 break; 04940 04941 prevchar = WORDCHAR_P (d - 1); 04942 thischar = WORDCHAR_P (d); 04943 if (prevchar != thischar) 04944 break; 04945 goto fail; 04946 } 04947 04948 case notwordbound: 04949 { 04950 boolean prevchar, thischar; 04951 04952 DEBUG_PRINT1 ("EXECUTING notwordbound.\n"); 04953 if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)) 04954 goto fail; 04955 04956 prevchar = WORDCHAR_P (d - 1); 04957 thischar = WORDCHAR_P (d); 04958 if (prevchar != thischar) 04959 goto fail; 04960 break; 04961 } 04962 #endif 04963 04964 case wordbeg: 04965 DEBUG_PRINT1 ("EXECUTING wordbeg.\n"); 04966 if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1))) 04967 break; 04968 goto fail; 04969 04970 case wordend: 04971 DEBUG_PRINT1 ("EXECUTING wordend.\n"); 04972 if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1) 04973 && (!WORDCHAR_P (d) || AT_STRINGS_END (d))) 04974 break; 04975 goto fail; 04976 04977 #ifdef emacs 04978 case before_dot: 04979 DEBUG_PRINT1 ("EXECUTING before_dot.\n"); 04980 if (PTR_CHAR_POS ((unsigned char *) d) >= point) 04981 goto fail; 04982 break; 04983 04984 case at_dot: 04985 DEBUG_PRINT1 ("EXECUTING at_dot.\n"); 04986 if (PTR_CHAR_POS ((unsigned char *) d) != point) 04987 goto fail; 04988 break; 04989 04990 case after_dot: 04991 DEBUG_PRINT1 ("EXECUTING after_dot.\n"); 04992 if (PTR_CHAR_POS ((unsigned char *) d) <= point) 04993 goto fail; 04994 break; 04995 04996 case syntaxspec: 04997 DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt); 04998 mcnt = *p++; 04999 goto matchsyntax; 05000 05001 case 
wordchar: 05002 DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n"); 05003 mcnt = (int) Sword; 05004 matchsyntax: 05005 PREFETCH (); 05006 /* Can't use *d++ here; SYNTAX may be an unsafe macro. */ 05007 d++; 05008 if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt) 05009 goto fail; 05010 SET_REGS_MATCHED (); 05011 break; 05012 05013 case notsyntaxspec: 05014 DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt); 05015 mcnt = *p++; 05016 goto matchnotsyntax; 05017 05018 case notwordchar: 05019 DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n"); 05020 mcnt = (int) Sword; 05021 matchnotsyntax: 05022 PREFETCH (); 05023 /* Can't use *d++ here; SYNTAX may be an unsafe macro. */ 05024 d++; 05025 if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt) 05026 goto fail; 05027 SET_REGS_MATCHED (); 05028 break; 05029 05030 #else /* not emacs */ 05031 case wordchar: 05032 DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n"); 05033 PREFETCH (); 05034 if (!WORDCHAR_P (d)) 05035 goto fail; 05036 SET_REGS_MATCHED (); 05037 d++; 05038 break; 05039 05040 case notwordchar: 05041 DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n"); 05042 PREFETCH (); 05043 if (WORDCHAR_P (d)) 05044 goto fail; 05045 SET_REGS_MATCHED (); 05046 d++; 05047 break; 05048 #endif /* not emacs */ 05049 05050 default: 05051 abort (); 05052 } 05053 continue; /* Successfully executed one pattern command; keep going. */ 05054 05055 05056 /* We goto here if a matching operation fails. */ 05057 fail: 05058 if (!FAIL_STACK_EMPTY ()) 05059 { /* A restart point is known. Restore to that state. */ 05060 DEBUG_PRINT1 ("\nFAIL:\n"); 05061 POP_FAILURE_POINT (d, p, 05062 lowest_active_reg, highest_active_reg, 05063 regstart, regend, reg_info); 05064 05065 /* If this failure point is a dummy, try the next one. */ 05066 if (!p) 05067 goto fail; 05068 05069 /* If we failed to the end of the pattern, don't examine *p. 
*/ 05070 assert (p <= pend); 05071 if (p < pend) 05072 { 05073 boolean is_a_jump_n = false; 05074 05075 /* If failed to a backwards jump that's part of a repetition 05076 loop, need to pop this failure point and use the next one. */ 05077 switch ((re_opcode_t) *p) 05078 { 05079 case jump_n: 05080 is_a_jump_n = true; 05081 case maybe_pop_jump: 05082 case pop_failure_jump: 05083 case jump: 05084 p1 = p + 1; 05085 EXTRACT_NUMBER_AND_INCR (mcnt, p1); 05086 p1 += mcnt; 05087 05088 if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n) 05089 || (!is_a_jump_n 05090 && (re_opcode_t) *p1 == on_failure_jump)) 05091 goto fail; 05092 break; 05093 default: 05094 /* do nothing */ ; 05095 } 05096 } 05097 05098 if (d >= string1 && d <= end1) 05099 dend = end_match_1; 05100 } 05101 else 05102 break; /* Matching at this starting point really fails. */ 05103 } /* for (;;) */ 05104 05105 if (best_regs_set) 05106 goto restore_best_regs;
| static int re_match_2_internal | ( | ) | [static] |
| int re_search | ( | struct re_pattern_buffer * | bufp, | |
| const char * | string, | |||
| int | size, | |||
| int | startpos, | |||
| int | range, | |||
| struct re_registers * | regs | |||
| ) |
| int re_search_2 | ( | struct re_pattern_buffer * | bufp, | |
| const char * | string1, | |||
| int | size1, | |||
| const char * | string2, | |||
| int | size2, | |||
| int | startpos, | |||
| int | range, | |||
| struct re_registers * | regs, | |||
| int | stop | |||
| ) |
Definition at line 3437 of file regex.c.
03450 { 03451 int val; 03452 register char *fastmap = bufp->fastmap; 03453 register RE_TRANSLATE_TYPE translate = bufp->translate; 03454 int total_size = size1 + size2; 03455 int endpos = startpos + range; 03456 03457 /* Check for out-of-range STARTPOS. */ 03458 if (startpos < 0 || startpos > total_size) 03459 return -1; 03460 03461 /* Fix up RANGE if it might eventually take us outside 03462 the virtual concatenation of STRING1 and STRING2. 03463 Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE. */ 03464 if (endpos < 0) 03465 range = 0 - startpos; 03466 else if (endpos > total_size) 03467 range = total_size - startpos; 03468 03469 /* If the search isn't to be a backwards one, don't waste time in a 03470 search for a pattern that must be anchored. */ 03471 if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0) 03472 { 03473 if (startpos > 0) 03474 return -1; 03475 else 03476 range = 1; 03477 } 03478 03479 #ifdef emacs 03480 /* In a forward search for something that starts with \=. 03481 don't keep searching past point. */ 03482 if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0) 03483 { 03484 range = PT - startpos; 03485 if (range <= 0) 03486 return -1; 03487 } 03488 #endif /* emacs */ 03489 03490 /* Update the fastmap now if not correct already. */ 03491 if (fastmap && !bufp->fastmap_accurate) 03492 if (re_compile_fastmap (bufp) == -2) 03493 return -2; 03494 03495 /* Loop through the string, looking for a place to start matching. */ 03496 for (;;) 03497 { 03498 /* If a fastmap is supplied, skip quickly over characters that 03499 cannot be the start of a match. If the pattern can match the 03500 null string, however, we don't need to skip characters; we want 03501 the first null string. */ 03502 if (fastmap && startpos < total_size && !bufp->can_be_null) 03503 { 03504 if (range > 0) /* Searching forwards. 
*/ 03505 { 03506 register const char *d; 03507 register int lim = 0; 03508 int irange = range; 03509 03510 if (startpos < size1 && startpos + range >= size1) 03511 lim = range - (size1 - startpos); 03512 03513 d = (startpos >= size1 ? string2 - size1 : string1) + startpos; 03514 03515 /* Written out as an if-else to avoid testing `translate' 03516 inside the loop. */ 03517 if (translate) 03518 while (range > lim 03519 && !fastmap[(unsigned char) 03520 translate[(unsigned char) *d++]]) 03521 range--; 03522 else 03523 while (range > lim && !fastmap[(unsigned char) *d++]) 03524 range--; 03525 03526 startpos += irange - range; 03527 } 03528 else /* Searching backwards. */ 03529 { 03530 register char c = (size1 == 0 || startpos >= size1 03531 ? string2[startpos - size1] 03532 : string1[startpos]); 03533 03534 if (!fastmap[(unsigned char) TRANSLATE (c)]) 03535 goto advance; 03536 } 03537 } 03538 03539 /* If can't match the null string, and that's all we have left, fail. */ 03540 if (range >= 0 && startpos == total_size && fastmap 03541 && !bufp->can_be_null) 03542 return -1; 03543 03544 val = re_match_2_internal (bufp, string1, size1, string2, size2, 03545 startpos, regs, stop); 03546 #ifndef REGEX_MALLOC 03547 #ifdef C_ALLOCA 03548 alloca (0); 03549 #endif 03550 #endif 03551 03552 if (val >= 0) 03553 return startpos; 03554 03555 if (val == -2) 03556 return -2; 03557 03558 advance: 03559 if (!range) 03560 break; 03561 else if (range > 0) 03562 { 03563 range--; 03564 startpos++; 03565 } 03566 else 03567 { 03568 range++;
| void re_set_registers | ( | struct re_pattern_buffer * | bufp, | |
| struct re_registers * | regs, | |||
| unsigned | num_regs, | |||
| regoff_t * | starts, | |||
| regoff_t * | ends | |||
| ) |
Definition at line 3377 of file regex.c.
03387 { 03388 if (num_regs) 03389 { 03390 bufp->regs_allocated = REGS_REALLOCATE; 03391 regs->num_regs = num_regs; 03392 regs->start = starts; 03393 regs->end = ends; 03394 } 03395 else 03396 {
| reg_syntax_t re_set_syntax | ( | reg_syntax_t | syntax | ) |
Definition at line 953 of file regex.c.
00955 { 00956 reg_syntax_t ret = re_syntax_options; 00957 00958 re_syntax_options = syntax; 00959 #ifdef DEBUG 00960 if (syntax & RE_DEBUG) 00961 debug = 1; 00962 else if (debug) /* was on but now is not */ 00963 debug = 0; 00964 #endif /* DEBUG */ 00965 return ret; 00966 }
| int regcomp | ( | regex_t * | preg, | |
| const char * | pattern, | |||
| int | cflags | |||
| ) |
Definition at line 5517 of file regex.c.
05526 { 05527 reg_errcode_t ret; 05528 reg_syntax_t syntax 05529 = (cflags & REG_EXTENDED) ? 05530 RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC; 05531 05532 /* regex_compile will allocate the space for the compiled pattern. */ 05533 preg->buffer = 0; 05534 preg->allocated = 0; 05535 preg->used = 0; 05536 05537 /* Don't bother to use a fastmap when searching. This simplifies the 05538 REG_NEWLINE case: if we used a fastmap, we'd have to put all the 05539 characters after newlines into the fastmap. This way, we just try 05540 every character. */ 05541 preg->fastmap = 0; 05542 05543 if (cflags & REG_ICASE) 05544 { 05545 unsigned i; 05546 05547 preg->translate 05548 = (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE 05549 * sizeof (*(RE_TRANSLATE_TYPE)0)); 05550 if (preg->translate == NULL) 05551 return (int) REG_ESPACE; 05552 05553 /* Map uppercase characters to corresponding lowercase ones. */ 05554 for (i = 0; i < CHAR_SET_SIZE; i++) 05555 preg->translate[i] = ISUPPER (i) ? tolower (i) : i; 05556 } 05557 else 05558 preg->translate = NULL; 05559 05560 /* If REG_NEWLINE is set, newlines are treated differently. */ 05561 if (cflags & REG_NEWLINE) 05562 { /* REG_NEWLINE implies neither . nor [^...] match newline. */ 05563 syntax &= ~RE_DOT_NEWLINE; 05564 syntax |= RE_HAT_LISTS_NOT_NEWLINE; 05565 /* It also changes the matching behavior. */ 05566 preg->newline_anchor = 1; 05567 } 05568 else 05569 preg->newline_anchor = 0; 05570 05571 preg->no_sub = !!(cflags & REG_NOSUB); 05572 05573 /* POSIX says a null character in the pattern terminates it, so we 05574 can use strlen here in compiling the pattern. */ 05575 ret = regex_compile (pattern, strlen (pattern), syntax, preg); 05576 05577 /* POSIX doesn't distinguish between an unmatched open-group and an
| size_t regerror | ( | int | errcode, | |
| const regex_t * | preg, | |||
| char * | errbuf, | |||
| size_t | errbuf_size | |||
| ) |
Definition at line 5660 of file regex.c.
05666 { 05667 const char *msg; 05668 size_t msg_size; 05669 05670 if (errcode < 0 05671 || errcode >= (int) (sizeof (re_error_msgid) 05672 / sizeof (re_error_msgid[0]))) 05673 /* Only error codes returned by the rest of the code should be passed 05674 to this routine. If we are given anything else, or if other regex 05675 code generates an invalid error code, then the program has a bug. 05676 Dump core so we can fix it. */ 05677 abort (); 05678 05679 msg = gettext (re_error_msgid[errcode]); 05680 05681 msg_size = strlen (msg) + 1; /* Includes the null. */ 05682 05683 if (errbuf_size != 0) 05684 { 05685 if (msg_size > errbuf_size) 05686 { 05687 strncpy (errbuf, msg, errbuf_size - 1); 05688 errbuf[errbuf_size - 1] = 0; 05689 } 05690 else
| static reg_errcode_t regex_compile | ( | char * | pattern, | |
| size_t | size, | |||
| reg_syntax_t | syntax, | |||
| struct re_pattern_buffer * | bufp | |||
| ) | const [static] |
Definition at line 1776 of file regex.c.
01786 { 01787 /* We fetch characters from PATTERN here. Even though PATTERN is 01788 `char *' (i.e., signed), we declare these variables as unsigned, so 01789 they can be reliably used as array indices. */ 01790 register unsigned char c, c1; 01791 01792 /* A random temporary spot in PATTERN. */ 01793 const char *p1; 01794 01795 /* Points to the end of the buffer, where we should append. */ 01796 register unsigned char *b; 01797 01798 /* Keeps track of unclosed groups. */ 01799 compile_stack_type compile_stack; 01800 01801 /* Points to the current (ending) position in the pattern. */ 01802 const char *p = pattern; 01803 const char *pend = pattern + size; 01804 01805 /* How to translate the characters in the pattern. */ 01806 RE_TRANSLATE_TYPE translate = bufp->translate; 01807 01808 /* Address of the count-byte of the most recently inserted `exactn' 01809 command. This makes it possible to tell if a new exact-match 01810 character can be added to that command or if the character requires 01811 a new `exactn' command. */ 01812 unsigned char *pending_exact = 0; 01813 01814 /* Address of start of the most recently finished expression. 01815 This tells, e.g., postfix * where to find the start of its 01816 operand. Reset at the beginning of groups and alternatives. */ 01817 unsigned char *laststart = 0; 01818 01819 /* Address of beginning of regexp, or inside of last group. */ 01820 unsigned char *begalt; 01821 01822 /* Place in the uncompiled pattern (i.e., the {) to 01823 which to go back if the interval is invalid. */ 01824 const char *beg_interval; 01825 01826 /* Address of the place where a forward jump should go to the end of 01827 the containing expression. Each alternative of an `or' -- except the 01828 last -- ends with a forward jump of this sort. */ 01829 unsigned char *fixup_alt_jump = 0; 01830 01831 /* Counts open-groups as they are encountered. 
Remembered for the 01832 matching close-group on the compile stack, so the same register 01833 number is put in the stop_memory as the start_memory. */ 01834 regnum_t regnum = 0; 01835 01836 #ifdef DEBUG 01837 DEBUG_PRINT1 ("\nCompiling pattern: "); 01838 if (debug) 01839 { 01840 unsigned debug_count; 01841 01842 for (debug_count = 0; debug_count < size; debug_count++) 01843 putchar (pattern[debug_count]); 01844 putchar ('\n'); 01845 } 01846 #endif /* DEBUG */ 01847 01848 /* Initialize the compile stack. */ 01849 compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t); 01850 if (compile_stack.stack == NULL) 01851 return REG_ESPACE; 01852 01853 compile_stack.size = INIT_COMPILE_STACK_SIZE; 01854 compile_stack.avail = 0; 01855 01856 /* Initialize the pattern buffer. */ 01857 bufp->syntax = syntax; 01858 bufp->fastmap_accurate = 0; 01859 bufp->not_bol = bufp->not_eol = 0; 01860 01861 /* Set `used' to zero, so that if we return an error, the pattern 01862 printer (for debugging) will think there's no pattern. We reset it 01863 at the end. */ 01864 bufp->used = 0; 01865 01866 /* Always count groups, whether or not bufp->no_sub is set. */ 01867 bufp->re_nsub = 0; 01868 01869 #if !defined (emacs) && !defined (SYNTAX_TABLE) 01870 /* Initialize the syntax table. */ 01871 init_syntax_once (); 01872 #endif 01873 01874 if (bufp->allocated == 0) 01875 { 01876 if (bufp->buffer) 01877 { /* If zero allocated, but buffer is non-null, try to realloc 01878 enough space. This loses if buffer's address is bogus, but 01879 that is the user's responsibility. */ 01880 RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char); 01881 } 01882 else 01883 { /* Caller did not allocate a buffer. Do it for them. 
*/ 01884 bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char); 01885 } 01886 if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE); 01887 01888 bufp->allocated = INIT_BUF_SIZE; 01889 } 01890 01891 begalt = b = bufp->buffer; 01892 01893 /* Loop through the uncompiled pattern until we're at the end. */ 01894 while (p != pend) 01895 { 01896 PATFETCH (c); 01897 01898 switch (c) 01899 { 01900 case '^': 01901 { 01902 if ( /* If at start of pattern, it's an operator. */ 01903 p == pattern + 1 01904 /* If context independent, it's an operator. */ 01905 || syntax & RE_CONTEXT_INDEP_ANCHORS 01906 /* Otherwise, depends on what's come before. */ 01907 || at_begline_loc_p (pattern, p, syntax)) 01908 BUF_PUSH (begline); 01909 else 01910 goto normal_char; 01911 } 01912 break; 01913 01914 01915 case '$': 01916 { 01917 if ( /* If at end of pattern, it's an operator. */ 01918 p == pend 01919 /* If context independent, it's an operator. */ 01920 || syntax & RE_CONTEXT_INDEP_ANCHORS 01921 /* Otherwise, depends on what's next. */ 01922 || at_endline_loc_p (p, pend, syntax)) 01923 BUF_PUSH (endline); 01924 else 01925 goto normal_char; 01926 } 01927 break; 01928 01929 01930 case '+': 01931 case '?': 01932 if ((syntax & RE_BK_PLUS_QM) 01933 || (syntax & RE_LIMITED_OPS)) 01934 goto normal_char; 01935 handle_plus: 01936 case '*': 01937 /* If there is no previous pattern... */ 01938 if (!laststart) 01939 { 01940 if (syntax & RE_CONTEXT_INVALID_OPS) 01941 FREE_STACK_RETURN (REG_BADRPT); 01942 else if (!(syntax & RE_CONTEXT_INDEP_OPS)) 01943 goto normal_char; 01944 } 01945 01946 { 01947 /* Are we optimizing this jump? */ 01948 boolean keep_string_p = false; 01949 01950 /* 1 means zero (many) matches is allowed. */ 01951 char zero_times_ok = 0, many_times_ok = 0; 01952 01953 /* If there is a sequence of repetition chars, collapse it 01954 down to just one (the right one). 
We can't combine 01955 interval operators with these because of, e.g., `a{2}*', 01956 which should only match an even number of `a's. */ 01957 01958 for (;;) 01959 { 01960 zero_times_ok |= c != '+'; 01961 many_times_ok |= c != '?'; 01962 01963 if (p == pend) 01964 break; 01965 01966 PATFETCH (c); 01967 01968 if (c == '*' 01969 || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?'))) 01970 ; 01971 01972 else if (syntax & RE_BK_PLUS_QM && c == '\\') 01973 { 01974 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); 01975 01976 PATFETCH (c1); 01977 if (!(c1 == '+' || c1 == '?')) 01978 { 01979 PATUNFETCH; 01980 PATUNFETCH; 01981 break; 01982 } 01983 01984 c = c1; 01985 } 01986 else 01987 { 01988 PATUNFETCH; 01989 break; 01990 } 01991 01992 /* If we get here, we found another repeat character. */ 01993 } 01994 01995 /* Star, etc. applied to an empty pattern is equivalent 01996 to an empty pattern. */ 01997 if (!laststart) 01998 break; 01999 02000 /* Now we know whether or not zero matches is allowed 02001 and also whether or not two or more matches is allowed. */ 02002 if (many_times_ok) 02003 { /* More than one repetition is allowed, so put in at the 02004 end a backward relative jump from `b' to before the next 02005 jump we're going to put in below (which jumps from 02006 laststart to after this jump). 02007 02008 But if we are at the `*' in the exact sequence `.*\n', 02009 insert an unconditional jump backwards to the ., 02010 instead of the beginning of the loop. This way we only 02011 push a failure point once, instead of every time 02012 through the loop. */ 02013 assert (p - 1 > pattern); 02014 02015 /* Allocate the space for the jump. */ 02016 GET_BUFFER_SPACE (3); 02017 02018 /* We know we are not at the first character of the pattern, 02019 because laststart was nonzero. And we've already 02020 incremented `p', by the way, to be the character after 02021 the `*'. Do we have to do something analogous here 02022 for null bytes, because of RE_DOT_NOT_NULL? 
*/ 02023 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.') 02024 && zero_times_ok 02025 && p < pend && TRANSLATE (*p) == TRANSLATE ('\n') 02026 && !(syntax & RE_DOT_NEWLINE)) 02027 { /* We have .*\n. */ 02028 STORE_JUMP (jump, b, laststart); 02029 keep_string_p = true; 02030 } 02031 else 02032 /* Anything else. */ 02033 STORE_JUMP (maybe_pop_jump, b, laststart - 3); 02034 02035 /* We've added more stuff to the buffer. */ 02036 b += 3; 02037 } 02038 02039 /* On failure, jump from laststart to b + 3, which will be the 02040 end of the buffer after this jump is inserted. */ 02041 GET_BUFFER_SPACE (3); 02042 INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump 02043 : on_failure_jump, 02044 laststart, b + 3); 02045 pending_exact = 0; 02046 b += 3; 02047 02048 if (!zero_times_ok) 02049 { 02050 /* At least one repetition is required, so insert a 02051 `dummy_failure_jump' before the initial 02052 `on_failure_jump' instruction of the loop. This 02053 effects a skip over that instruction the first time 02054 we hit that loop. */ 02055 GET_BUFFER_SPACE (3); 02056 INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6); 02057 b += 3; 02058 } 02059 } 02060 break; 02061 02062 02063 case '.': 02064 laststart = b; 02065 BUF_PUSH (anychar); 02066 break; 02067 02068 02069 case '[': 02070 { 02071 boolean had_char_class = false; 02072 02073 if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 02074 02075 /* Ensure that we have enough space to push a charset: the 02076 opcode, the length count, and the bitset; 34 bytes in all. */ 02077 GET_BUFFER_SPACE (34); 02078 02079 laststart = b; 02080 02081 /* We test `*p == '^' twice, instead of using an if 02082 statement, so we only need one BUF_PUSH. */ 02083 BUF_PUSH (*p == '^' ? charset_not : charset); 02084 if (*p == '^') 02085 p++; 02086 02087 /* Remember the first position in the bracket expression. */ 02088 p1 = p; 02089 02090 /* Push the number of bytes in the bitmap. 
*/ 02091 BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH); 02092 02093 /* Clear the whole map. */ 02094 bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH); 02095 02096 /* charset_not matches newline according to a syntax bit. */ 02097 if ((re_opcode_t) b[-2] == charset_not 02098 && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) 02099 SET_LIST_BIT ('\n'); 02100 02101 /* Read in characters and ranges, setting map bits. */ 02102 for (;;) 02103 { 02104 if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 02105 02106 PATFETCH (c); 02107 02108 /* \ might escape characters inside [...] and [^...]. */ 02109 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') 02110 { 02111 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); 02112 02113 PATFETCH (c1); 02114 SET_LIST_BIT (c1); 02115 continue; 02116 } 02117 02118 /* Could be the end of the bracket expression. If it's 02119 not (i.e., when the bracket expression is `[]' so 02120 far), the ']' character bit gets set way below. */ 02121 if (c == ']' && p != p1 + 1) 02122 break; 02123 02124 /* Look ahead to see if it's a range when the last thing 02125 was a character class. */ 02126 if (had_char_class && c == '-' && *p != ']') 02127 FREE_STACK_RETURN (REG_ERANGE); 02128 02129 /* Look ahead to see if it's a range when the last thing 02130 was a character: if this is a hyphen not at the 02131 beginning or the end of a list, then it's the range 02132 operator. */ 02133 if (c == '-' 02134 && !(p - 2 >= pattern && p[-2] == '[') 02135 && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') 02136 && *p != ']') 02137 { 02138 reg_errcode_t ret 02139 = compile_range (&p, pend, translate, syntax, b); 02140 if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); 02141 } 02142 02143 else if (p[0] == '-' && p[1] != ']') 02144 { /* This handles ranges made up of characters only. */ 02145 reg_errcode_t ret; 02146 02147 /* Move past the `-'. 
*/ 02148 PATFETCH (c1); 02149 02150 ret = compile_range (&p, pend, translate, syntax, b); 02151 if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); 02152 } 02153 02154 /* See if we're at the beginning of a possible character 02155 class. */ 02156 02157 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') 02158 { /* Leave room for the null. */ 02159 char str[CHAR_CLASS_MAX_LENGTH + 1]; 02160 02161 PATFETCH (c); 02162 c1 = 0; 02163 02164 /* If pattern is `[[:'. */ 02165 if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 02166 02167 for (;;) 02168 { 02169 PATFETCH (c); 02170 if (c == ':' || c == ']' || p == pend 02171 || c1 == CHAR_CLASS_MAX_LENGTH) 02172 break; 02173 str[c1++] = c; 02174 } 02175 str[c1] = '\0'; 02176 02177 /* If isn't a word bracketed by `[:' and:`]': 02178 undo the ending character, the letters, and leave 02179 the leading `:' and `[' (but set bits for them). */ 02180 if (c == ':' && *p == ']') 02181 { 02182 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H) 02183 boolean is_lower = STREQ (str, "lower"); 02184 boolean is_upper = STREQ (str, "upper"); 02185 wctype_t wt; 02186 int ch; 02187 02188 wt = wctype (str); 02189 if (wt == 0) 02190 FREE_STACK_RETURN (REG_ECTYPE); 02191 02192 /* Throw away the ] at the end of the character 02193 class. 
*/ 02194 PATFETCH (c); 02195 02196 if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 02197 02198 for (ch = 0; ch < 1 << BYTEWIDTH; ++ch) 02199 { 02200 if (iswctype (btowc (ch), wt)) 02201 SET_LIST_BIT (ch); 02202 02203 if (translate && (is_upper || is_lower) 02204 && (ISUPPER (ch) || ISLOWER (ch))) 02205 SET_LIST_BIT (ch); 02206 } 02207 02208 had_char_class = true; 02209 #else 02210 int ch; 02211 boolean is_alnum = STREQ (str, "alnum"); 02212 boolean is_alpha = STREQ (str, "alpha"); 02213 boolean is_blank = STREQ (str, "blank"); 02214 boolean is_cntrl = STREQ (str, "cntrl"); 02215 boolean is_digit = STREQ (str, "digit"); 02216 boolean is_graph = STREQ (str, "graph"); 02217 boolean is_lower = STREQ (str, "lower"); 02218 boolean is_print = STREQ (str, "print"); 02219 boolean is_punct = STREQ (str, "punct"); 02220 boolean is_space = STREQ (str, "space"); 02221 boolean is_upper = STREQ (str, "upper"); 02222 boolean is_xdigit = STREQ (str, "xdigit"); 02223 02224 if (!IS_CHAR_CLASS (str)) 02225 FREE_STACK_RETURN (REG_ECTYPE); 02226 02227 /* Throw away the ] at the end of the character 02228 class. */ 02229 PATFETCH (c); 02230 02231 if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 02232 02233 for (ch = 0; ch < (1 << BYTEWIDTH); ch++) 02234 { 02235 /* This was split into 3 if's to 02236 avoid an arbitrary limit in some compiler. 
*/ 02237 if ( (is_alnum && ISALNUM (ch)) 02238 || (is_alpha && ISALPHA (ch)) 02239 || (is_blank && ISBLANK (ch)) 02240 || (is_cntrl && ISCNTRL (ch))) 02241 SET_LIST_BIT (ch); 02242 if ( (is_digit && ISDIGIT (ch)) 02243 || (is_graph && ISGRAPH (ch)) 02244 || (is_lower && ISLOWER (ch)) 02245 || (is_print && ISPRINT (ch))) 02246 SET_LIST_BIT (ch); 02247 if ( (is_punct && ISPUNCT (ch)) 02248 || (is_space && ISSPACE (ch)) 02249 || (is_upper && ISUPPER (ch)) 02250 || (is_xdigit && ISXDIGIT (ch))) 02251 SET_LIST_BIT (ch); 02252 if ( translate && (is_upper || is_lower) 02253 && (ISUPPER (ch) || ISLOWER (ch))) 02254 SET_LIST_BIT (ch); 02255 } 02256 had_char_class = true; 02257 #endif /* libc || wctype.h */ 02258 } 02259 else 02260 { 02261 c1++; 02262 while (c1--) 02263 PATUNFETCH; 02264 SET_LIST_BIT ('['); 02265 SET_LIST_BIT (':'); 02266 had_char_class = false; 02267 } 02268 } 02269 else 02270 { 02271 had_char_class = false; 02272 SET_LIST_BIT (c); 02273 } 02274 } 02275 02276 /* Discard any (non)matching list bytes that are all 0 at the 02277 end of the map. Decrease the map-length byte too. 
*/ 02278 while ((int) b[-1] > 0 && b[b[-1] - 1] == 0) 02279 b[-1]--; 02280 b += b[-1]; 02281 } 02282 break; 02283 02284 02285 case '(': 02286 if (syntax & RE_NO_BK_PARENS) 02287 goto handle_open; 02288 else 02289 goto normal_char; 02290 02291 02292 case ')': 02293 if (syntax & RE_NO_BK_PARENS) 02294 goto handle_close; 02295 else 02296 goto normal_char; 02297 02298 02299 case '\n': 02300 if (syntax & RE_NEWLINE_ALT) 02301 goto handle_alt; 02302 else 02303 goto normal_char; 02304 02305 02306 case '|': 02307 if (syntax & RE_NO_BK_VBAR) 02308 goto handle_alt; 02309 else 02310 goto normal_char; 02311 02312 02313 case '{': 02314 if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES) 02315 goto handle_interval; 02316 else 02317 goto normal_char; 02318 02319 02320 case '\\': 02321 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); 02322 02323 /* Do not translate the character after the \, so that we can 02324 distinguish, e.g., \B from \b, even if we normally would 02325 translate, e.g., B to b. */ 02326 PATFETCH_RAW (c); 02327 02328 switch (c) 02329 { 02330 case '(': 02331 if (syntax & RE_NO_BK_PARENS) 02332 goto normal_backslash; 02333 02334 handle_open: 02335 bufp->re_nsub++; 02336 regnum++; 02337 02338 if (COMPILE_STACK_FULL) 02339 { 02340 RETALLOC (compile_stack.stack, compile_stack.size << 1, 02341 compile_stack_elt_t); 02342 if (compile_stack.stack == NULL) return REG_ESPACE; 02343 02344 compile_stack.size <<= 1; 02345 } 02346 02347 /* These are the values to restore when we hit end of this 02348 group. They are all relative offsets, so that if the 02349 whole pattern moves because of realloc, they will still 02350 be valid. */ 02351 COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; 02352 COMPILE_STACK_TOP.fixup_alt_jump 02353 = fixup_alt_jump ? 
fixup_alt_jump - bufp->buffer + 1 : 0; 02354 COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer; 02355 COMPILE_STACK_TOP.regnum = regnum; 02356 02357 /* We will eventually replace the 0 with the number of 02358 groups inner to this one. But do not push a 02359 start_memory for groups beyond the last one we can 02360 represent in the compiled pattern. */ 02361 if (regnum <= MAX_REGNUM) 02362 { 02363 COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2; 02364 BUF_PUSH_3 (start_memory, regnum, 0); 02365 } 02366 02367 compile_stack.avail++; 02368 02369 fixup_alt_jump = 0; 02370 laststart = 0; 02371 begalt = b; 02372 /* If we've reached MAX_REGNUM groups, then this open 02373 won't actually generate any code, so we'll have to 02374 clear pending_exact explicitly. */ 02375 pending_exact = 0; 02376 break; 02377 02378 02379 case ')': 02380 if (syntax & RE_NO_BK_PARENS) goto normal_backslash; 02381 02382 if (COMPILE_STACK_EMPTY) { 02383 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) 02384 goto normal_backslash; 02385 } 02386 else FREE_STACK_RETURN (REG_ERPAREN); 02387 02388 handle_close: 02389 if (fixup_alt_jump) 02390 { /* Push a dummy failure point at the end of the 02391 alternative for a possible future 02392 `pop_failure_jump' to pop. See comments at 02393 `push_dummy_failure' in `re_match_2'. */ 02394 BUF_PUSH (push_dummy_failure); 02395 02396 /* We allocated space for this jump when we assigned 02397 to `fixup_alt_jump', in the `handle_alt' case below. */ 02398 STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1); 02399 } 02400 02401 /* See similar code for backslashed left paren above. */ 02402 if (COMPILE_STACK_EMPTY) { 02403 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) 02404 goto normal_char; 02405 } 02406 else FREE_STACK_RETURN (REG_ERPAREN); 02407 02408 /* Since we just checked for an empty stack above, this 02409 ``can't happen''. 
*/ 02410 assert (compile_stack.avail != 0); 02411 { 02412 /* We don't just want to restore into `regnum', because 02413 later groups should continue to be numbered higher, 02414 as in `(ab)c(de)' -- the second group is #2. */ 02415 regnum_t this_group_regnum; 02416 02417 compile_stack.avail--; 02418 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset; 02419 fixup_alt_jump 02420 = COMPILE_STACK_TOP.fixup_alt_jump 02421 ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1 02422 : 0; 02423 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset; 02424 this_group_regnum = COMPILE_STACK_TOP.regnum; 02425 /* If we've reached MAX_REGNUM groups, then this open 02426 won't actually generate any code, so we'll have to 02427 clear pending_exact explicitly. */ 02428 pending_exact = 0; 02429 02430 /* We're at the end of the group, so now we know how many 02431 groups were inside this one. */ 02432 if (this_group_regnum <= MAX_REGNUM) 02433 { 02434 unsigned char *inner_group_loc 02435 = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset; 02436 02437 *inner_group_loc = regnum - this_group_regnum; 02438 BUF_PUSH_3 (stop_memory, this_group_regnum, 02439 regnum - this_group_regnum); 02440 } 02441 } 02442 break; 02443 02444 02445 case '|': /* `\|'. */ 02446 if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR) 02447 goto normal_backslash; 02448 handle_alt: 02449 if (syntax & RE_LIMITED_OPS) 02450 goto normal_char; 02451 02452 /* Insert before the previous alternative a jump which 02453 jumps to this alternative if the former fails. */ 02454 GET_BUFFER_SPACE (3); 02455 INSERT_JUMP (on_failure_jump, begalt, b + 6); 02456 pending_exact = 0; 02457 b += 3; 02458 02459 /* The alternative before this one has a jump after it 02460 which gets executed if it gets matched. Adjust that 02461 jump so it will jump to this alternative's analogous 02462 jump (put in below, which in turn will jump to the next 02463 (if any) alternative's such jump, etc.). 
The last such 02464 jump jumps to the correct final destination. A picture: 02465 _____ _____ 02466 | | | | 02467 | v | v 02468 a | b | c 02469 02470 If we are at `b', then fixup_alt_jump right now points to a 02471 three-byte space after `a'. We'll put in the jump, set 02472 fixup_alt_jump to right after `b', and leave behind three 02473 bytes which we'll fill in when we get to after `c'. */ 02474 02475 if (fixup_alt_jump) 02476 STORE_JUMP (jump_past_alt, fixup_alt_jump, b); 02477 02478 /* Mark and leave space for a jump after this alternative, 02479 to be filled in later either by next alternative or 02480 when know we're at the end of a series of alternatives. */ 02481 fixup_alt_jump = b; 02482 GET_BUFFER_SPACE (3); 02483 b += 3; 02484 02485 laststart = 0; 02486 begalt = b; 02487 break; 02488 02489 02490 case '{': 02491 /* If \{ is a literal. */ 02492 if (!(syntax & RE_INTERVALS) 02493 /* If we're at `\{' and it's not the open-interval 02494 operator. */ 02495 || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) 02496 || (p - 2 == pattern && p == pend)) 02497 goto normal_backslash; 02498 02499 handle_interval: 02500 { 02501 /* If got here, then the syntax allows intervals. */ 02502 02503 /* At least (most) this many matches must be made. */ 02504 int lower_bound = -1, upper_bound = -1; 02505 02506 beg_interval = p - 1; 02507 02508 if (p == pend) 02509 { 02510 if (syntax & RE_NO_BK_BRACES) 02511 goto unfetch_interval; 02512 else 02513 FREE_STACK_RETURN (REG_EBRACE); 02514 } 02515 02516 GET_UNSIGNED_NUMBER (lower_bound); 02517 02518 if (c == ',') 02519 { 02520 GET_UNSIGNED_NUMBER (upper_bound); 02521 if (upper_bound < 0) upper_bound = RE_DUP_MAX; 02522 } 02523 else 02524 /* Interval such as `{1}' => match exactly once. 
*/ 02525 upper_bound = lower_bound; 02526 02527 if (lower_bound < 0 || upper_bound > RE_DUP_MAX 02528 || lower_bound > upper_bound) 02529 { 02530 if (syntax & RE_NO_BK_BRACES) 02531 goto unfetch_interval; 02532 else 02533 FREE_STACK_RETURN (REG_BADBR); 02534 } 02535 02536 if (!(syntax & RE_NO_BK_BRACES)) 02537 { 02538 if (c != '\\') FREE_STACK_RETURN (REG_EBRACE); 02539 02540 PATFETCH (c); 02541 } 02542 02543 if (c != '}') 02544 { 02545 if (syntax & RE_NO_BK_BRACES) 02546 goto unfetch_interval; 02547 else 02548 FREE_STACK_RETURN (REG_BADBR); 02549 } 02550 02551 /* We just parsed a valid interval. */ 02552 02553 /* If it's invalid to have no preceding re. */ 02554 if (!laststart) 02555 { 02556 if (syntax & RE_CONTEXT_INVALID_OPS) 02557 FREE_STACK_RETURN (REG_BADRPT); 02558 else if (syntax & RE_CONTEXT_INDEP_OPS) 02559 laststart = b; 02560 else 02561 goto unfetch_interval; 02562 } 02563 02564 /* If the upper bound is zero, don't want to succeed at 02565 all; jump from `laststart' to `b + 3', which will be 02566 the end of the buffer after we insert the jump. */ 02567 if (upper_bound == 0) 02568 { 02569 GET_BUFFER_SPACE (3); 02570 INSERT_JUMP (jump, laststart, b + 3); 02571 b += 3; 02572 } 02573 02574 /* Otherwise, we have a nontrivial interval. When 02575 we're all done, the pattern will look like: 02576 set_number_at <jump count> <upper bound> 02577 set_number_at <succeed_n count> <lower bound> 02578 succeed_n <after jump addr> <succeed_n count> 02579 <body of loop> 02580 jump_n <succeed_n addr> <jump count> 02581 (The upper bound and `jump_n' are omitted if 02582 `upper_bound' is 1, though.) */ 02583 else 02584 { /* If the upper bound is > 1, we need to insert 02585 more at the end of the loop. 
*/ 02586 unsigned nbytes = 10 + (upper_bound > 1) * 10; 02587 02588 GET_BUFFER_SPACE (nbytes); 02589 02590 /* Initialize lower bound of the `succeed_n', even 02591 though it will be set during matching by its 02592 attendant `set_number_at' (inserted next), 02593 because `re_compile_fastmap' needs to know. 02594 Jump to the `jump_n' we might insert below. */ 02595 INSERT_JUMP2 (succeed_n, laststart, 02596 b + 5 + (upper_bound > 1) * 5, 02597 lower_bound); 02598 b += 5; 02599 02600 /* Code to initialize the lower bound. Insert 02601 before the `succeed_n'. The `5' is the last two 02602 bytes of this `set_number_at', plus 3 bytes of 02603 the following `succeed_n'. */ 02604 insert_op2 (set_number_at, laststart, 5, lower_bound, b); 02605 b += 5; 02606 02607 if (upper_bound > 1) 02608 { /* More than one repetition is allowed, so 02609 append a backward jump to the `succeed_n' 02610 that starts this interval. 02611 02612 When we've reached this during matching, 02613 we'll have matched the interval once, so 02614 jump back only `upper_bound - 1' times. */ 02615 STORE_JUMP2 (jump_n, b, laststart + 5, 02616 upper_bound - 1); 02617 b += 5; 02618 02619 /* The location we want to set is the second 02620 parameter of the `jump_n'; that is `b-2' as 02621 an absolute address. `laststart' will be 02622 the `set_number_at' we're about to insert; 02623 `laststart+3' the number to set, the source 02624 for the relative address. But we are 02625 inserting into the middle of the pattern -- 02626 so everything is getting moved up by 5. 02627 Conclusion: (b - 2) - (laststart + 3) + 5, 02628 i.e., b - laststart. 02629 02630 We insert this at the beginning of the loop 02631 so that if we fail during matching, we'll 02632 reinitialize the bounds. 
*/ 02633 insert_op2 (set_number_at, laststart, b - laststart, 02634 upper_bound - 1, b); 02635 b += 5; 02636 } 02637 } 02638 pending_exact = 0; 02639 beg_interval = NULL; 02640 } 02641 break; 02642 02643 unfetch_interval: 02644 /* If an invalid interval, match the characters as literals. */ 02645 assert (beg_interval); 02646 p = beg_interval; 02647 beg_interval = NULL; 02648 02649 /* normal_char and normal_backslash need `c'. */ 02650 PATFETCH (c); 02651 02652 if (!(syntax & RE_NO_BK_BRACES)) 02653 { 02654 if (p > pattern && p[-1] == '\\') 02655 goto normal_backslash; 02656 } 02657 goto normal_char; 02658 02659 #ifdef emacs 02660 /* There is no way to specify the before_dot and after_dot 02661 operators. rms says this is ok. --karl */ 02662 case '=': 02663 BUF_PUSH (at_dot); 02664 break; 02665 02666 case 's': 02667 laststart = b; 02668 PATFETCH (c); 02669 BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]); 02670 break; 02671 02672 case 'S': 02673 laststart = b; 02674 PATFETCH (c); 02675 BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]); 02676 break; 02677 #endif /* emacs */ 02678 02679 02680 case 'w': 02681 if (re_syntax_options & RE_NO_GNU_OPS) 02682 goto normal_char; 02683 laststart = b; 02684 BUF_PUSH (wordchar); 02685 break; 02686 02687 02688 case 'W': 02689 if (re_syntax_options & RE_NO_GNU_OPS) 02690 goto normal_char; 02691 laststart = b; 02692 BUF_PUSH (notwordchar); 02693 break; 02694 02695 02696 case '<': 02697 if (re_syntax_options & RE_NO_GNU_OPS) 02698 goto normal_char; 02699 BUF_PUSH (wordbeg); 02700 break; 02701 02702 case '>': 02703 if (re_syntax_options & RE_NO_GNU_OPS) 02704 goto normal_char; 02705 BUF_PUSH (wordend); 02706 break; 02707 02708 case 'b': 02709 if (re_syntax_options & RE_NO_GNU_OPS) 02710 goto normal_char; 02711 BUF_PUSH (wordbound); 02712 break; 02713 02714 case 'B': 02715 if (re_syntax_options & RE_NO_GNU_OPS) 02716 goto normal_char; 02717 BUF_PUSH (notwordbound); 02718 break; 02719 02720 case '`': 02721 if (re_syntax_options & 
RE_NO_GNU_OPS) 02722 goto normal_char; 02723 BUF_PUSH (begbuf); 02724 break; 02725 02726 case '\'': 02727 if (re_syntax_options & RE_NO_GNU_OPS) 02728 goto normal_char; 02729 BUF_PUSH (endbuf); 02730 break; 02731 02732 case '1': case '2': case '3': case '4': case '5': 02733 case '6': case '7': case '8': case '9': 02734 if (syntax & RE_NO_BK_REFS) 02735 goto normal_char; 02736 02737 c1 = c - '0'; 02738 02739 if (c1 > regnum) 02740 FREE_STACK_RETURN (REG_ESUBREG); 02741 02742 /* Can't back reference to a subexpression if inside of it. */ 02743 if (group_in_compile_stack (compile_stack, (regnum_t) c1)) 02744 goto normal_char; 02745 02746 laststart = b; 02747 BUF_PUSH_2 (duplicate, c1); 02748 break; 02749 02750 02751 case '+': 02752 case '?': 02753 if (syntax & RE_BK_PLUS_QM) 02754 goto handle_plus; 02755 else 02756 goto normal_backslash; 02757 02758 default: 02759 normal_backslash: 02760 /* You might think it would be useful for \ to mean 02761 not to translate; but if we don't translate it 02762 it will never match anything. */ 02763 c = TRANSLATE (c); 02764 goto normal_char; 02765 } 02766 break; 02767 02768 02769 default: 02770 /* Expects the character in `c'. */ 02771 normal_char: 02772 /* If no exactn currently being built. */ 02773 if (!pending_exact 02774 02775 /* If last exactn not at current position. */ 02776 || pending_exact + *pending_exact + 1 != b 02777 02778 /* We have only one byte following the exactn for the count. */ 02779 || *pending_exact == (1 << BYTEWIDTH) - 1 02780 02781 /* If followed by a repetition operator. */ 02782 || *p == '*' || *p == '^' 02783 || ((syntax & RE_BK_PLUS_QM) 02784 ? *p == '\\' && (p[1] == '+' || p[1] == '?') 02785 : (*p == '+' || *p == '?')) 02786 || ((syntax & RE_INTERVALS) 02787 && ((syntax & RE_NO_BK_BRACES) 02788 ? *p == '{' 02789 : (p[0] == '\\' && p[1] == '{')))) 02790 { 02791 /* Start building a new exactn. 
*/ 02792 02793 laststart = b; 02794 02795 BUF_PUSH_2 (exactn, 0); 02796 pending_exact = b - 1; 02797 } 02798 02799 BUF_PUSH (c); 02800 (*pending_exact)++; 02801 break; 02802 } /* switch (c) */ 02803 } /* while p != pend */ 02804 02805 02806 /* Through the pattern now. */ 02807 02808 if (fixup_alt_jump) 02809 STORE_JUMP (jump_past_alt, fixup_alt_jump, b); 02810 02811 if (!COMPILE_STACK_EMPTY) 02812 FREE_STACK_RETURN (REG_EPAREN); 02813 02814 /* If we don't want backtracking, force success 02815 the first time we reach the end of the compiled pattern. */ 02816 if (syntax & RE_NO_POSIX_BACKTRACKING) 02817 BUF_PUSH (succeed); 02818 02819 free (compile_stack.stack); 02820 02821 /* We have succeeded; set the length of the buffer. */ 02822 bufp->used = b - bufp->buffer; 02823 02824 #ifdef DEBUG 02825 if (debug) 02826 { 02827 DEBUG_PRINT1 ("\nCompiled pattern: \n"); 02828 print_compiled_pattern (bufp); 02829 } 02830 #endif /* DEBUG */ 02831 02832 #ifndef MATCH_MAY_ALLOCATE 02833 /* Initialize the failure stack to the largest possible stack. This 02834 isn't necessary unless we're trying to avoid calling alloca in 02835 the search and match routines. */ 02836 { 02837 int num_regs = bufp->re_nsub + 1; 02838 02839 /* Since DOUBLE_FAIL_STACK refuses to double only if the current size 02840 is strictly greater than re_max_failures, the largest possible stack 02841 is 2 * re_max_failures failure points. */ 02842 if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS)) 02843 { 02844 fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS); 02845 02846 #ifdef emacs 02847 if (! fail_stack.stack) 02848 fail_stack.stack 02849 = (fail_stack_elt_t *) xmalloc (fail_stack.size 02850 * sizeof (fail_stack_elt_t)); 02851 else 02852 fail_stack.stack 02853 = (fail_stack_elt_t *) xrealloc (fail_stack.stack, 02854 (fail_stack.size 02855 * sizeof (fail_stack_elt_t))); 02856 #else /* not emacs */ 02857 if (! 
fail_stack.stack) 02858 fail_stack.stack 02859 = (fail_stack_elt_t *) malloc (fail_stack.size 02860 * sizeof (fail_stack_elt_t)); 02861 else 02862 fail_stack.stack 02863 = (fail_stack_elt_t *) realloc (fail_stack.stack, 02864 (fail_stack.size 02865 * sizeof (fail_stack_elt_t))); 02866 #endif /* not emacs */ 02867 } 02868 02869 regex_grow_registers (num_regs);
| int regexec | ( | regex_t * | preg, | |
| const char * | string, | |||
| size_t | nmatch, | |||
| regmatch_t | pmatch[], | |||
| int | eflags | |||
| ) | const |
Definition at line 5595 of file regex.c.
05606 { 05607 int ret; 05608 struct re_registers regs; 05609 regex_t private_preg; 05610 int len = strlen (string); 05611 boolean want_reg_info = !preg->no_sub && nmatch > 0; 05612 05613 private_preg = *preg; 05614 05615 private_preg.not_bol = !!(eflags & REG_NOTBOL); 05616 private_preg.not_eol = !!(eflags & REG_NOTEOL); 05617 05618 /* The user has told us exactly how many registers to return 05619 information about, via `nmatch'. We have to pass that on to the 05620 matching routines. */ 05621 private_preg.regs_allocated = REGS_FIXED; 05622 05623 if (want_reg_info) 05624 { 05625 regs.num_regs = nmatch; 05626 regs.start = TALLOC (nmatch, regoff_t); 05627 regs.end = TALLOC (nmatch, regoff_t); 05628 if (regs.start == NULL || regs.end == NULL) 05629 return (int) REG_NOMATCH; 05630 } 05631 05632 /* Perform the searching operation. */ 05633 ret = re_search (&private_preg, string, len, 05634 /* start: */ 0, /* range: */ len, 05635 want_reg_info ? &regs : (struct re_registers *) 0); 05636 05637 /* Copy the register information to the POSIX structure. */ 05638 if (want_reg_info) 05639 { 05640 if (ret >= 0) 05641 { 05642 unsigned r; 05643 05644 for (r = 0; r < nmatch; r++) 05645 { 05646 pmatch[r].rm_so = regs.start[r]; 05647 pmatch[r].rm_eo = regs.end[r]; 05648 } 05649 } 05650 05651 /* If we needed the temporary register info, free the space now. */ 05652 free (regs.start); 05653 free (regs.end);
| void regfree | ( | regex_t * | preg | ) |
| static void store_op1 | ( | re_opcode_t | op, | |
| unsigned char * | loc, | |||
| int | arg | |||
| ) | [static] |
| static void store_op2 | ( | re_opcode_t | op, | |
| unsigned char * | loc, | |||
| int | arg1, | |||
| int | arg2 | |||
| ) | [static] |
const char* re_error_msgid[] [static] |
{
gettext_noop ("Success"),
gettext_noop ("No match"),
gettext_noop ("Invalid regular expression"),
gettext_noop ("Invalid collation character"),
gettext_noop ("Invalid character class name"),
gettext_noop ("Trailing backslash"),
gettext_noop ("Invalid back reference"),
gettext_noop ("Unmatched [ or [^"),
gettext_noop ("Unmatched ( or \\("),
gettext_noop ("Unmatched \\{"),
gettext_noop ("Invalid content of \\{\\}"),
gettext_noop ("Invalid range end"),
gettext_noop ("Memory exhausted"),
gettext_noop ("Invalid preceding regular expression"),
gettext_noop ("Premature end of regular expression"),
gettext_noop ("Regular expression too big"),
gettext_noop ("Unmatched ) or \\)"),
}
| int re_max_failures = 20000 |
char re_syntax_table[CHAR_SET_SIZE] [static] |
char reg_unset_dummy [static] |
1.6.1