src/utilfuns/regex.c File Reference

#include <sys/types.h>
#include <stdlib.h>
#include <string.h>
#include "regex.h"
#include <ctype.h>
Include dependency graph for regex.c:

Go to the source code of this file.

Classes

struct  compile_stack_elt_t
struct  compile_stack_type
union  fail_stack_elt
struct  fail_stack_type
union  register_info_type

Defines

#define _GNU_SOURCE
#define assert(e)
#define AT_STRINGS_BEG(d)   ((d) == (size1 ? string1 : string2) || !size2)
#define AT_STRINGS_END(d)   ((d) == end2)
#define bcmp(s1, s2, n)   memcmp ((s1), (s2), (n))
#define bcopy(s, d, n)   memcpy ((d), (s), (n))
#define BUF_PUSH(c)
#define BUF_PUSH_2(c1, c2)
#define BUF_PUSH_3(c1, c2, c3)
#define BYTEWIDTH   8
#define bzero(s, n)   memset ((s), 0, (n))
#define CHAR_CLASS_MAX_LENGTH   6
#define CHAR_SET_SIZE   256
#define COMPILE_STACK_EMPTY   (compile_stack.avail == 0)
#define COMPILE_STACK_FULL   (compile_stack.avail == compile_stack.size)
#define COMPILE_STACK_TOP   (compile_stack.stack[compile_stack.avail])
#define DEBUG_POP(item_addr)
#define DEBUG_PRINT1(x)
#define DEBUG_PRINT2(x1, x2)
#define DEBUG_PRINT3(x1, x2, x3)
#define DEBUG_PRINT4(x1, x2, x3, x4)
#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
#define DEBUG_PUSH(item)
#define DEBUG_STATEMENT(e)
#define DOUBLE_FAIL_STACK(fail_stack)
#define EVER_MATCHED_SOMETHING(R)   ((R).bits.ever_matched_something)
#define EXTEND_BUFFER()
#define EXTRACT_NUMBER(destination, source)
#define EXTRACT_NUMBER_AND_INCR(destination, source)
#define FAIL_STACK_EMPTY()   (fail_stack.avail == 0)
#define FAIL_STACK_FULL()   (fail_stack.avail == fail_stack.size)
#define FAIL_STACK_PTR_EMPTY()   (fail_stack_ptr->avail == 0)
#define false   0
#define FIRST_STRING_P(ptr)   (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
#define FREE_STACK_RETURN(value)   return (free (compile_stack.stack), value)
#define FREE_VAR(var)   if (var) REGEX_FREE (var); var = NULL
#define FREE_VARIABLES()
#define GET_BUFFER_SPACE(n)
#define GET_UNSIGNED_NUMBER(num)
#define gettext(msgid)   (msgid)
#define gettext_noop(String)   String
#define HAVE_STRING_H
#define INIT_BUF_SIZE   32
#define INIT_COMPILE_STACK_SIZE   32
#define INIT_FAIL_STACK()
#define INIT_FAILURE_ALLOC   5
#define INSERT_JUMP(op, loc, to)   insert_op1 (op, loc, (int) ((to) - (loc) - 3), b)
#define INSERT_JUMP2(op, loc, to, arg)   insert_op2 (op, loc, (int) ((to) - (loc) - 3), arg, b)
#define IS_ACTIVE(R)   ((R).bits.is_active)
#define IS_CHAR_CLASS(string)
#define ISALNUM(c)   (ISASCII (c) && isalnum (c))
#define ISALPHA(c)   (ISASCII (c) && isalpha (c))
#define ISASCII(c)   1
#define ISBLANK(c)   ((c) == ' ' || (c) == '\t')
#define ISCNTRL(c)   (ISASCII (c) && iscntrl (c))
#define ISDIGIT(c)   (ISASCII (c) && isdigit (c))
#define ISGRAPH(c)   (ISASCII (c) && isprint (c) && !isspace (c))
#define ISLOWER(c)   (ISASCII (c) && islower (c))
#define ISPRINT(c)   (ISASCII (c) && isprint (c))
#define ISPUNCT(c)   (ISASCII (c) && ispunct (c))
#define ISSPACE(c)   (ISASCII (c) && isspace (c))
#define ISUPPER(c)   (ISASCII (c) && isupper (c))
#define ISXDIGIT(c)   (ISASCII (c) && isxdigit (c))
#define MATCH_MAY_ALLOCATE
#define MATCH_NULL_UNSET_VALUE   3
#define MATCHED_SOMETHING(R)   ((R).bits.matched_something)
#define MATCHING_IN_FIRST_STRING   (dend == end_match_1)
#define MAX(a, b)   ((a) > (b) ? (a) : (b))
#define MAX_BUF_SIZE   (1L << 16)
#define MAX_FAILURE_ITEMS   (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
#define MAX_REGNUM   255
#define MIN(a, b)   ((a) < (b) ? (a) : (b))
#define NO_HIGHEST_ACTIVE_REG   (1 << BYTEWIDTH)
#define NO_LOWEST_ACTIVE_REG   (NO_HIGHEST_ACTIVE_REG + 1)
#define NULL   (void *)0
#define NUM_FAILURE_ITEMS
#define NUM_NONREG_ITEMS   4
#define NUM_REG_ITEMS   3
#define PATFETCH(c)
#define PATFETCH_RAW(c)
#define PATUNFETCH   p--
#define POINTER_TO_OFFSET(ptr)
#define POP_FAILURE_ELT()   fail_stack.stack[--fail_stack.avail]
#define POP_FAILURE_INT()   fail_stack.stack[--fail_stack.avail].integer
#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)
#define POP_FAILURE_POINTER()   fail_stack.stack[--fail_stack.avail].pointer
#define PREFETCH()
#define PUSH_FAILURE_ELT(item)   fail_stack.stack[fail_stack.avail++] = (item)
#define PUSH_FAILURE_INT(item)   fail_stack.stack[fail_stack.avail++].integer = (item)
#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)
#define PUSH_FAILURE_POINTER(item)   fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)
#define PUSH_PATTERN_OP(POINTER, FAIL_STACK)
#define REALLOC(p, s)   realloc ((p), (s))
#define REG_MATCH_NULL_STRING_P(R)   ((R).bits.match_null_string_p)
#define REG_UNSET(e)   ((e) == REG_UNSET_VALUE)
#define REG_UNSET_VALUE   (&reg_unset_dummy)
#define REGEX_ALLOCATE   alloca
#define REGEX_ALLOCATE_STACK   alloca
#define REGEX_FREE(arg)   ((void)0)
#define REGEX_FREE_STACK(arg)
#define REGEX_REALLOCATE(source, osize, nsize)
#define REGEX_REALLOCATE_STACK(source, osize, nsize)   REGEX_REALLOCATE (source, osize, nsize)
#define REGEX_TALLOC(n, t)   ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
#define REMAINING_AVAIL_SLOTS   ((fail_stack).size - (fail_stack).avail)
#define RESET_FAIL_STACK()   REGEX_FREE_STACK (fail_stack.stack)
#define RETALLOC(addr, n, t)   ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
#define RETALLOC_IF(addr, n, t)   if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
#define SET_LIST_BIT(c)
#define SET_REGS_MATCHED()
#define SIGN_EXTEND_CHAR(c)   ((((unsigned char) (c)) ^ 128) - 128)
#define STORE_JUMP(op, loc, to)   store_op1 (op, loc, (int) ((to) - (loc) - 3))
#define STORE_JUMP2(op, loc, to, arg)   store_op2 (op, loc, (int) ((to) - (loc) - 3), arg)
#define STORE_NUMBER(destination, number)
#define STORE_NUMBER_AND_INCR(destination, number)
#define STREQ(s1, s2)   ((strcmp (s1, s2) == 0))
#define SWITCH_ENUM_CAST(x)   (x)
#define Sword   1
#define SYNTAX(c)   re_syntax_table[c]
#define TALLOC(n, t)   ((t *) malloc ((n) * sizeof (t)))
#define TRANSLATE(d)   (translate ? (char) translate[(unsigned char) (d)] : (d))
#define true   1
#define WORDCHAR_P(d)

Typedefs

typedef char boolean
typedef union fail_stack_elt fail_stack_elt_t
typedef long pattern_offset_t
typedef unsigned regnum_t

Enumerations

enum  re_opcode_t {
  no_op = 0, succeed, exactn, anychar,
  charset, charset_not, start_memory, stop_memory,
  duplicate, begline, endline, begbuf,
  endbuf, jump, jump_past_alt, on_failure_jump,
  on_failure_keep_string_jump, pop_failure_jump, maybe_pop_jump, dummy_failure_jump,
  push_dummy_failure, succeed_n, jump_n, set_number_at,
  wordchar, notwordchar, wordbeg, wordend,
  wordbound, notwordbound
}

Functions

static int bcmp_translate _RE_ARGS ((const char *s1, const char *s2, int len, char *translate))
static boolean
alt_match_null_string_p 
_RE_ARGS ((unsigned char *p, unsigned char *end, register_info_type *reg_info))
static boolean
group_match_null_string_p 
_RE_ARGS ((unsigned char **p, unsigned char *end, register_info_type *reg_info))
static boolean
group_in_compile_stack 
_RE_ARGS ((compile_stack_type compile_stack, regnum_t regnum))
static reg_errcode_t compile_range _RE_ARGS ((const char **p_ptr, const char *pend, char *translate, reg_syntax_t syntax, unsigned char *b))
static boolean at_endline_loc_p _RE_ARGS ((const char *p, const char *pend, reg_syntax_t syntax))
static boolean at_begline_loc_p _RE_ARGS ((const char *pattern, const char *p, reg_syntax_t syntax))
static void insert_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc, int arg1, int arg2, unsigned char *end))
static void insert_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc, int arg, unsigned char *end))
static void store_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc, int arg1, int arg2))
static void store_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc, int arg))
static reg_errcode_t regex_compile _RE_ARGS ((const char *pattern, size_t size, reg_syntax_t syntax, struct re_pattern_buffer *bufp))
static boolean alt_match_null_string_p (unsigned char *p, unsigned char *end, register_info_type *reg_info)
static boolean at_begline_loc_p (const char *pattern, const char *p, reg_syntax_t syntax)
static boolean at_endline_loc_p (const char *p, const char *pend, reg_syntax_t syntax)
static int bcmp_translate (const char *s1, const char *s2, int len, RE_TRANSLATE_TYPE translate)
static boolean common_op_match_null_string_p (unsigned char **p, unsigned char *end, register_info_type *reg_info)
static reg_errcode_t compile_range (const char **p_ptr, const char *pend, RE_TRANSLATE_TYPE translate, reg_syntax_t syntax, unsigned char *b)
static boolean group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum)
static boolean group_match_null_string_p (unsigned char **p, unsigned char *end, register_info_type *reg_info)
static void init_syntax_once ()
static void insert_op1 (re_opcode_t op, unsigned char *loc, int arg, unsigned char *end)
static void insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, unsigned char *end)
int re_compile_fastmap (struct re_pattern_buffer *bufp)
const char * re_compile_pattern (const char *pattern, size_t length, struct re_pattern_buffer *bufp)
int re_match (struct re_pattern_buffer *bufp, const char *string, int size, int pos, struct re_registers *regs)
int re_match_2 (struct re_pattern_buffer *bufp, const char *string1, int size1, const char *string2, int size2, int pos, struct re_registers *regs, int stop)
static int re_match_2_internal (struct re_pattern_buffer *bufp, const char *string1, int size1, const char *string2, int size2, int pos, struct re_registers *regs, int stop)
static int re_match_2_internal ()
int re_search (struct re_pattern_buffer *bufp, const char *string, int size, int startpos, int range, struct re_registers *regs)
int re_search_2 (struct re_pattern_buffer *bufp, const char *string1, int size1, const char *string2, int size2, int startpos, int range, struct re_registers *regs, int stop)
void re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs, unsigned num_regs, regoff_t *starts, regoff_t *ends)
reg_syntax_t re_set_syntax (reg_syntax_t syntax)
int regcomp (regex_t *preg, const char *pattern, int cflags)
size_t regerror (int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
static reg_errcode_t regex_compile (const char *pattern, size_t size, reg_syntax_t syntax, struct re_pattern_buffer *bufp)
int regexec (const regex_t *preg, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags)
void regfree (regex_t *preg)
static void store_op1 (re_opcode_t op, unsigned char *loc, int arg)
static void store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2)

Variables

static const char * re_error_msgid []
int re_max_failures = 20000
reg_syntax_t re_syntax_options
static char re_syntax_table [CHAR_SET_SIZE]
static char reg_unset_dummy

Define Documentation

#define _GNU_SOURCE

Definition at line 33 of file regex.c.

#define assert ( e   ) 

Definition at line 925 of file regex.c.

#define AT_STRINGS_BEG ( d   )     ((d) == (size1 ? string1 : string2) || !size2)

Definition at line 3597 of file regex.c.

#define AT_STRINGS_END ( d   )     ((d) == end2)

Definition at line 3598 of file regex.c.

#define bcmp ( s1,
s2,
n   )     memcmp ((s1), (s2), (n))

Definition at line 107 of file regex.c.

#define bcopy ( s,
d,
n   )     memcpy ((d), (s), (n))

Definition at line 110 of file regex.c.

#define BUF_PUSH ( c   ) 
Value:
do {                                    \
    GET_BUFFER_SPACE (1);                       \
    *b++ = (unsigned char) (c);                     \
  } while (0)

Definition at line 1517 of file regex.c.

#define BUF_PUSH_2 ( c1,
c2   ) 
Value:
do {                                    \
    GET_BUFFER_SPACE (2);                       \
    *b++ = (unsigned char) (c1);                    \
    *b++ = (unsigned char) (c2);                    \
  } while (0)

Definition at line 1525 of file regex.c.

#define BUF_PUSH_3 ( c1,
c2,
c3   ) 
Value:
do {                                    \
    GET_BUFFER_SPACE (3);                       \
    *b++ = (unsigned char) (c1);                    \
    *b++ = (unsigned char) (c2);                    \
    *b++ = (unsigned char) (c3);                    \
  } while (0)

Definition at line 1534 of file regex.c.

#define BYTEWIDTH   8

Definition at line 333 of file regex.c.

#define bzero ( s,
n   )     memset ((s), 0, (n))

Definition at line 113 of file regex.c.

#define CHAR_CLASS_MAX_LENGTH   6

Definition at line 1690 of file regex.c.

#define CHAR_SET_SIZE   256

Definition at line 141 of file regex.c.

#define COMPILE_STACK_EMPTY   (compile_stack.avail == 0)

Definition at line 1647 of file regex.c.

#define COMPILE_STACK_FULL   (compile_stack.avail == compile_stack.size)

Definition at line 1648 of file regex.c.

#define COMPILE_STACK_TOP   (compile_stack.stack[compile_stack.avail])

Definition at line 1651 of file regex.c.

#define DEBUG_POP ( item_addr   ) 

Definition at line 1194 of file regex.c.

#define DEBUG_PRINT1 ( x   ) 

Definition at line 928 of file regex.c.

#define DEBUG_PRINT2 ( x1,
x2   ) 

Definition at line 929 of file regex.c.

#define DEBUG_PRINT3 ( x1,
x2,
x3   ) 

Definition at line 930 of file regex.c.

#define DEBUG_PRINT4 ( x1,
x2,
x3,
x4   ) 

Definition at line 931 of file regex.c.

#define DEBUG_PRINT_COMPILED_PATTERN ( p,
s,
e   ) 

Definition at line 932 of file regex.c.

#define DEBUG_PRINT_DOUBLE_STRING ( w,
s1,
sz1,
s2,
sz2   ) 

Definition at line 933 of file regex.c.

#define DEBUG_PUSH ( item   ) 

Definition at line 1193 of file regex.c.

#define DEBUG_STATEMENT ( e   ) 

Definition at line 927 of file regex.c.

#define DOUBLE_FAIL_STACK ( fail_stack   ) 
Value:
((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS)   \
   ? 0                                  \
   : ((fail_stack).stack = (fail_stack_elt_t *)             \
        REGEX_REALLOCATE_STACK ((fail_stack).stack,             \
          (fail_stack).size * sizeof (fail_stack_elt_t),        \
          ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),    \
                                    \
      (fail_stack).stack == NULL                    \
      ? 0                               \
      : ((fail_stack).size <<= 1,                   \
         1)))

Definition at line 1140 of file regex.c.

#define EVER_MATCHED_SOMETHING ( R   )     ((R).bits.ever_matched_something)

Definition at line 1423 of file regex.c.

 
#define EXTEND_BUFFER (  ) 
Value:
do {                                    \
    unsigned char *old_buffer = bufp->buffer;               \
    if (bufp->allocated == MAX_BUF_SIZE)                \
      return REG_ESIZE;                         \
    bufp->allocated <<= 1;                      \
    if (bufp->allocated > MAX_BUF_SIZE)                 \
      bufp->allocated = MAX_BUF_SIZE;                   \
    bufp->buffer = (unsigned char *) REALLOC (bufp->buffer, bufp->allocated);\
    if (bufp->buffer == NULL)                       \
      return REG_ESPACE;                        \
    /* If the buffer moved, move all the pointers into it.  */      \
    if (old_buffer != bufp->buffer)                 \
      {                                 \
        b = (b - old_buffer) + bufp->buffer;                \
        begalt = (begalt - old_buffer) + bufp->buffer;          \
        if (fixup_alt_jump)                     \
          fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
        if (laststart)                          \
          laststart = (laststart - old_buffer) + bufp->buffer;      \
        if (pending_exact)                      \
          pending_exact = (pending_exact - old_buffer) + bufp->buffer;  \
      }                                 \
  } while (0)

Definition at line 1584 of file regex.c.

#define EXTRACT_NUMBER ( destination,
source   ) 
Value:
do {                                    \
    (destination) = *(source) & 0377;                   \
    (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;       \
  } while (0)

Definition at line 510 of file regex.c.

#define EXTRACT_NUMBER_AND_INCR ( destination,
source   ) 
Value:
do {                                    \
    EXTRACT_NUMBER (destination, source);               \
    (source) += 2;                          \
  } while (0)

Definition at line 538 of file regex.c.

 
#define FAIL_STACK_EMPTY (  )     (fail_stack.avail == 0)

Definition at line 1101 of file regex.c.

 
#define FAIL_STACK_FULL (  )     (fail_stack.avail == fail_stack.size)

Definition at line 1103 of file regex.c.

 
#define FAIL_STACK_PTR_EMPTY (  )     (fail_stack_ptr->avail == 0)

Definition at line 1102 of file regex.c.

#define false   0

Definition at line 343 of file regex.c.

#define FIRST_STRING_P ( ptr   )     (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)

Definition at line 323 of file regex.c.

#define FREE_STACK_RETURN ( value   )     return (free (compile_stack.stack), value)

Definition at line 1772 of file regex.c.

#define FREE_VAR ( var   )     if (var) REGEX_FREE (var); var = NULL

Definition at line 3621 of file regex.c.

 
#define FREE_VARIABLES (  ) 
Value:
do {                                    \
    REGEX_FREE_STACK (fail_stack.stack);                \
    FREE_VAR (regstart);                        \
    FREE_VAR (regend);                          \
    FREE_VAR (old_regstart);                        \
    FREE_VAR (old_regend);                      \
    FREE_VAR (best_regstart);                       \
    FREE_VAR (best_regend);                     \
    FREE_VAR (reg_info);                        \
    FREE_VAR (reg_dummy);                       \
    FREE_VAR (reg_info_dummy);                      \
  } while (0)

Definition at line 3622 of file regex.c.

#define GET_BUFFER_SPACE ( n   ) 
Value:
while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated)  \
      EXTEND_BUFFER ()

Definition at line 1512 of file regex.c.

#define GET_UNSIGNED_NUMBER ( num   ) 
Value:
{ if (p != pend)                            \
     {                                  \
       PATFETCH (c);                            \
       while (ISDIGIT (c))                      \
         {                              \
           if (num < 0)                         \
              num = 0;                          \
           num = num * 10 + c - '0';                    \
           if (p == pend)                       \
              break;                            \
           PATFETCH (c);                        \
         }                              \
       }                                \
    }

Definition at line 1661 of file regex.c.

#define gettext ( msgid   )     (msgid)

Definition at line 58 of file regex.c.

#define gettext_noop ( String   )     String

Definition at line 64 of file regex.c.

#define HAVE_STRING_H

Definition at line 102 of file regex.c.

#define INIT_BUF_SIZE   32

Definition at line 1509 of file regex.c.

#define INIT_COMPILE_STACK_SIZE   32

Definition at line 1645 of file regex.c.

 
#define INIT_FAIL_STACK (  ) 
Value:
do {                                    \
    fail_stack.stack = (fail_stack_elt_t *)             \
      REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t));    \
                                    \
    if (fail_stack.stack == NULL)                   \
      return -2;                            \
                                    \
    fail_stack.size = INIT_FAILURE_ALLOC;               \
    fail_stack.avail = 0;                       \
  } while (0)

Definition at line 1110 of file regex.c.

#define INIT_FAILURE_ALLOC   5

Definition at line 1041 of file regex.c.

#define INSERT_JUMP ( op,
loc,
to   )     insert_op1 (op, loc, (int) ((to) - (loc) - 3), b)

Definition at line 1553 of file regex.c.

#define INSERT_JUMP2 ( op,
loc,
to,
arg   )     insert_op2 (op, loc, (int) ((to) - (loc) - 3), arg, b)

Definition at line 1557 of file regex.c.

#define IS_ACTIVE ( R   )     ((R).bits.is_active)

Definition at line 1421 of file regex.c.

#define IS_CHAR_CLASS ( string   ) 
Value:
(STREQ (string, "alpha") || STREQ (string, "upper")         \
    || STREQ (string, "lower") || STREQ (string, "digit")       \
    || STREQ (string, "alnum") || STREQ (string, "xdigit")      \
    || STREQ (string, "space") || STREQ (string, "print")       \
    || STREQ (string, "punct") || STREQ (string, "graph")       \
    || STREQ (string, "cntrl") || STREQ (string, "blank"))

Definition at line 1692 of file regex.c.

#define ISALNUM ( c   )     (ISASCII (c) && isalnum (c))

Definition at line 212 of file regex.c.

#define ISALPHA ( c   )     (ISASCII (c) && isalpha (c))

Definition at line 213 of file regex.c.

#define ISASCII ( c   )     1

Definition at line 194 of file regex.c.

#define ISBLANK ( c   )     ((c) == ' ' || (c) == '\t')

Definition at line 202 of file regex.c.

#define ISCNTRL ( c   )     (ISASCII (c) && iscntrl (c))

Definition at line 214 of file regex.c.

#define ISDIGIT ( c   )     (ISASCII (c) && isdigit (c))

Definition at line 211 of file regex.c.

#define ISGRAPH ( c   )     (ISASCII (c) && isprint (c) && !isspace (c))

Definition at line 207 of file regex.c.

#define ISLOWER ( c   )     (ISASCII (c) && islower (c))

Definition at line 215 of file regex.c.

#define ISPRINT ( c   )     (ISASCII (c) && isprint (c))

Definition at line 210 of file regex.c.

#define ISPUNCT ( c   )     (ISASCII (c) && ispunct (c))

Definition at line 216 of file regex.c.

#define ISSPACE ( c   )     (ISASCII (c) && isspace (c))

Definition at line 217 of file regex.c.

#define ISUPPER ( c   )     (ISASCII (c) && isupper (c))

Definition at line 218 of file regex.c.

#define ISXDIGIT ( c   )     (ISASCII (c) && isxdigit (c))

Definition at line 219 of file regex.c.

#define MATCH_MAY_ALLOCATE

Definition at line 1014 of file regex.c.

#define MATCH_NULL_UNSET_VALUE   3

Definition at line 1412 of file regex.c.

#define MATCHED_SOMETHING ( R   )     ((R).bits.matched_something)

Definition at line 1422 of file regex.c.

#define MATCHING_IN_FIRST_STRING   (dend == end_match_1)

Definition at line 3579 of file regex.c.

#define MAX ( a,
b   )     ((a) > (b) ? (a) : (b))

Definition at line 339 of file regex.c.

#define MAX_BUF_SIZE   (1L << 16)

Definition at line 1576 of file regex.c.

#define MAX_FAILURE_ITEMS   (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)

Definition at line 1297 of file regex.c.

#define MAX_REGNUM   255

Definition at line 1613 of file regex.c.

#define MIN ( a,
b   )     ((a) < (b) ? (a) : (b))

Definition at line 340 of file regex.c.

#define NO_HIGHEST_ACTIVE_REG   (1 << BYTEWIDTH)

Definition at line 3646 of file regex.c.

#define NO_LOWEST_ACTIVE_REG   (NO_HIGHEST_ACTIVE_REG + 1)

Definition at line 3647 of file regex.c.

#define NULL   (void *)0

Definition at line 222 of file regex.c.

#define NUM_FAILURE_ITEMS
Value:
(((0                            \
     ? 0 : highest_active_reg - lowest_active_reg + 1)  \
    * NUM_REG_ITEMS)                    \
   + NUM_NONREG_ITEMS)

Definition at line 1300 of file regex.c.

#define NUM_NONREG_ITEMS   4

Definition at line 1290 of file regex.c.

#define NUM_REG_ITEMS   3

Definition at line 1284 of file regex.c.

#define PATFETCH ( c   ) 
Value:
do {if (p == pend) return REG_EEND;                 \
    c = (unsigned char) *p++;                       \
    if (translate) c = (unsigned char) translate[c];            \
  } while (0)

Definition at line 1478 of file regex.c.

#define PATFETCH_RAW ( c   ) 
Value:
do {if (p == pend) return REG_EEND;                 \
    c = (unsigned char) *p++;                       \
  } while (0)

Definition at line 1487 of file regex.c.

#define PATUNFETCH   p--

Definition at line 1493 of file regex.c.

#define POINTER_TO_OFFSET ( ptr   ) 
Value:
(FIRST_STRING_P (ptr)               \
   ? ((regoff_t) ((ptr) - string1))     \
   : ((regoff_t) ((ptr) - string2 + size1)))

Definition at line 3572 of file regex.c.

 
#define POP_FAILURE_ELT (  )     fail_stack.stack[--fail_stack.avail]

Definition at line 1186 of file regex.c.

 
#define POP_FAILURE_INT (  )     fail_stack.stack[--fail_stack.avail].integer

Definition at line 1185 of file regex.c.

#define POP_FAILURE_POINT ( str,
pat,
low_reg,
high_reg,
regstart,
regend,
reg_info   ) 

Definition at line 1322 of file regex.c.

 
#define POP_FAILURE_POINTER (  )     fail_stack.stack[--fail_stack.avail].pointer

Definition at line 1184 of file regex.c.

 
#define PREFETCH (  ) 
Value:
while (d == dend)                               \
    {                                   \
      /* End of string2 => fail.  */                    \
      if (dend == end_match_2)                      \
        goto fail;                          \
      /* End of string1 => advance to string2.  */          \
      d = string2;                              \
      dend = end_match_2;                       \
    }

Definition at line 3583 of file regex.c.

#define PUSH_FAILURE_ELT ( item   )     fail_stack.stack[fail_stack.avail++] = (item)

Definition at line 1179 of file regex.c.

#define PUSH_FAILURE_INT ( item   )     fail_stack.stack[fail_stack.avail++].integer = (item)

Definition at line 1173 of file regex.c.

#define PUSH_FAILURE_POINT ( pattern_place,
string_place,
failure_code   ) 

Definition at line 1207 of file regex.c.

#define PUSH_FAILURE_POINTER ( item   )     fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)

Definition at line 1167 of file regex.c.

#define PUSH_PATTERN_OP ( POINTER,
FAIL_STACK   ) 
Value:
((FAIL_STACK_FULL ()                            \
    && !DOUBLE_FAIL_STACK (FAIL_STACK))                 \
   ? 0                                  \
   : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER,   \
      1))

Definition at line 1157 of file regex.c.

#define REALLOC ( p,
s   )     realloc ((p), (s))

Definition at line 1577 of file regex.c.

#define REG_MATCH_NULL_STRING_P ( R   )     ((R).bits.match_null_string_p)

Definition at line 1420 of file regex.c.

#define REG_UNSET ( e   )     ((e) == REG_UNSET_VALUE)

Definition at line 1449 of file regex.c.

#define REG_UNSET_VALUE   (&reg_unset_dummy)

Definition at line 1448 of file regex.c.

#define REGEX_ALLOCATE   alloca

Definition at line 275 of file regex.c.

#define REGEX_ALLOCATE_STACK   alloca

Definition at line 309 of file regex.c.

#define REGEX_FREE ( arg   )     ((void)0)

Definition at line 284 of file regex.c.

#define REGEX_FREE_STACK ( arg   ) 

Definition at line 314 of file regex.c.

#define REGEX_REALLOCATE ( source,
osize,
nsize   ) 
Value:
(destination = (char *) alloca (nsize),             \
   bcopy (source, destination, osize),                  \
   destination)

Definition at line 278 of file regex.c.

#define REGEX_REALLOCATE_STACK ( source,
osize,
nsize   )     REGEX_REALLOCATE (source, osize, nsize)

Definition at line 311 of file regex.c.

#define REGEX_TALLOC ( n,
t   )     ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))

Definition at line 331 of file regex.c.

#define REMAINING_AVAIL_SLOTS   ((fail_stack).size - (fail_stack).avail)

Definition at line 1307 of file regex.c.

 
#define RESET_FAIL_STACK (  )     REGEX_FREE_STACK (fail_stack.stack)

Definition at line 1122 of file regex.c.

#define RETALLOC ( addr,
n,
t   )     ((addr) = (t *) realloc (addr, (n) * sizeof (t)))

Definition at line 328 of file regex.c.

#define RETALLOC_IF ( addr,
n,
t   )     if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)

Definition at line 329 of file regex.c.

#define SET_LIST_BIT ( c   ) 
Value:
(b[((unsigned char) (c)) / BYTEWIDTH]               \
   |= 1 << (((unsigned char) c) % BYTEWIDTH))

Definition at line 1655 of file regex.c.

 
#define SET_REGS_MATCHED (  ) 
Value:
do                                  \
    {                                   \
      if (!set_regs_matched_done)                   \
    {                               \
      active_reg_t r;                       \
      set_regs_matched_done = 1;                    \
      for (r = lowest_active_reg; r <= highest_active_reg; r++) \
        {                               \
          MATCHED_SOMETHING (reg_info[r])               \
        = EVER_MATCHED_SOMETHING (reg_info[r])          \
        = 1;                            \
        }                               \
    }                               \
    }                                   \
  while (0)

Definition at line 1429 of file regex.c.

#define SIGN_EXTEND_CHAR ( c   )     ((((unsigned char) (c)) ^ 128) - 128)

Definition at line 234 of file regex.c.

#define STORE_JUMP ( op,
loc,
to   )     store_op1 (op, loc, (int) ((to) - (loc) - 3))

Definition at line 1545 of file regex.c.

#define STORE_JUMP2 ( op,
loc,
to,
arg   )     store_op2 (op, loc, (int) ((to) - (loc) - 3), arg)

Definition at line 1549 of file regex.c.

#define STORE_NUMBER ( destination,
number   ) 
Value:
do {                                    \
    (destination)[0] = (number) & 0377;                 \
    (destination)[1] = (number) >> 8;                   \
  } while (0)

Definition at line 491 of file regex.c.

#define STORE_NUMBER_AND_INCR ( destination,
number   ) 
Value:
do {                                    \
    STORE_NUMBER (destination, number);                 \
    (destination) += 2;                         \
  } while (0)

Definition at line 501 of file regex.c.

#define STREQ ( s1,
s2   )     ((strcmp (s1, s2) == 0))

Definition at line 335 of file regex.c.

#define SWITCH_ENUM_CAST ( x   )     (x)

Definition at line 131 of file regex.c.

#define Sword   1

Definition at line 125 of file regex.c.

#define SYNTAX ( c   )     re_syntax_table[c]

Definition at line 172 of file regex.c.

#define TALLOC ( n,
t   )     ((t *) malloc ((n) * sizeof (t)))

Definition at line 327 of file regex.c.

#define TRANSLATE ( d   )     (translate ? (char) translate[(unsigned char) (d)] : (d))

Definition at line 1501 of file regex.c.

#define true   1

Definition at line 344 of file regex.c.

#define WORDCHAR_P ( d   ) 
Value:
(SYNTAX ((d) == end1 ? *string2                 \
           : (d) == string2 - 1 ? *(end1 - 1) : *(d))           \
   == Sword)

Definition at line 3605 of file regex.c.


Typedef Documentation

typedef char boolean

Definition at line 342 of file regex.c.

typedef union fail_stack_elt fail_stack_elt_t

Definition at line 1090 of file regex.c.

typedef long pattern_offset_t

Definition at line 1625 of file regex.c.

typedef unsigned regnum_t

Definition at line 1617 of file regex.c.


Enumeration Type Documentation

enum re_opcode_t

Enumerator:
no_op 
succeed 
exactn 
anychar 
charset 
charset_not 
start_memory 
stop_memory 
duplicate 
begline 
endline 
begbuf 
endbuf 
jump 
jump_past_alt 
on_failure_jump 
on_failure_keep_string_jump 
pop_failure_jump 
maybe_pop_jump 
dummy_failure_jump 
push_dummy_failure 
succeed_n 
jump_n 
set_number_at 
wordchar 
notwordchar 
wordbeg 
wordend 
wordbound 
notwordbound 

Definition at line 353 of file regex.c.

00354 {
00355   no_op = 0,
00356 
00357   /* Succeed right away--no more backtracking.  */
00358   succeed,
00359 
00360         /* Followed by one byte giving n, then by n literal bytes.  */
00361   exactn,
00362 
00363         /* Matches any (more or less) character.  */
00364   anychar,
00365 
00366         /* Matches any one char belonging to specified set.  First
00367            following byte is number of bitmap bytes.  Then come bytes
00368            for a bitmap saying which chars are in.  Bits in each byte
00369            are ordered low-bit-first.  A character is in the set if its
00370            bit is 1.  A character too large to have a bit in the map is
00371            automatically not in the set.  */
00372   charset,
00373 
00374         /* Same parameters as charset, but match any character that is
00375            not one of those specified.  */
00376   charset_not,
00377 
00378         /* Start remembering the text that is matched, for storing in a
00379            register.  Followed by one byte with the register number, in
00380            the range 0 to one less than the pattern buffer's re_nsub
00381            field.  Then followed by one byte with the number of groups
00382            inner to this one.  (This last has to be part of the
00383            start_memory only because we need it in the on_failure_jump
00384            of re_match_2.)  */
00385   start_memory,
00386 
00387         /* Stop remembering the text that is matched and store it in a
00388            memory register.  Followed by one byte with the register
00389            number, in the range 0 to one less than `re_nsub' in the
00390            pattern buffer, and one byte with the number of inner groups,
00391            just like `start_memory'.  (We need the number of inner
00392            groups here because we don't have any easy way of finding the
00393            corresponding start_memory when we're at a stop_memory.)  */
00394   stop_memory,
00395 
00396         /* Match a duplicate of something remembered. Followed by one
00397            byte containing the register number.  */
00398   duplicate,
00399 
00400         /* Fail unless at beginning of line.  */
00401   begline,
00402 
00403         /* Fail unless at end of line.  */
00404   endline,
00405 
00406         /* Succeeds if at beginning of buffer (if emacs) or at beginning
00407            of string to be matched (if not).  */
00408   begbuf,
00409 
00410         /* Analogously, for end of buffer/string.  */
00411   endbuf,
00412 
00413         /* Followed by two byte relative address to which to jump.  */
00414   jump,
00415 
00416     /* Same as jump, but marks the end of an alternative.  */
00417   jump_past_alt,
00418 
00419         /* Followed by two-byte relative address of place to resume at
00420            in case of failure.  */
00421   on_failure_jump,
00422 
00423         /* Like on_failure_jump, but pushes a placeholder instead of the
00424            current string position when executed.  */
00425   on_failure_keep_string_jump,
00426 
00427         /* Throw away latest failure point and then jump to following
00428            two-byte relative address.  */
00429   pop_failure_jump,
00430 
00431         /* Change to pop_failure_jump if know won't have to backtrack to
00432            match; otherwise change to jump.  This is used to jump
00433            back to the beginning of a repeat.  If what follows this jump
00434            clearly won't match what the repeat does, such that we can be
00435            sure that there is no use backtracking out of repetitions
00436            already matched, then we change it to a pop_failure_jump.
00437            Followed by two-byte address.  */
00438   maybe_pop_jump,
00439 
00440         /* Jump to following two-byte address, and push a dummy failure
00441            point. This failure point will be thrown away if an attempt
00442            is made to use it for a failure.  A `+' construct makes this
00443            before the first repeat.  Also used as an intermediary kind
00444            of jump when compiling an alternative.  */
00445   dummy_failure_jump,
00446 
00447     /* Push a dummy failure point and continue.  Used at the end of
00448        alternatives.  */
00449   push_dummy_failure,
00450 
00451         /* Followed by two-byte relative address and two-byte number n.
00452            After matching N times, jump to the address upon failure.  */
00453   succeed_n,
00454 
00455         /* Followed by two-byte relative address, and two-byte number n.
00456            Jump to the address N times, then fail.  */
00457   jump_n,
00458 
00459         /* Set the following two-byte relative address to the
00460            subsequent two-byte number.  The address *includes* the two
00461            bytes of number.  */
00462   set_number_at,
00463 
00464   wordchar, /* Matches any word-constituent character.  */
00465   notwordchar,  /* Matches any char that is not a word-constituent.  */
00466 
00467   wordbeg,  /* Succeeds if at word beginning.  */
00468   wordend,  /* Succeeds if at word end.  */
00469 
00470   wordbound,    /* Succeeds if at a word boundary.  */
00471   notwordbound  /* Succeeds if not at a word boundary.  */
00472 
00473 #ifdef emacs
00474   ,before_dot,  /* Succeeds if before point.  */
00475   at_dot,   /* Succeeds if at point.  */
00476   after_dot,    /* Succeeds if after point.  */
00477 
00478     /* Matches any character whose syntax is specified.  Followed by
00479            a byte which contains a syntax code, e.g., Sword.  */
00480   syntaxspec,
00481 
00482     /* Matches any character whose syntax is not that specified.  */
00483   notsyntaxspec
00484 #endif /* emacs */
00485 } re_opcode_t;


Function Documentation

static int bcmp_translate _RE_ARGS ( (const char *s1, const char *s2, int len, char *translate)   )  [static]
static boolean alt_match_null_string_p _RE_ARGS ( (unsigned char *p, unsigned char *end, register_info_type *reg_info)   )  [static]
static boolean common_op_match_null_string_p _RE_ARGS ( (unsigned char **p, unsigned char *end, register_info_type *reg_info)   )  [static]
static boolean group_in_compile_stack _RE_ARGS ( (compile_stack_type compile_stack, regnum_t regnum)   )  [static]
static reg_errcode_t compile_range _RE_ARGS ( (const char **p_ptr, const char *pend, char *translate, reg_syntax_t syntax, unsigned char *b)   )  [static]
static boolean at_endline_loc_p _RE_ARGS ( (const char *p, const char *pend, reg_syntax_t syntax)   )  [static]
static boolean at_begline_loc_p _RE_ARGS ( (const char *pattern, const char *p, reg_syntax_t syntax)   )  [static]
static void insert_op2 _RE_ARGS ( (re_opcode_t op, unsigned char *loc, int arg1, int arg2, unsigned char *end)   )  [static]
static void insert_op1 _RE_ARGS ( (re_opcode_t op, unsigned char *loc, int arg, unsigned char *end)   )  [static]
static void store_op2 _RE_ARGS ( (re_opcode_t op, unsigned char *loc, int arg1, int arg2)   )  [static]
static void store_op1 _RE_ARGS ( (re_opcode_t op, unsigned char *loc, int arg)   )  [static]
static reg_errcode_t regex_compile _RE_ARGS ( (const char *pattern, size_t size, reg_syntax_t syntax, struct re_pattern_buffer *bufp)   )  [static]
static boolean alt_match_null_string_p ( unsigned char *  p,
unsigned char *  end,
register_info_type *  reg_info 
) [static]

Definition at line 5231 of file regex.c.

05231                                                                            :
05232    It expects P to be the first byte of a single alternative and END one
05233    byte past the last. The alternative can contain groups.  */
05234 
05235 static boolean
05236 alt_match_null_string_p (p, end, reg_info)
05237     unsigned char *p, *end;
05238     register_info_type *reg_info;
05239 {
05240   int mcnt;
05241   unsigned char *p1 = p;
05242 
05243   while (p1 < end)
05244     {
05245       /* Skip over opcodes that can match nothing, and break when we get
05246          to one that can't.  */
05247 
05248       switch ((re_opcode_t) *p1)
05249         {
05250     /* It's a loop.  */
05251         case on_failure_jump:
05252           p1++;
05253           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
05254           p1 += mcnt;
05255           break;
05256 
05257     default:
05258           if (!common_op_match_null_string_p (&p1, end, reg_info))
05259             return false;
        }

static boolean at_begline_loc_p ( const char *  pattern,
const char *  p,
reg_syntax_t  syntax 
) [static]

Definition at line 2944 of file regex.c.

02952 {
02953   const char *prev = p - 2;
02954   boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
02955 
02956   return

static boolean at_endline_loc_p ( const char *  p,
const char *  pend,
reg_syntax_t  syntax 
) [static]

Definition at line 2963 of file regex.c.

02971 {
02972   const char *next = p;
02973   boolean next_backslash = *next == '\\';
02974   const char *next_next = p + 1 < pend ? p + 1 : 0;
02975 
02976   return
02977        /* Before a subexpression?  */
02978        (syntax & RE_NO_BK_PARENS ? *next == ')'

static int bcmp_translate ( const char *  s1,
const char *  s2,
int  len,
RE_TRANSLATE_TYPE  translate 
) [static]

Definition at line 5356 of file regex.c.

05365 {
05366   register const unsigned char *p1 = (const unsigned char *) s1;
05367   register const unsigned char *p2 = (const unsigned char *) s2;
05368   while (len)
05369     {

static boolean common_op_match_null_string_p ( unsigned char **  p,
unsigned char *  end,
register_info_type *  reg_info 
) [static]

Definition at line 5268 of file regex.c.

05276 {
05277   int mcnt;
05278   boolean ret;
05279   int reg_no;
05280   unsigned char *p1 = *p;
05281 
05282   switch ((re_opcode_t) *p1++)
05283     {
05284     case no_op:
05285     case begline:
05286     case endline:
05287     case begbuf:
05288     case endbuf:
05289     case wordbeg:
05290     case wordend:
05291     case wordbound:
05292     case notwordbound:
05293 #ifdef emacs
05294     case before_dot:
05295     case at_dot:
05296     case after_dot:
05297 #endif
05298       break;
05299 
05300     case start_memory:
05301       reg_no = *p1;
05302       assert (reg_no > 0 && reg_no <= MAX_REGNUM);
05303       ret = group_match_null_string_p (&p1, end, reg_info);
05304 
05305       /* Have to set this here in case we're checking a group which
05306          contains a group and a back reference to it.  */
05307 
05308       if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
05309         REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
05310 
05311       if (!ret)
05312         return false;
05313       break;
05314 
05315     /* If this is an optimized succeed_n for zero times, make the jump.  */
05316     case jump:
05317       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
05318       if (mcnt >= 0)
05319         p1 += mcnt;
05320       else
05321         return false;
05322       break;
05323 
05324     case succeed_n:
05325       /* Get to the number of times to succeed.  */
05326       p1 += 2;
05327       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
05328 
05329       if (mcnt == 0)
05330         {
05331           p1 -= 4;
05332           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
05333           p1 += mcnt;
05334         }
05335       else
05336         return false;
05337       break;
05338 
05339     case duplicate:
05340       if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
05341         return false;
05342       break;
05343 
05344     case set_number_at:
05345       p1 += 4;
05346 
05347     default:
05348       /* All other opcodes mean we cannot match the empty string.  */
05349       return false;

static reg_errcode_t compile_range ( const char **  p_ptr,
const char *  pend,
RE_TRANSLATE_TYPE  translate,
reg_syntax_t  syntax,
unsigned char *  b 
) [static]

Definition at line 3013 of file regex.c.

03023 {
03024   unsigned this_char;
03025 
03026   const char *p = *p_ptr;
03027   unsigned int range_start, range_end;
03028 
03029   if (p == pend)
03030     return REG_ERANGE;
03031 
03032   /* Even though the pattern is a signed `char *', we need to fetch
03033      with unsigned char *'s; if the high bit of the pattern character
03034      is set, the range endpoints will be negative if we fetch using a
03035      signed char *.
03036 
03037      We also want to fetch the endpoints without translating them; the
03038      appropriate translation is done in the bit-setting loop below.  */
03039   /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *.  */
03040   range_start = ((const unsigned char *) p)[-2];
03041   range_end   = ((const unsigned char *) p)[0];
03042 
03043   /* Have to increment the pointer into the pattern string, so the
03044      caller isn't still at the ending character.  */
03045   (*p_ptr)++;
03046 
03047   /* If the start is after the end, the range is empty.  */
03048   if (range_start > range_end)
03049     return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
03050 
03051   /* Here we see why `this_char' has to be larger than an `unsigned
03052      char' -- the range is inclusive, so if `range_end' == 0xff
03053      (assuming 8-bit characters), we would otherwise go into an infinite
03054      loop, since all characters <= 0xff.  */
03055   for (this_char = range_start; this_char <= range_end; this_char++)
03056     {

static boolean group_in_compile_stack ( compile_stack_type  compile_stack,
regnum_t  regnum 
) [static]

Definition at line 2985 of file regex.c.

02993 {
02994   int this_element;
02995 
02996   for (this_element = compile_stack.avail - 1;
02997        this_element >= 0;
02998        this_element--)

static boolean group_match_null_string_p ( unsigned char **  p,
unsigned char *  end,
register_info_type *  reg_info 
) [static]

Definition at line 5122 of file regex.c.

05130 {
05131   int mcnt;
05132   /* Point to after the args to the start_memory.  */
05133   unsigned char *p1 = *p + 2;
05134 
05135   while (p1 < end)
05136     {
05137       /* Skip over opcodes that can match nothing, and return true or
05138      false, as appropriate, when we get to one that can't, or to the
05139          matching stop_memory.  */
05140 
05141       switch ((re_opcode_t) *p1)
05142         {
05143         /* Could be either a loop or a series of alternatives.  */
05144         case on_failure_jump:
05145           p1++;
05146           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
05147 
05148           /* If the next operation is not a jump backwards in the
05149          pattern.  */
05150 
05151       if (mcnt >= 0)
05152         {
05153               /* Go through the on_failure_jumps of the alternatives,
05154                  seeing if any of the alternatives cannot match nothing.
05155                  The last alternative starts with only a jump,
05156                  whereas the rest start with on_failure_jump and end
05157                  with a jump, e.g., here is the pattern for `a|b|c':
05158 
05159                  /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
05160                  /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
05161                  /exactn/1/c
05162 
05163                  So, we have to first go through the first (n-1)
05164                  alternatives and then deal with the last one separately.  */
05165 
05166 
05167               /* Deal with the first (n-1) alternatives, which start
05168                  with an on_failure_jump (see above) that jumps to right
05169                  past a jump_past_alt.  */
05170 
05171               while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
05172                 {
05173                   /* `mcnt' holds how many bytes long the alternative
05174                      is, including the ending `jump_past_alt' and
05175                      its number.  */
05176 
05177                   if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
05178                                       reg_info))
05179                     return false;
05180 
05181                   /* Move to right after this alternative, including the
05182              jump_past_alt.  */
05183                   p1 += mcnt;
05184 
05185                   /* Break if it's the beginning of an n-th alternative
05186                      that doesn't begin with an on_failure_jump.  */
05187                   if ((re_opcode_t) *p1 != on_failure_jump)
05188                     break;
05189 
05190           /* Still have to check that it's not an n-th
05191              alternative that starts with an on_failure_jump.  */
05192           p1++;
05193                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
05194                   if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
05195                     {
05196               /* Get to the beginning of the n-th alternative.  */
05197                       p1 -= 3;
05198                       break;
05199                     }
05200                 }
05201 
05202               /* Deal with the last alternative: go back and get number
05203                  of the `jump_past_alt' just before it.  `mcnt' contains
05204                  the length of the alternative.  */
05205               EXTRACT_NUMBER (mcnt, p1 - 2);
05206 
05207               if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
05208                 return false;
05209 
05210               p1 += mcnt;   /* Get past the n-th alternative.  */
05211             } /* if mcnt > 0 */
05212           break;
05213 
05214 
05215         case stop_memory:
05216       assert (p1[1] == **p);
05217           *p = p1 + 2;
05218           return true;
05219 
05220 
05221         default:
05222           if (!common_op_match_null_string_p (&p1, end, reg_info))
05223             return false;

static void init_syntax_once (  )  [static]

Definition at line 146 of file regex.c.

00147 {
00148    register int c;
00149    static int done = 0;
00150 
00151    if (done)
00152      return;
00153 
00154    bzero (re_syntax_table, sizeof re_syntax_table);
00155 
00156    for (c = 'a'; c <= 'z'; c++)
00157      re_syntax_table[c] = Sword;
00158 
00159    for (c = 'A'; c <= 'Z'; c++)
00160      re_syntax_table[c] = Sword;
00161 
00162    for (c = '0'; c <= '9'; c++)
00163      re_syntax_table[c] = Sword;
00164 
00165    re_syntax_table['_'] = Sword;
00166 
00167    done = 1;
00168 }

static void insert_op1 ( re_opcode_t  op,
unsigned char *  loc,
int  arg,
unsigned char *  end 
) [static]

Definition at line 2904 of file regex.c.

02914 {
02915   register unsigned char *pfrom = end;
02916   register unsigned char *pto = end + 3;
02917 

static void insert_op2 ( re_opcode_t  op,
unsigned char *  loc,
int  arg1,
int  arg2,
unsigned char *  end 
) [static]

Definition at line 2923 of file regex.c.

02933 {
02934   register unsigned char *pfrom = end;
02935   register unsigned char *pto = end + 5;
02936 

int re_compile_fastmap ( struct re_pattern_buffer *  bufp  ) 

Definition at line 3072 of file regex.c.

03079 {
03080   int j, k;
03081 #ifdef MATCH_MAY_ALLOCATE
03082   fail_stack_type fail_stack;
03083 #endif
03084 #ifndef REGEX_MALLOC
03085   char *destination;
03086 #endif
03087   /* We don't push any register information onto the failure stack.  */
03088 //sword  unsigned num_regs = 0;
03089 
03090   register char *fastmap = bufp->fastmap;
03091   unsigned char *pattern = bufp->buffer;
03092   unsigned char *p = pattern;
03093   register unsigned char *pend = pattern + bufp->used;
03094 
03095 #ifdef REL_ALLOC
03096   /* This holds the pointer to the failure stack, when
03097      it is allocated relocatably.  */
03098   fail_stack_elt_t *failure_stack_ptr;
03099 #endif
03100 
03101   /* Assume that each path through the pattern can be null until
03102      proven otherwise.  We set this false at the bottom of switch
03103      statement, to which we get only if a particular path doesn't
03104      match the empty string.  */
03105   boolean path_can_be_null = true;
03106 
03107   /* We aren't doing a `succeed_n' to begin with.  */
03108   boolean succeed_n_p = false;
03109 
03110   assert (fastmap != NULL && p != NULL);
03111 
03112   INIT_FAIL_STACK ();
03113   bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
03114   bufp->fastmap_accurate = 1;       /* It will be when we're done.  */
03115   bufp->can_be_null = 0;
03116 
03117   while (1)
03118     {
03119       if (p == pend || *p == succeed)
03120     {
03121       /* We have reached the (effective) end of pattern.  */
03122       if (!FAIL_STACK_EMPTY ())
03123         {
03124           bufp->can_be_null |= path_can_be_null;
03125 
03126           /* Reset for next path.  */
03127           path_can_be_null = true;
03128 
03129           p = fail_stack.stack[--fail_stack.avail].pointer;
03130 
03131           continue;
03132         }
03133       else
03134         break;
03135     }
03136 
03137       /* We should never be about to go beyond the end of the pattern.  */
03138       assert (p < pend);
03139 
03140       switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
03141     {
03142 
03143         /* I guess the idea here is to simply not bother with a fastmap
03144            if a backreference is used, since it's too hard to figure out
03145            the fastmap for the corresponding group.  Setting
03146            `can_be_null' stops `re_search_2' from using the fastmap, so
03147            that is all we do.  */
03148     case duplicate:
03149       bufp->can_be_null = 1;
03150           goto done;
03151 
03152 
03153       /* Following are the cases which match a character.  These end
03154          with `break'.  */
03155 
03156     case exactn:
03157           fastmap[p[1]] = 1;
03158       break;
03159 
03160 
03161         case charset:
03162           for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
03163         if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
03164               fastmap[j] = 1;
03165       break;
03166 
03167 
03168     case charset_not:
03169       /* Chars beyond end of map must be allowed.  */
03170       for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
03171             fastmap[j] = 1;
03172 
03173       for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
03174         if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
03175               fastmap[j] = 1;
03176           break;
03177 
03178 
03179     case wordchar:
03180       for (j = 0; j < (1 << BYTEWIDTH); j++)
03181         if (SYNTAX (j) == Sword)
03182           fastmap[j] = 1;
03183       break;
03184 
03185 
03186     case notwordchar:
03187       for (j = 0; j < (1 << BYTEWIDTH); j++)
03188         if (SYNTAX (j) != Sword)
03189           fastmap[j] = 1;
03190       break;
03191 
03192 
03193         case anychar:
03194       {
03195         int fastmap_newline = fastmap['\n'];
03196 
03197         /* `.' matches anything ...  */
03198         for (j = 0; j < (1 << BYTEWIDTH); j++)
03199           fastmap[j] = 1;
03200 
03201         /* ... except perhaps newline.  */
03202         if (!(bufp->syntax & RE_DOT_NEWLINE))
03203           fastmap['\n'] = fastmap_newline;
03204 
03205         /* Return if we have already set `can_be_null'; if we have,
03206            then the fastmap is irrelevant.  Something's wrong here.  */
03207         else if (bufp->can_be_null)
03208           goto done;
03209 
03210         /* Otherwise, have to check alternative paths.  */
03211         break;
03212       }
03213 
03214 #ifdef emacs
03215         case syntaxspec:
03216       k = *p++;
03217       for (j = 0; j < (1 << BYTEWIDTH); j++)
03218         if (SYNTAX (j) == (enum syntaxcode) k)
03219           fastmap[j] = 1;
03220       break;
03221 
03222 
03223     case notsyntaxspec:
03224       k = *p++;
03225       for (j = 0; j < (1 << BYTEWIDTH); j++)
03226         if (SYNTAX (j) != (enum syntaxcode) k)
03227           fastmap[j] = 1;
03228       break;
03229 
03230 
03231       /* All cases after this match the empty string.  These end with
03232          `continue'.  */
03233 
03234 
03235     case before_dot:
03236     case at_dot:
03237     case after_dot:
03238           continue;
03239 #endif /* emacs */
03240 
03241 
03242         case no_op:
03243         case begline:
03244         case endline:
03245     case begbuf:
03246     case endbuf:
03247     case wordbound:
03248     case notwordbound:
03249     case wordbeg:
03250     case wordend:
03251         case push_dummy_failure:
03252           continue;
03253 
03254 
03255     case jump_n:
03256         case pop_failure_jump:
03257     case maybe_pop_jump:
03258     case jump:
03259         case jump_past_alt:
03260     case dummy_failure_jump:
03261           EXTRACT_NUMBER_AND_INCR (j, p);
03262       p += j;
03263       if (j > 0)
03264         continue;
03265 
03266           /* Jump backward implies we just went through the body of a
03267              loop and matched nothing.  Opcode jumped to should be
03268              `on_failure_jump' or `succeed_n'.  Just treat it like an
03269              ordinary jump.  For a * loop, it has pushed its failure
03270              point already; if so, discard that as redundant.  */
03271           if ((re_opcode_t) *p != on_failure_jump
03272           && (re_opcode_t) *p != succeed_n)
03273         continue;
03274 
03275           p++;
03276           EXTRACT_NUMBER_AND_INCR (j, p);
03277           p += j;
03278 
03279           /* If what's on the stack is where we are now, pop it.  */
03280           if (!FAIL_STACK_EMPTY ()
03281           && fail_stack.stack[fail_stack.avail - 1].pointer == p)
03282             fail_stack.avail--;
03283 
03284           continue;
03285 
03286 
03287         case on_failure_jump:
03288         case on_failure_keep_string_jump:
03289     handle_on_failure_jump:
03290           EXTRACT_NUMBER_AND_INCR (j, p);
03291 
03292           /* For some patterns, e.g., `(a?)?', `p+j' here points to the
03293              end of the pattern.  We don't want to push such a point,
03294              since when we restore it above, entering the switch will
03295              increment `p' past the end of the pattern.  We don't need
03296              to push such a point since we obviously won't find any more
03297              fastmap entries beyond `pend'.  Such a pattern can match
03298              the null string, though.  */
03299           if (p + j < pend)
03300             {
03301               if (!PUSH_PATTERN_OP (p + j, fail_stack))
03302         {
03303           RESET_FAIL_STACK ();
03304           return -2;
03305         }
03306             }
03307           else
03308             bufp->can_be_null = 1;
03309 
03310           if (succeed_n_p)
03311             {
03312               EXTRACT_NUMBER_AND_INCR (k, p);   /* Skip the n.  */
03313               succeed_n_p = false;
03314         }
03315 
03316           continue;
03317 
03318 
03319     case succeed_n:
03320           /* Get to the number of times to succeed.  */
03321           p += 2;
03322 
03323           /* Increment p past the n for when k != 0.  */
03324           EXTRACT_NUMBER_AND_INCR (k, p);
03325           if (k == 0)
03326         {
03327               p -= 4;
03328           succeed_n_p = true;  /* Spaghetti code alert.  */
03329               goto handle_on_failure_jump;
03330             }
03331           continue;
03332 
03333 
03334     case set_number_at:
03335           p += 4;
03336           continue;
03337 
03338 
03339     case start_memory:
03340         case stop_memory:
03341       p += 2;
03342       continue;
03343 
03344 
03345     default:
03346           abort (); /* We have listed all the cases.  */
03347         } /* switch *p++ */
03348 
03349       /* Getting here means we have found the possible starting
03350          characters for one path of the pattern -- and that the empty
03351          string does not match.  We need not follow this path further.
03352          Instead, look at the next alternative (remembered on the
03353          stack), or quit if no more.  The test at the top of the loop
03354          does these things.  */
03355       path_can_be_null = false;
03356       p = pend;
03357     } /* while p */
03358 
03359   /* Set `can_be_null' for the last path (also the first path, if the
03360      pattern is empty).  */
03361   bufp->can_be_null |= path_can_be_null;

const char* re_compile_pattern ( const char *  pattern,
size_t  length,
struct re_pattern_buffer *  bufp 
)

Definition at line 5383 of file regex.c.

05392 {
05393   reg_errcode_t ret;
05394 
05395   /* GNU code is written to assume at least RE_NREGS registers will be set
05396      (and at least one extra will be -1).  */
05397   bufp->regs_allocated = REGS_UNALLOCATED;
05398 
05399   /* And GNU code determines whether or not to get register information
05400      by passing null for the REGS argument to re_match, etc., not by
05401      setting no_sub.  */
05402   bufp->no_sub = 0;
05403 
05404   /* Match anchors at newline.  */
05405   bufp->newline_anchor = 1;
05406 
05407   ret = regex_compile (pattern, length, re_syntax_options, bufp);

int re_match ( struct re_pattern_buffer *  bufp,
const char *  string,
int  size,
int  pos,
struct re_registers *  regs 
)

Definition at line 3655 of file regex.c.

03665 {
03666   int result = re_match_2_internal (bufp, NULL, 0, string, size,
03667                     pos, regs, size);
03668 #ifndef REGEX_MALLOC
03669 #ifdef C_ALLOCA

int re_match_2 ( struct re_pattern_buffer *  bufp,
const char *  string1,
int  size1,
const char *  string2,
int  size2,
int  pos,
struct re_registers *  regs,
int  stop 
)

Definition at line 3698 of file regex.c.

03710 {
03711   int result = re_match_2_internal (bufp, string1, size1, string2, size2,
03712                     pos, regs, stop);
03713 #ifndef REGEX_MALLOC
03714 #ifdef C_ALLOCA

static int re_match_2_internal ( struct re_pattern_buffer *  bufp,
const char *  string1,
int  size1,
const char *  string2,
int  size2,
int  pos,
struct re_registers *  regs,
int  stop 
) [static]

Definition at line 3719 of file regex.c.

03731 {
03732   /* General temporaries.  */
03733   int mcnt;
03734   unsigned char *p1;
03735 
03736   /* Just past the end of the corresponding string.  */
03737   const char *end1, *end2;
03738 
03739   /* Pointers into string1 and string2, just past the last characters in
03740      each to consider matching.  */
03741   const char *end_match_1, *end_match_2;
03742 
03743   /* Where we are in the data, and the end of the current string.  */
03744   const char *d, *dend;
03745 
03746   /* Where we are in the pattern, and the end of the pattern.  */
03747   unsigned char *p = bufp->buffer;
03748   register unsigned char *pend = p + bufp->used;
03749 
03750   /* Mark the opcode just after a start_memory, so we can test for an
03751      empty subpattern when we get to the stop_memory.  */
03752   unsigned char *just_past_start_mem = 0;
03753 
03754   /* We use this to map every character in the string.  */
03755   RE_TRANSLATE_TYPE translate = bufp->translate;
03756 
03757   /* Failure point stack.  Each place that can handle a failure further
03758      down the line pushes a failure point on this stack.  It consists of
03759      restart, regend, and reg_info for all registers corresponding to
03760      the subexpressions we're currently inside, plus the number of such
03761      registers, and, finally, two char *'s.  The first char * is where
03762      to resume scanning the pattern; the second one is where to resume
03763      scanning the strings.  If the latter is zero, the failure point is
03764      a ``dummy''; if a failure happens and the failure point is a dummy,
03765      it gets discarded and the next one is tried.  */
03766 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
03767   fail_stack_type fail_stack;
03768 #endif
03769 #ifdef DEBUG
03770   static unsigned failure_id = 0;
03771   unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
03772 #endif
03773 
03774 #ifdef REL_ALLOC
03775   /* This holds the pointer to the failure stack, when
03776      it is allocated relocatably.  */
03777   fail_stack_elt_t *failure_stack_ptr;
03778 #endif
03779 
03780   /* We fill all the registers internally, independent of what we
03781      return, for use in backreferences.  The number here includes
03782      an element for register zero.  */
03783   size_t num_regs = bufp->re_nsub + 1;
03784 
03785   /* The currently active registers.  */
03786   active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
03787   active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
03788 
03789   /* Information on the contents of registers. These are pointers into
03790      the input strings; they record just what was matched (on this
03791      attempt) by a subexpression part of the pattern, that is, the
03792      regnum-th regstart pointer points to where in the string we began
03793      matching and the regnum-th regend points to right after where we
03794      stopped matching the regnum-th subexpression.  (The zeroth register
03795      keeps track of what the whole pattern matches.)  */
03796 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
03797   const char **regstart, **regend;
03798 #endif
03799 
03800   /* If a group that's operated upon by a repetition operator fails to
03801      match anything, then the register for its start will need to be
03802      restored because it will have been set to wherever in the string we
03803      are when we last see its open-group operator.  Similarly for a
03804      register's end.  */
03805 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
03806   const char **old_regstart, **old_regend;
03807 #endif
03808 
03809   /* The is_active field of reg_info helps us keep track of which (possibly
03810      nested) subexpressions we are currently in. The matched_something
03811      field of reg_info[reg_num] helps us tell whether or not we have
03812      matched any of the pattern so far this time through the reg_num-th
03813      subexpression.  These two fields get reset each time through any
03814      loop their register is in.  */
03815 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
03816   register_info_type *reg_info;
03817 #endif
03818 
03819   /* The following record the register info as found in the above
03820      variables when we find a match better than any we've seen before.
03821      This happens as we backtrack through the failure points, which in
03822      turn happens only if we have not yet matched the entire string. */
03823   unsigned best_regs_set = false;
03824 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
03825   const char **best_regstart, **best_regend;
03826 #endif
03827 
03828   /* Logically, this is `best_regend[0]'.  But we don't want to have to
03829      allocate space for that if we're not allocating space for anything
03830      else (see below).  Also, we never need info about register 0 for
03831      any of the other register vectors, and it seems rather a kludge to
03832      treat `best_regend' differently than the rest.  So we keep track of
03833      the end of the best match so far in a separate variable.  We
03834      initialize this to NULL so that when we backtrack the first time
03835      and need to test it, it's not garbage.  */
03836   const char *match_end = NULL;
03837 
03838   /* This helps SET_REGS_MATCHED avoid doing redundant work.  */
03839   int set_regs_matched_done = 0;
03840 
03841   /* Used when we pop values we don't care about.  */
03842 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
03843   const char **reg_dummy;
03844   register_info_type *reg_info_dummy;
03845 #endif
03846 
03847 #ifdef DEBUG
03848   /* Counts the total number of registers pushed.  */
03849   unsigned num_regs_pushed = 0;
03850 #endif
03851 
03852   DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
03853 
03854   INIT_FAIL_STACK ();
03855 
03856 #ifdef MATCH_MAY_ALLOCATE
03857   /* Do not bother to initialize all the register variables if there are
03858      no groups in the pattern, as it takes a fair amount of time.  If
03859      there are groups, we include space for register 0 (the whole
03860      pattern), even though we never use it, since it simplifies the
03861      array indexing.  We should fix this.  */
03862   if (bufp->re_nsub)
03863     {
03864       regstart = REGEX_TALLOC (num_regs, const char *);
03865       regend = REGEX_TALLOC (num_regs, const char *);
03866       old_regstart = REGEX_TALLOC (num_regs, const char *);
03867       old_regend = REGEX_TALLOC (num_regs, const char *);
03868       best_regstart = REGEX_TALLOC (num_regs, const char *);
03869       best_regend = REGEX_TALLOC (num_regs, const char *);
03870       reg_info = REGEX_TALLOC (num_regs, register_info_type);
03871       reg_dummy = REGEX_TALLOC (num_regs, const char *);
03872       reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
03873 
03874       if (!(regstart && regend && old_regstart && old_regend && reg_info
03875             && best_regstart && best_regend && reg_dummy && reg_info_dummy))
03876         {
03877           FREE_VARIABLES ();
03878           return -2;
03879         }
03880     }
03881   else
03882     {
03883       /* We must initialize all our variables to NULL, so that
03884          `FREE_VARIABLES' doesn't try to free them.  */
03885       regstart = regend = old_regstart = old_regend = best_regstart
03886         = best_regend = reg_dummy = NULL;
03887       reg_info = reg_info_dummy = (register_info_type *) NULL;
03888     }
03889 #endif /* MATCH_MAY_ALLOCATE */
03890 
03891   /* The starting position is bogus.  */
03892   if (pos < 0 || pos > size1 + size2)
03893     {
03894       FREE_VARIABLES ();
03895       return -1;
03896     }
03897 
03898   /* Initialize subexpression text positions to -1 to mark ones that no
03899      start_memory/stop_memory has been seen for. Also initialize the
03900      register information struct.  */
03901   for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
03902     {
03903       regstart[mcnt] = regend[mcnt]
03904         = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
03905 
03906       REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
03907       IS_ACTIVE (reg_info[mcnt]) = 0;
03908       MATCHED_SOMETHING (reg_info[mcnt]) = 0;
03909       EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
03910     }
03911 
03912   /* We move `string1' into `string2' if the latter's empty -- but not if
03913      `string1' is null.  */
03914   if (size2 == 0 && string1 != NULL)
03915     {
03916       string2 = string1;
03917       size2 = size1;
03918       string1 = 0;
03919       size1 = 0;
03920     }
03921   end1 = string1 + size1;
03922   end2 = string2 + size2;
03923 
03924   /* Compute where to stop matching, within the two strings.  */
03925   if (stop <= size1)
03926     {
03927       end_match_1 = string1 + stop;
03928       end_match_2 = string2;
03929     }
03930   else
03931     {
03932       end_match_1 = end1;
03933       end_match_2 = string2 + stop - size1;
03934     }
03935 
03936   /* `p' scans through the pattern as `d' scans through the data.
03937      `dend' is the end of the input string that `d' points within.  `d'
03938      is advanced into the following input string whenever necessary, but
03939      this happens before fetching; therefore, at the beginning of the
03940      loop, `d' can be pointing at the end of a string, but it cannot
03941      equal `string2'.  */
03942   if (size1 > 0 && pos <= size1)
03943     {
03944       d = string1 + pos;
03945       dend = end_match_1;
03946     }
03947   else
03948     {
03949       d = string2 + pos - size1;
03950       dend = end_match_2;
03951     }
03952 
03953   DEBUG_PRINT1 ("The compiled pattern is:\n");
03954   DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
03955   DEBUG_PRINT1 ("The string to match is: `");
03956   DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
03957   DEBUG_PRINT1 ("'\n");
03958 
03959   /* This loops over pattern commands.  It exits by returning from the
03960      function if the match is complete, or it drops through if the match
03961      fails at this starting point in the input data.  */
03962   for (;;)
03963     {
03964 #ifdef _LIBC
03965       DEBUG_PRINT2 ("\n%p: ", p);
03966 #else
03967       DEBUG_PRINT2 ("\n0x%x: ", p);
03968 #endif
03969 
03970       if (p == pend)
03971     { /* End of pattern means we might have succeeded.  */
03972           DEBUG_PRINT1 ("end of pattern ... ");
03973 
03974       /* If we haven't matched the entire string, and we want the
03975              longest match, try backtracking.  */
03976           if (d != end_match_2)
03977         {
03978           /* 1 if this match ends in the same string (string1 or string2)
03979          as the best previous match.  */
03980           boolean same_str_p = (FIRST_STRING_P (match_end)
03981                     == MATCHING_IN_FIRST_STRING);
03982           /* 1 if this match is the best seen so far.  */
03983           boolean best_match_p;
03984 
03985           /* AIX compiler got confused when this was combined
03986          with the previous declaration.  */
03987           if (same_str_p)
03988         best_match_p = d > match_end;
03989           else
03990         best_match_p = !MATCHING_IN_FIRST_STRING;
03991 
03992               DEBUG_PRINT1 ("backtracking.\n");
03993 
03994               if (!FAIL_STACK_EMPTY ())
03995                 { /* More failure points to try.  */
03996 
03997                   /* If exceeds best match so far, save it.  */
03998                   if (!best_regs_set || best_match_p)
03999                     {
04000                       best_regs_set = true;
04001                       match_end = d;
04002 
04003                       DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
04004 
04005                       for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
04006                         {
04007                           best_regstart[mcnt] = regstart[mcnt];
04008                           best_regend[mcnt] = regend[mcnt];
04009                         }
04010                     }
04011                   goto fail;
04012                 }
04013 
04014               /* If no failure points, don't restore garbage.  And if
04015                  last match is real best match, don't restore second
04016                  best one. */
04017               else if (best_regs_set && !best_match_p)
04018                 {
04019             restore_best_regs:
04020                   /* Restore best match.  It may happen that `dend ==
04021                      end_match_1' while the restored d is in string2.
04022                      For example, the pattern `x.*y.*z' against the
04023                      strings `x-' and `y-z-', if the two strings are
04024                      not consecutive in memory.  */
04025                   DEBUG_PRINT1 ("Restoring best registers.\n");
04026 
04027                   d = match_end;
04028                   dend = ((d >= string1 && d <= end1)
04029                    ? end_match_1 : end_match_2);
04030 
04031           for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
04032             {
04033               regstart[mcnt] = best_regstart[mcnt];
04034               regend[mcnt] = best_regend[mcnt];
04035             }
04036                 }
04037             } /* d != end_match_2 */
04038 
04039     succeed_label:
04040           DEBUG_PRINT1 ("Accepting match.\n");
04041 
04042           /* If caller wants register contents data back, do it.  */
04043           if (regs && !bufp->no_sub)
04044         {
04045               /* Have the register data arrays been allocated?  */
04046               if (bufp->regs_allocated == REGS_UNALLOCATED)
04047                 { /* No.  So allocate them with malloc.  We need one
04048                      extra element beyond `num_regs' for the `-1' marker
04049                      GNU code uses.  */
04050                   regs->num_regs = MAX (RE_NREGS, num_regs + 1);
04051                   regs->start = TALLOC (regs->num_regs, regoff_t);
04052                   regs->end = TALLOC (regs->num_regs, regoff_t);
04053                   if (regs->start == NULL || regs->end == NULL)
04054             {
04055               FREE_VARIABLES ();
04056               return -2;
04057             }
04058                   bufp->regs_allocated = REGS_REALLOCATE;
04059                 }
04060               else if (bufp->regs_allocated == REGS_REALLOCATE)
04061                 { /* Yes.  If we need more elements than were already
04062                      allocated, reallocate them.  If we need fewer, just
04063                      leave it alone.  */
04064                   if (regs->num_regs < num_regs + 1)
04065                     {
04066                       regs->num_regs = num_regs + 1;
04067                       RETALLOC (regs->start, regs->num_regs, regoff_t);
04068                       RETALLOC (regs->end, regs->num_regs, regoff_t);
04069                       if (regs->start == NULL || regs->end == NULL)
04070             {
04071               FREE_VARIABLES ();
04072               return -2;
04073             }
04074                     }
04075                 }
04076               else
04077         {
04078           /* These braces fend off a "empty body in an else-statement"
04079              warning under GCC when assert expands to nothing.  */
04080           assert (bufp->regs_allocated == REGS_FIXED);
04081         }
04082 
04083               /* Convert the pointer data in `regstart' and `regend' to
04084                  indices.  Register zero has to be set differently,
04085                  since we haven't kept track of any info for it.  */
04086               if (regs->num_regs > 0)
04087                 {
04088                   regs->start[0] = pos;
04089                   regs->end[0] = (MATCHING_IN_FIRST_STRING
04090                   ? ((regoff_t) (d - string1))
04091                       : ((regoff_t) (d - string2 + size1)));
04092                 }
04093 
04094               /* Go through the first `min (num_regs, regs->num_regs)'
04095                  registers, since that is all we initialized.  */
04096           for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs);
04097            mcnt++)
04098         {
04099                   if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
04100                     regs->start[mcnt] = regs->end[mcnt] = -1;
04101                   else
04102                     {
04103               regs->start[mcnt]
04104             = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
04105                       regs->end[mcnt]
04106             = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
04107                     }
04108         }
04109 
04110               /* If the regs structure we return has more elements than
04111                  were in the pattern, set the extra elements to -1.  If
04112                  we (re)allocated the registers, this is the case,
04113                  because we always allocate enough to have at least one
04114                  -1 at the end.  */
04115               for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++)
04116                 regs->start[mcnt] = regs->end[mcnt] = -1;
04117         } /* regs && !bufp->no_sub */
04118 
04119           DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
04120                         nfailure_points_pushed, nfailure_points_popped,
04121                         nfailure_points_pushed - nfailure_points_popped);
04122           DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
04123 
04124           mcnt = d - pos - (MATCHING_IN_FIRST_STRING
04125                 ? string1
04126                 : string2 - size1);
04127 
04128           DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
04129 
04130           FREE_VARIABLES ();
04131           return mcnt;
04132         }
04133 
04134       /* Otherwise match next pattern command.  */
04135       switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
04136     {
04137         /* Ignore these.  Used to ignore the n of succeed_n's which
04138            currently have n == 0.  */
04139         case no_op:
04140           DEBUG_PRINT1 ("EXECUTING no_op.\n");
04141           break;
04142 
04143     case succeed:
04144           DEBUG_PRINT1 ("EXECUTING succeed.\n");
04145       goto succeed_label;
04146 
04147         /* Match the next n pattern characters exactly.  The following
04148            byte in the pattern defines n, and the n bytes after that
04149            are the characters to match.  */
04150     case exactn:
04151       mcnt = *p++;
04152           DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
04153 
04154           /* This is written out as an if-else so we don't waste time
04155              testing `translate' inside the loop.  */
04156           if (translate)
04157         {
04158           do
04159         {
04160           PREFETCH ();
04161           if ((unsigned char) translate[(unsigned char) *d++]
04162               != (unsigned char) *p++)
04163                     goto fail;
04164         }
04165           while (--mcnt);
04166         }
04167       else
04168         {
04169           do
04170         {
04171           PREFETCH ();
04172           if (*d++ != (char) *p++) goto fail;
04173         }
04174           while (--mcnt);
04175         }
04176       SET_REGS_MATCHED ();
04177           break;
04178 
04179 
04180         /* Match any character except possibly a newline or a null.  */
04181     case anychar:
04182           DEBUG_PRINT1 ("EXECUTING anychar.\n");
04183 
04184           PREFETCH ();
04185 
04186           if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
04187               || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
04188         goto fail;
04189 
04190           SET_REGS_MATCHED ();
04191           DEBUG_PRINT2 ("  Matched `%d'.\n", *d);
04192           d++;
04193       break;
04194 
04195 
04196     case charset:
04197     case charset_not:
04198       {
04199         register unsigned char c;
04200         boolean not = (re_opcode_t) *(p - 1) == charset_not;
04201 
04202             DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
04203 
04204         PREFETCH ();
04205         c = TRANSLATE (*d); /* The character to match.  */
04206 
04207             /* Cast to `unsigned' instead of `unsigned char' in case the
04208                bit list is a full 32 bytes long.  */
04209         if (c < (unsigned) (*p * BYTEWIDTH)
04210         && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
04211           not = !not;
04212 
04213         p += 1 + *p;
04214 
04215         if (!not) goto fail;
04216 
04217         SET_REGS_MATCHED ();
04218             d++;
04219         break;
04220       }
04221 
04222 
04223         /* The beginning of a group is represented by start_memory.
04224            The arguments are the register number in the next byte, and the
04225            number of groups inner to this one in the next.  The text
04226            matched within the group is recorded (in the internal
04227            registers data structure) under the register number.  */
04228         case start_memory:
04229       DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
04230 
04231           /* Find out if this group can match the empty string.  */
04232       p1 = p;       /* To send to group_match_null_string_p.  */
04233 
04234           if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
04235             REG_MATCH_NULL_STRING_P (reg_info[*p])
04236                   = group_match_null_string_p (&p1, pend, reg_info);
04237 
04238           /* Save the position in the string where we were the last time
04239              we were at this open-group operator in case the group is
04240              operated upon by a repetition operator, e.g., with `(a*)*b'
04241              against `ab'; then we want to ignore where we are now in
04242              the string in case this attempt to match fails.  */
04243           old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
04244                              ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
04245                              : regstart[*p];
04246       DEBUG_PRINT2 ("  old_regstart: %d\n",
04247              POINTER_TO_OFFSET (old_regstart[*p]));
04248 
04249           regstart[*p] = d;
04250       DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
04251 
04252           IS_ACTIVE (reg_info[*p]) = 1;
04253           MATCHED_SOMETHING (reg_info[*p]) = 0;
04254 
04255       /* Clear this whenever we change the register activity status.  */
04256       set_regs_matched_done = 0;
04257 
04258           /* This is the new highest active register.  */
04259           highest_active_reg = *p;
04260 
04261           /* If nothing was active before, this is the new lowest active
04262              register.  */
04263           if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
04264             lowest_active_reg = *p;
04265 
04266           /* Move past the register number and inner group count.  */
04267           p += 2;
04268       just_past_start_mem = p;
04269 
04270           break;
04271 
04272 
04273         /* The stop_memory opcode represents the end of a group.  Its
04274            arguments are the same as start_memory's: the register
04275            number, and the number of inner groups.  */
04276     case stop_memory:
04277       DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
04278 
04279           /* We need to save the string position the last time we were at
04280              this close-group operator in case the group is operated
04281              upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
04282              against `aba'; then we want to ignore where we are now in
04283              the string in case this attempt to match fails.  */
04284           old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
04285                            ? REG_UNSET (regend[*p]) ? d : regend[*p]
04286                : regend[*p];
04287       DEBUG_PRINT2 ("      old_regend: %d\n",
04288              POINTER_TO_OFFSET (old_regend[*p]));
04289 
04290           regend[*p] = d;
04291       DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
04292 
04293           /* This register isn't active anymore.  */
04294           IS_ACTIVE (reg_info[*p]) = 0;
04295 
04296       /* Clear this whenever we change the register activity status.  */
04297       set_regs_matched_done = 0;
04298 
04299           /* If this was the only register active, nothing is active
04300              anymore.  */
04301           if (lowest_active_reg == highest_active_reg)
04302             {
04303               lowest_active_reg = NO_LOWEST_ACTIVE_REG;
04304               highest_active_reg = NO_HIGHEST_ACTIVE_REG;
04305             }
04306           else
04307             { /* We must scan for the new highest active register, since
04308                  it isn't necessarily one less than now: consider
04309                  (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
04310                  new highest active register is 1.  */
04311               unsigned char r = *p - 1;
04312               while (r > 0 && !IS_ACTIVE (reg_info[r]))
04313                 r--;
04314 
04315               /* If we end up at register zero, that means that we saved
04316                  the registers as the result of an `on_failure_jump', not
04317                  a `start_memory', and we jumped to past the innermost
04318                  `stop_memory'.  For example, in ((.)*) we save
04319                  registers 1 and 2 as a result of the *, but when we pop
04320                  back to the second ), we are at the stop_memory 1.
04321                  Thus, nothing is active.  */
04322           if (r == 0)
04323                 {
04324                   lowest_active_reg = NO_LOWEST_ACTIVE_REG;
04325                   highest_active_reg = NO_HIGHEST_ACTIVE_REG;
04326                 }
04327               else
04328                 highest_active_reg = r;
04329             }
04330 
04331           /* If just failed to match something this time around with a
04332              group that's operated on by a repetition operator, try to
04333              force exit from the ``loop'', and restore the register
04334              information for this group that we had before trying this
04335              last match.  */
04336           if ((!MATCHED_SOMETHING (reg_info[*p])
04337                || just_past_start_mem == p - 1)
04338           && (p + 2) < pend)
04339             {
04340               boolean is_a_jump_n = false;
04341 
04342               p1 = p + 2;
04343               mcnt = 0;
04344               switch ((re_opcode_t) *p1++)
04345                 {
04346                   case jump_n:
04347             is_a_jump_n = true;
04348                   case pop_failure_jump:
04349           case maybe_pop_jump:
04350           case jump:
04351           case dummy_failure_jump:
04352                     EXTRACT_NUMBER_AND_INCR (mcnt, p1);
04353             if (is_a_jump_n)
04354               p1 += 2;
04355                     break;
04356 
04357                   default:
04358                     /* do nothing */ ;
04359                 }
04360           p1 += mcnt;
04361 
04362               /* If the next operation is a jump backwards in the pattern
04363              to an on_failure_jump right before the start_memory
04364                  corresponding to this stop_memory, exit from the loop
04365                  by forcing a failure after pushing on the stack the
04366                  on_failure_jump's jump in the pattern, and d.  */
04367               if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
04368                   && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
04369         {
04370                   /* If this group ever matched anything, then restore
04371                      what its registers were before trying this last
04372                      failed match, e.g., with `(a*)*b' against `ab' for
04373                      regstart[1], and, e.g., with `((a*)*(b*)*)*'
04374                      against `aba' for regend[3].
04375 
04376                      Also restore the registers for inner groups for,
04377                      e.g., `((a*)(b*))*' against `aba' (register 3 would
04378                      otherwise get trashed).  */
04379 
04380                   if (EVER_MATCHED_SOMETHING (reg_info[*p]))
04381             {
04382               unsigned r;
04383 
04384                       EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
04385 
04386               /* Restore this and inner groups' (if any) registers.  */
04387                       for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1);
04388                r++)
04389                         {
04390                           regstart[r] = old_regstart[r];
04391 
04392                           /* xx why this test?  */
04393                           if (old_regend[r] >= regstart[r])
04394                             regend[r] = old_regend[r];
04395                         }
04396                     }
04397           p1++;
04398                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
04399                   PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
04400 
04401                   goto fail;
04402                 }
04403             }
04404 
04405           /* Move past the register number and the inner group count.  */
04406           p += 2;
04407           break;
04408 
04409 
04410     /* <digit> has been turned into a `duplicate' command which is
04411            followed by the numeric value of <digit> as the register number.  */
04412         case duplicate:
04413       {
04414         register const char *d2, *dend2;
04415         int regno = *p++;   /* Get which register to match against.  */
04416         DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
04417 
04418         /* Can't back reference a group which we've never matched.  */
04419             if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
04420               goto fail;
04421 
04422             /* Where in input to try to start matching.  */
04423             d2 = regstart[regno];
04424 
04425             /* Where to stop matching; if both the place to start and
04426                the place to stop matching are in the same string, then
04427                set to the place to stop, otherwise, for now have to use
04428                the end of the first string.  */
04429 
04430             dend2 = ((FIRST_STRING_P (regstart[regno])
04431               == FIRST_STRING_P (regend[regno]))
04432              ? regend[regno] : end_match_1);
04433         for (;;)
04434           {
04435         /* If necessary, advance to next segment in register
04436                    contents.  */
04437         while (d2 == dend2)
04438           {
04439             if (dend2 == end_match_2) break;
04440             if (dend2 == regend[regno]) break;
04441 
04442                     /* End of string1 => advance to string2. */
04443                     d2 = string2;
04444                     dend2 = regend[regno];
04445           }
04446         /* At end of register contents => success */
04447         if (d2 == dend2) break;
04448 
04449         /* If necessary, advance to next segment in data.  */
04450         PREFETCH ();
04451 
04452         /* How many characters left in this segment to match.  */
04453         mcnt = dend - d;
04454 
04455         /* Want how many consecutive characters we can match in
04456                    one shot, so, if necessary, adjust the count.  */
04457                 if (mcnt > dend2 - d2)
04458           mcnt = dend2 - d2;
04459 
04460         /* Compare that many; failure if mismatch, else move
04461                    past them.  */
04462         if (translate
04463                     ? bcmp_translate (d, d2, mcnt, translate)
04464                     : bcmp (d, d2, mcnt))
04465           goto fail;
04466         d += mcnt, d2 += mcnt;
04467 
04468         /* Do this because we've matched some characters.  */
04469         SET_REGS_MATCHED ();
04470           }
04471       }
04472       break;
04473 
04474 
04475         /* begline matches the empty string at the beginning of the string
04476            (unless `not_bol' is set in `bufp'), and, if
04477            `newline_anchor' is set, after newlines.  */
04478     case begline:
04479           DEBUG_PRINT1 ("EXECUTING begline.\n");
04480 
04481           if (AT_STRINGS_BEG (d))
04482             {
04483               if (!bufp->not_bol) break;
04484             }
04485           else if (d[-1] == '\n' && bufp->newline_anchor)
04486             {
04487               break;
04488             }
04489           /* In all other cases, we fail.  */
04490           goto fail;
04491 
04492 
04493         /* endline is the dual of begline.  */
04494     case endline:
04495           DEBUG_PRINT1 ("EXECUTING endline.\n");
04496 
04497           if (AT_STRINGS_END (d))
04498             {
04499               if (!bufp->not_eol) break;
04500             }
04501 
04502           /* We have to ``prefetch'' the next character.  */
04503           else if ((d == end1 ? *string2 : *d) == '\n'
04504                    && bufp->newline_anchor)
04505             {
04506               break;
04507             }
04508           goto fail;
04509 
04510 
04511     /* Match at the very beginning of the data.  */
04512         case begbuf:
04513           DEBUG_PRINT1 ("EXECUTING begbuf.\n");
04514           if (AT_STRINGS_BEG (d))
04515             break;
04516           goto fail;
04517 
04518 
04519     /* Match at the very end of the data.  */
04520         case endbuf:
04521           DEBUG_PRINT1 ("EXECUTING endbuf.\n");
04522       if (AT_STRINGS_END (d))
04523         break;
04524           goto fail;
04525 
04526 
04527         /* on_failure_keep_string_jump is used to optimize `.*\n'.  It
04528            pushes NULL as the value for the string on the stack.  Then
04529            `pop_failure_point' will keep the current value for the
04530            string, instead of restoring it.  To see why, consider
04531            matching `foo\nbar' against `.*\n'.  The .* matches the foo;
04532            then the . fails against the \n.  But the next thing we want
04533            to do is match the \n against the \n; if we restored the
04534            string value, we would be back at the foo.
04535 
04536            Because this is used only in specific cases, we don't need to
04537            check all the things that `on_failure_jump' does, to make
04538            sure the right things get saved on the stack.  Hence we don't
04539            share its code.  The only reason to push anything on the
04540            stack at all is that otherwise we would have to change
04541            `anychar's code to do something besides goto fail in this
04542            case; that seems worse than this.  */
04543         case on_failure_keep_string_jump:
04544           DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
04545 
04546           EXTRACT_NUMBER_AND_INCR (mcnt, p);
04547 #ifdef _LIBC
04548           DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt);
04549 #else
04550           DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
04551 #endif
04552 
04553           PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
04554           break;
04555 
04556 
04557     /* Uses of on_failure_jump:
04558 
04559            Each alternative starts with an on_failure_jump that points
04560            to the beginning of the next alternative.  Each alternative
04561            except the last ends with a jump that in effect jumps past
04562            the rest of the alternatives.  (They really jump to the
04563            ending jump of the following alternative, because tensioning
04564            these jumps is a hassle.)
04565 
04566            Repeats start with an on_failure_jump that points past both
04567            the repetition text and either the following jump or
04568            pop_failure_jump back to this on_failure_jump.  */
04569     case on_failure_jump:
04570         on_failure:
04571           DEBUG_PRINT1 ("EXECUTING on_failure_jump");
04572 
04573           EXTRACT_NUMBER_AND_INCR (mcnt, p);
04574 #ifdef _LIBC
04575           DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt);
04576 #else
04577           DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
04578 #endif
04579 
04580           /* If this on_failure_jump comes right before a group (i.e.,
04581              the original * applied to a group), save the information
04582              for that group and all inner ones, so that if we fail back
04583              to this point, the group's information will be correct.
04584              For example, in \(a*\)*\1, we need the preceding group,
04585              and in \(zz\(a*\)b*\)\2, we need the inner group.  */
04586 
04587           /* We can't use `p' to check ahead because we push
04588              a failure point to `p + mcnt' after we do this.  */
04589           p1 = p;
04590 
04591           /* We need to skip no_op's before we look for the
04592              start_memory in case this on_failure_jump is happening as
04593              the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
04594              against aba.  */
04595           while (p1 < pend && (re_opcode_t) *p1 == no_op)
04596             p1++;
04597 
04598           if (p1 < pend && (re_opcode_t) *p1 == start_memory)
04599             {
04600               /* We have a new highest active register now.  This will
04601                  get reset at the start_memory we are about to get to,
04602                  but we will have saved all the registers relevant to
04603                  this repetition op, as described above.  */
04604               highest_active_reg = *(p1 + 1) + *(p1 + 2);
04605               if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
04606                 lowest_active_reg = *(p1 + 1);
04607             }
04608 
04609           DEBUG_PRINT1 (":\n");
04610           PUSH_FAILURE_POINT (p + mcnt, d, -2);
04611           break;
04612 
04613 
04614         /* A smart repeat ends with `maybe_pop_jump'.
04615        We change it to either `pop_failure_jump' or `jump'.  */
04616         case maybe_pop_jump:
04617           EXTRACT_NUMBER_AND_INCR (mcnt, p);
04618           DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
04619           {
04620         register unsigned char *p2 = p;
04621 
04622             /* Compare the beginning of the repeat with what in the
04623                pattern follows its end. If we can establish that there
04624                is nothing that they would both match, i.e., nothing that
04625                would force us to backtrack (as there is in, e.g., `a*a'),
04626                then we can change to pop_failure_jump, because we'll
04627                never have to backtrack.
04628 
04629                This is not true in the case of alternatives: in
04630                `(a|ab)*' we do need to backtrack to the `ab' alternative
04631                (e.g., if the string was `ab').  But instead of trying to
04632                detect that here, the alternative has put on a dummy
04633                failure point which is what we will end up popping.  */
04634 
04635         /* Skip over open/close-group commands.
04636            If what follows this loop is a ...+ construct,
04637            look at what begins its body, since we will have to
04638            match at least one of that.  */
04639         while (1)
04640           {
04641         if (p2 + 2 < pend
04642             && ((re_opcode_t) *p2 == stop_memory
04643             || (re_opcode_t) *p2 == start_memory))
04644           p2 += 3;
04645         else if (p2 + 6 < pend
04646              && (re_opcode_t) *p2 == dummy_failure_jump)
04647           p2 += 6;
04648         else
04649           break;
04650           }
04651 
04652         p1 = p + mcnt;
04653         /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
04654            to the `maybe_pop_jump' of this case.  Examine what
04655            follows.  */
04656 
04657             /* If we're at the end of the pattern, we can change.  */
04658             if (p2 == pend)
04659           {
04660         /* Consider what happens when matching ":\(.*\)"
04661            against ":/".  I don't really understand this code
04662            yet.  */
04663             p[-3] = (unsigned char) pop_failure_jump;
04664                 DEBUG_PRINT1
04665                   ("  End of pattern: change to `pop_failure_jump'.\n");
04666               }
04667 
04668             else if ((re_opcode_t) *p2 == exactn
04669              || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
04670           {
04671         register unsigned char c
04672                   = *p2 == (unsigned char) endline ? '\n' : p2[2];
04673 
04674                 if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
04675                   {
04676             p[-3] = (unsigned char) pop_failure_jump;
04677                     DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
04678                                   c, p1[5]);
04679                   }
04680 
04681         else if ((re_opcode_t) p1[3] == charset
04682              || (re_opcode_t) p1[3] == charset_not)
04683           {
04684             int not = (re_opcode_t) p1[3] == charset_not;
04685 
04686             if (c < (unsigned char) (p1[4] * BYTEWIDTH)
04687             && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
04688               not = !not;
04689 
04690                     /* `not' is equal to 1 if c would match, which means
04691                         that we can't change to pop_failure_jump.  */
04692             if (!not)
04693                       {
04694                 p[-3] = (unsigned char) pop_failure_jump;
04695                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
04696                       }
04697           }
04698           }
04699             else if ((re_opcode_t) *p2 == charset)
04700           {
04701 #ifdef DEBUG
04702         register unsigned char c
04703                   = *p2 == (unsigned char) endline ? '\n' : p2[2];
04704 #endif
04705 
04706 #if 0
04707                 if ((re_opcode_t) p1[3] == exactn
04708             && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
04709               && (p2[2 + p1[5] / BYTEWIDTH]
04710                   & (1 << (p1[5] % BYTEWIDTH)))))
04711 #else
04712                 if ((re_opcode_t) p1[3] == exactn
04713             && ! ((int) p2[1] * BYTEWIDTH > (int) p1[4]
04714               && (p2[2 + p1[4] / BYTEWIDTH]
04715                   & (1 << (p1[4] % BYTEWIDTH)))))
04716 #endif
04717                   {
04718             p[-3] = (unsigned char) pop_failure_jump;
04719                     DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
04720                                   c, p1[5]);
04721                   }
04722 
04723         else if ((re_opcode_t) p1[3] == charset_not)
04724           {
04725             int idx;
04726             /* We win if the charset_not inside the loop
04727                lists every character listed in the charset after.  */
04728             for (idx = 0; idx < (int) p2[1]; idx++)
04729               if (! (p2[2 + idx] == 0
04730                  || (idx < (int) p1[4]
04731                  && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
04732             break;
04733 
04734             if (idx == p2[1])
04735                       {
04736                 p[-3] = (unsigned char) pop_failure_jump;
04737                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
04738                       }
04739           }
04740         else if ((re_opcode_t) p1[3] == charset)
04741           {
04742             int idx;
04743             /* We win if the charset inside the loop
04744                has no overlap with the one after the loop.  */
04745             for (idx = 0;
04746              idx < (int) p2[1] && idx < (int) p1[4];
04747              idx++)
04748               if ((p2[2 + idx] & p1[5 + idx]) != 0)
04749             break;
04750 
04751             if (idx == p2[1] || idx == p1[4])
04752                       {
04753                 p[-3] = (unsigned char) pop_failure_jump;
04754                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
04755                       }
04756           }
04757           }
04758       }
04759       p -= 2;       /* Point at relative address again.  */
04760       if ((re_opcode_t) p[-1] != pop_failure_jump)
04761         {
04762           p[-1] = (unsigned char) jump;
04763               DEBUG_PRINT1 ("  Match => jump.\n");
04764           goto unconditional_jump;
04765         }
04766         /* Note fall through.  */
04767 
04768 
04769     /* The end of a simple repeat has a pop_failure_jump back to
04770            its matching on_failure_jump, where the latter will push a
04771            failure point.  The pop_failure_jump takes off failure
04772            points put on by this pop_failure_jump's matching
04773            on_failure_jump; we got through the pattern to here from the
04774            matching on_failure_jump, so didn't fail.  */
04775         case pop_failure_jump:
04776           {
04777             /* We need to pass separate storage for the lowest and
04778                highest registers, even though we don't care about the
04779                actual values.  Otherwise, we will restore only one
04780                register from the stack, since lowest will == highest in
04781                `pop_failure_point'.  */
04782             active_reg_t dummy_low_reg, dummy_high_reg;
04783             unsigned char *pdummy;
04784             const char *sdummy;
04785 
04786             DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
04787             POP_FAILURE_POINT (sdummy, pdummy,
04788                                dummy_low_reg, dummy_high_reg,
04789                                reg_dummy, reg_dummy, reg_info_dummy);
04790           }
04791       /* Note fall through.  */
04792 
04793     unconditional_jump:
04794 #ifdef _LIBC
04795       DEBUG_PRINT2 ("\n%p: ", p);
04796 #else
04797       DEBUG_PRINT2 ("\n0x%x: ", p);
04798 #endif
04799           /* Note fall through.  */
04800 
04801         /* Unconditionally jump (without popping any failure points).  */
04802         case jump:
04803       EXTRACT_NUMBER_AND_INCR (mcnt, p);    /* Get the amount to jump.  */
04804           DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
04805       p += mcnt;                /* Do the jump.  */
04806 #ifdef _LIBC
04807           DEBUG_PRINT2 ("(to %p).\n", p);
04808 #else
04809           DEBUG_PRINT2 ("(to 0x%x).\n", p);
04810 #endif
04811       break;
04812 
04813 
04814         /* We need this opcode so we can detect where alternatives end
04815            in `group_match_null_string_p' et al.  */
04816         case jump_past_alt:
04817           DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
04818           goto unconditional_jump;
04819 
04820 
04821         /* Normally, the on_failure_jump pushes a failure point, which
04822            then gets popped at pop_failure_jump.  We will end up at
04823            pop_failure_jump, also, and with a pattern of, say, `a+', we
04824            are skipping over the on_failure_jump, so we have to push
04825            something meaningless for pop_failure_jump to pop.  */
04826         case dummy_failure_jump:
04827           DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
04828           /* It doesn't matter what we push for the string here.  What
04829              the code at `fail' tests is the value for the pattern.  */
04830           PUSH_FAILURE_POINT (0, 0, -2);
04831           goto unconditional_jump;
04832 
04833 
04834         /* At the end of an alternative, we need to push a dummy failure
04835            point in case we are followed by a `pop_failure_jump', because
04836            we don't want the failure point for the alternative to be
04837            popped.  For example, matching `(a|ab)*' against `aab'
04838            requires that we match the `ab' alternative.  */
04839         case push_dummy_failure:
04840           DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
04841           /* See comments just above at `dummy_failure_jump' about the
04842              two zeroes.  */
04843           PUSH_FAILURE_POINT (0, 0, -2);
04844           break;
04845 
04846         /* Have to succeed matching what follows at least n times.
04847            After that, handle like `on_failure_jump'.  */
04848         case succeed_n:
04849           EXTRACT_NUMBER (mcnt, p + 2);
04850           DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
04851 
04852           assert (mcnt >= 0);
04853           /* Originally, this is how many times we HAVE to succeed.  */
04854           if (mcnt > 0)
04855             {
04856                mcnt--;
04857            p += 2;
04858                STORE_NUMBER_AND_INCR (p, mcnt);
04859 #ifdef _LIBC
04860                DEBUG_PRINT3 ("  Setting %p to %d.\n", p - 2, mcnt);
04861 #else
04862                DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p - 2, mcnt);
04863 #endif
04864             }
04865       else if (mcnt == 0)
04866             {
04867 #ifdef _LIBC
04868               DEBUG_PRINT2 ("  Setting two bytes from %p to no_op.\n", p+2);
04869 #else
04870               DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n", p+2);
04871 #endif
04872           p[2] = (unsigned char) no_op;
04873               p[3] = (unsigned char) no_op;
04874               goto on_failure;
04875             }
04876           break;
04877 
04878         case jump_n:
04879           EXTRACT_NUMBER (mcnt, p + 2);
04880           DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
04881 
04882           /* Originally, this is how many times we CAN jump.  */
04883           if (mcnt)
04884             {
04885                mcnt--;
04886                STORE_NUMBER (p + 2, mcnt);
04887 #ifdef _LIBC
04888                DEBUG_PRINT3 ("  Setting %p to %d.\n", p + 2, mcnt);
04889 #else
04890                DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p + 2, mcnt);
04891 #endif
04892            goto unconditional_jump;
04893             }
04894           /* If we don't have to jump any more, skip over the rest of the command.  */
04895       else
04896         p += 4;
04897           break;
04898 
04899     case set_number_at:
04900       {
04901             DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
04902 
04903             EXTRACT_NUMBER_AND_INCR (mcnt, p);
04904             p1 = p + mcnt;
04905             EXTRACT_NUMBER_AND_INCR (mcnt, p);
04906 #ifdef _LIBC
04907             DEBUG_PRINT3 ("  Setting %p to %d.\n", p1, mcnt);
04908 #else
04909             DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
04910 #endif
04911         STORE_NUMBER (p1, mcnt);
04912             break;
04913           }
04914 
04915 #if 0
04916     /* The DEC Alpha C compiler 3.x generates incorrect code for the
04917        test  WORDCHAR_P (d - 1) != WORDCHAR_P (d)  in the expansion of
04918        AT_WORD_BOUNDARY, so this code is disabled.  Expanding the
04919        macro and introducing temporary variables works around the bug.  */
04920 
04921     case wordbound:
04922       DEBUG_PRINT1 ("EXECUTING wordbound.\n");
04923       if (AT_WORD_BOUNDARY (d))
04924         break;
04925       goto fail;
04926 
04927     case notwordbound:
04928       DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
04929       if (AT_WORD_BOUNDARY (d))
04930         goto fail;
04931       break;
04932 #else
04933     case wordbound:
04934     {
04935       boolean prevchar, thischar;
04936 
04937       DEBUG_PRINT1 ("EXECUTING wordbound.\n");
04938       if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
04939         break;
04940 
04941       prevchar = WORDCHAR_P (d - 1);
04942       thischar = WORDCHAR_P (d);
04943       if (prevchar != thischar)
04944         break;
04945       goto fail;
04946     }
04947 
04948       case notwordbound:
04949     {
04950       boolean prevchar, thischar;
04951 
04952       DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
04953       if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
04954         goto fail;
04955 
04956       prevchar = WORDCHAR_P (d - 1);
04957       thischar = WORDCHAR_P (d);
04958       if (prevchar != thischar)
04959         goto fail;
04960       break;
04961     }
04962 #endif
04963 
04964     case wordbeg:
04965           DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
04966       if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
04967         break;
04968           goto fail;
04969 
04970     case wordend:
04971           DEBUG_PRINT1 ("EXECUTING wordend.\n");
04972       if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
04973               && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
04974         break;
04975           goto fail;
04976 
04977 #ifdef emacs
04978     case before_dot:
04979           DEBUG_PRINT1 ("EXECUTING before_dot.\n");
04980       if (PTR_CHAR_POS ((unsigned char *) d) >= point)
04981         goto fail;
04982       break;
04983 
04984     case at_dot:
04985           DEBUG_PRINT1 ("EXECUTING at_dot.\n");
04986       if (PTR_CHAR_POS ((unsigned char *) d) != point)
04987         goto fail;
04988       break;
04989 
04990     case after_dot:
04991           DEBUG_PRINT1 ("EXECUTING after_dot.\n");
04992           if (PTR_CHAR_POS ((unsigned char *) d) <= point)
04993         goto fail;
04994       break;
04995 
04996     case syntaxspec:
04997           DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
04998       mcnt = *p++;
04999       goto matchsyntax;
05000 
05001         case wordchar:
05002           DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
05003       mcnt = (int) Sword;
05004         matchsyntax:
05005       PREFETCH ();
05006       /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
05007       d++;
05008       if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
05009         goto fail;
05010           SET_REGS_MATCHED ();
05011       break;
05012 
05013     case notsyntaxspec:
05014           DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
05015       mcnt = *p++;
05016       goto matchnotsyntax;
05017 
05018         case notwordchar:
05019           DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
05020       mcnt = (int) Sword;
05021         matchnotsyntax:
05022       PREFETCH ();
05023       /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
05024       d++;
05025       if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
05026         goto fail;
05027       SET_REGS_MATCHED ();
05028           break;
05029 
05030 #else /* not emacs */
05031     case wordchar:
05032           DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
05033       PREFETCH ();
05034           if (!WORDCHAR_P (d))
05035             goto fail;
05036       SET_REGS_MATCHED ();
05037           d++;
05038       break;
05039 
05040     case notwordchar:
05041           DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
05042       PREFETCH ();
05043       if (WORDCHAR_P (d))
05044             goto fail;
05045           SET_REGS_MATCHED ();
05046           d++;
05047       break;
05048 #endif /* not emacs */
05049 
05050         default:
05051           abort ();
05052     }
05053       continue;  /* Successfully executed one pattern command; keep going.  */
05054 
05055 
05056     /* We goto here if a matching operation fails. */
05057     fail:
05058       if (!FAIL_STACK_EMPTY ())
05059     { /* A restart point is known.  Restore to that state.  */
05060           DEBUG_PRINT1 ("\nFAIL:\n");
05061           POP_FAILURE_POINT (d, p,
05062                              lowest_active_reg, highest_active_reg,
05063                              regstart, regend, reg_info);
05064 
05065           /* If this failure point is a dummy, try the next one.  */
05066           if (!p)
05067         goto fail;
05068 
05069           /* If we failed to the end of the pattern, don't examine *p.  */
05070       assert (p <= pend);
05071           if (p < pend)
05072             {
05073               boolean is_a_jump_n = false;
05074 
05075               /* If failed to a backwards jump that's part of a repetition
05076                  loop, need to pop this failure point and use the next one.  */
05077               switch ((re_opcode_t) *p)
05078                 {
05079                 case jump_n:
05080                   is_a_jump_n = true;
05081                 case maybe_pop_jump:
05082                 case pop_failure_jump:
05083                 case jump:
05084                   p1 = p + 1;
05085                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
05086                   p1 += mcnt;
05087 
05088                   if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
05089                       || (!is_a_jump_n
05090                           && (re_opcode_t) *p1 == on_failure_jump))
05091                     goto fail;
05092                   break;
05093                 default:
05094                   /* do nothing */ ;
05095                 }
05096             }
05097 
05098           if (d >= string1 && d <= end1)
05099         dend = end_match_1;
05100         }
05101       else
05102         break;   /* Matching at this starting point really fails.  */
05103     } /* for (;;) */
05104 
05105   if (best_regs_set)
05106     goto restore_best_regs;

static int re_match_2_internal (  )  [static]
int re_search ( struct re_pattern_buffer bufp,
const char *  string,
int  size,
int  startpos,
int  range,
struct re_registers regs 
)

Definition at line 3404 of file regex.c.

03414 {

int re_search_2 ( struct re_pattern_buffer bufp,
const char *  string1,
int  size1,
const char *  string2,
int  size2,
int  startpos,
int  range,
struct re_registers regs,
int  stop 
)

Definition at line 3437 of file regex.c.

03450 {
03451   int val;
03452   register char *fastmap = bufp->fastmap;
03453   register RE_TRANSLATE_TYPE translate = bufp->translate;
03454   int total_size = size1 + size2;
03455   int endpos = startpos + range;
03456 
03457   /* Check for out-of-range STARTPOS.  */
03458   if (startpos < 0 || startpos > total_size)
03459     return -1;
03460 
03461   /* Fix up RANGE if it might eventually take us outside
03462      the virtual concatenation of STRING1 and STRING2.
03463      Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE.  */
03464   if (endpos < 0)
03465     range = 0 - startpos;
03466   else if (endpos > total_size)
03467     range = total_size - startpos;
03468 
03469   /* If the search isn't to be a backwards one, don't waste time in a
03470      search for a pattern that must be anchored.  */
03471   if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
03472     {
03473       if (startpos > 0)
03474     return -1;
03475       else
03476     range = 1;
03477     }
03478 
03479 #ifdef emacs
03480   /* In a forward search for something that starts with \=,
03481      don't keep searching past point.  */
03482   if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
03483     {
03484       range = PT - startpos;
03485       if (range <= 0)
03486     return -1;
03487     }
03488 #endif /* emacs */
03489 
03490   /* Update the fastmap now if not correct already.  */
03491   if (fastmap && !bufp->fastmap_accurate)
03492     if (re_compile_fastmap (bufp) == -2)
03493       return -2;
03494 
03495   /* Loop through the string, looking for a place to start matching.  */
03496   for (;;)
03497     {
03498       /* If a fastmap is supplied, skip quickly over characters that
03499          cannot be the start of a match.  If the pattern can match the
03500          null string, however, we don't need to skip characters; we want
03501          the first null string.  */
03502       if (fastmap && startpos < total_size && !bufp->can_be_null)
03503     {
03504       if (range > 0)    /* Searching forwards.  */
03505         {
03506           register const char *d;
03507           register int lim = 0;
03508           int irange = range;
03509 
03510               if (startpos < size1 && startpos + range >= size1)
03511                 lim = range - (size1 - startpos);
03512 
03513           d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
03514 
03515               /* Written out as an if-else to avoid testing `translate'
03516                  inside the loop.  */
03517           if (translate)
03518                 while (range > lim
03519                        && !fastmap[(unsigned char)
03520                    translate[(unsigned char) *d++]])
03521                   range--;
03522           else
03523                 while (range > lim && !fastmap[(unsigned char) *d++])
03524                   range--;
03525 
03526           startpos += irange - range;
03527         }
03528       else              /* Searching backwards.  */
03529         {
03530           register char c = (size1 == 0 || startpos >= size1
03531                                  ? string2[startpos - size1]
03532                                  : string1[startpos]);
03533 
03534           if (!fastmap[(unsigned char) TRANSLATE (c)])
03535         goto advance;
03536         }
03537     }
03538 
03539       /* If can't match the null string, and that's all we have left, fail.  */
03540       if (range >= 0 && startpos == total_size && fastmap
03541           && !bufp->can_be_null)
03542     return -1;
03543 
03544       val = re_match_2_internal (bufp, string1, size1, string2, size2,
03545                  startpos, regs, stop);
03546 #ifndef REGEX_MALLOC
03547 #ifdef C_ALLOCA
03548       alloca (0);
03549 #endif
03550 #endif
03551 
03552       if (val >= 0)
03553     return startpos;
03554 
03555       if (val == -2)
03556     return -2;
03557 
03558     advance:
03559       if (!range)
03560         break;
03561       else if (range > 0)
03562         {
03563           range--;
03564           startpos++;
03565         }
03566       else
03567         {
03568           range++;

void re_set_registers ( struct re_pattern_buffer bufp,
struct re_registers regs,
unsigned  num_regs,
regoff_t starts,
regoff_t ends 
)

Definition at line 3377 of file regex.c.

03387 {
03388   if (num_regs)
03389     {
03390       bufp->regs_allocated = REGS_REALLOCATE;
03391       regs->num_regs = num_regs;
03392       regs->start = starts;
03393       regs->end = ends;
03394     }
03395   else
03396     {

reg_syntax_t re_set_syntax ( reg_syntax_t  syntax  ) 

Definition at line 953 of file regex.c.

00955 {
00956   reg_syntax_t ret = re_syntax_options; /* Save the syntax in effect before this call; it is the return value.  */
00957 
00958   re_syntax_options = syntax; /* Install SYNTAX as the current syntax.  */
00959 #ifdef DEBUG
00960   if (syntax & RE_DEBUG) /* The `debug' flag tracks the RE_DEBUG bit of SYNTAX.  */
00961     debug = 1;
00962   else if (debug) /* was on but now is not */
00963     debug = 0;
00964 #endif /* DEBUG */
00965   return ret; /* Hand back the previous syntax.  */
00966 }

int regcomp ( regex_t preg,
const char *  pattern,
int  cflags 
)

Definition at line 5517 of file regex.c.

05526 {
05527   reg_errcode_t ret;
05528   reg_syntax_t syntax
05529     = (cflags & REG_EXTENDED) ?
05530       RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
05531 
05532   /* regex_compile will allocate the space for the compiled pattern.  */
05533   preg->buffer = 0;
05534   preg->allocated = 0;
05535   preg->used = 0;
05536 
05537   /* Don't bother to use a fastmap when searching.  This simplifies the
05538      REG_NEWLINE case: if we used a fastmap, we'd have to put all the
05539      characters after newlines into the fastmap.  This way, we just try
05540      every character.  */
05541   preg->fastmap = 0;
05542 
05543   if (cflags & REG_ICASE)
05544     {
05545       unsigned i;
05546 
05547       preg->translate
05548     = (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
05549                       * sizeof (*(RE_TRANSLATE_TYPE)0));
05550       if (preg->translate == NULL)
05551         return (int) REG_ESPACE;
05552 
05553       /* Map uppercase characters to corresponding lowercase ones.  */
05554       for (i = 0; i < CHAR_SET_SIZE; i++)
05555         preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
05556     }
05557   else
05558     preg->translate = NULL;
05559 
05560   /* If REG_NEWLINE is set, newlines are treated differently.  */
05561   if (cflags & REG_NEWLINE)
05562     { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
05563       syntax &= ~RE_DOT_NEWLINE;
05564       syntax |= RE_HAT_LISTS_NOT_NEWLINE;
05565       /* It also changes the matching behavior.  */
05566       preg->newline_anchor = 1;
05567     }
05568   else
05569     preg->newline_anchor = 0;
05570 
05571   preg->no_sub = !!(cflags & REG_NOSUB);
05572 
05573   /* POSIX says a null character in the pattern terminates it, so we
05574      can use strlen here in compiling the pattern.  */
05575   ret = regex_compile (pattern, strlen (pattern), syntax, preg);
05576 
05577   /* POSIX doesn't distinguish between an unmatched open-group and an

size_t regerror ( int  errcode,
const regex_t *  preg,
char *  errbuf,
size_t  errbuf_size 
)

Definition at line 5660 of file regex.c.

05666 {
05667   const char *msg;
05668   size_t msg_size;
05669 
05670   if (errcode < 0
05671       || errcode >= (int) (sizeof (re_error_msgid)
05672                / sizeof (re_error_msgid[0])))
05673     /* Only error codes returned by the rest of the code should be passed
05674        to this routine.  If we are given anything else, or if other regex
05675        code generates an invalid error code, then the program has a bug.
05676        Dump core so we can fix it.  */
05677     abort ();
05678 
05679   msg = gettext (re_error_msgid[errcode]);
05680 
05681   msg_size = strlen (msg) + 1; /* Includes the null.  */
05682 
05683   if (errbuf_size != 0)
05684     {
05685       if (msg_size > errbuf_size)
05686         {
05687           strncpy (errbuf, msg, errbuf_size - 1);
05688           errbuf[errbuf_size - 1] = 0;
05689         }
05690       else

static reg_errcode_t regex_compile ( const char *  pattern,
size_t  size,
reg_syntax_t  syntax,
struct re_pattern_buffer *  bufp 
) [static]

Definition at line 1776 of file regex.c.

01786 {
01787   /* We fetch characters from PATTERN here.  Even though PATTERN is
01788      `char *' (i.e., signed), we declare these variables as unsigned, so
01789      they can be reliably used as array indices.  */
01790   register unsigned char c, c1;
01791 
01792   /* A random temporary spot in PATTERN.  */
01793   const char *p1;
01794 
01795   /* Points to the end of the buffer, where we should append.  */
01796   register unsigned char *b;
01797 
01798   /* Keeps track of unclosed groups.  */
01799   compile_stack_type compile_stack;
01800 
01801   /* Points to the current (ending) position in the pattern.  */
01802   const char *p = pattern;
01803   const char *pend = pattern + size;
01804 
01805   /* How to translate the characters in the pattern.  */
01806   RE_TRANSLATE_TYPE translate = bufp->translate;
01807 
01808   /* Address of the count-byte of the most recently inserted `exactn'
01809      command.  This makes it possible to tell if a new exact-match
01810      character can be added to that command or if the character requires
01811      a new `exactn' command.  */
01812   unsigned char *pending_exact = 0;
01813 
01814   /* Address of start of the most recently finished expression.
01815      This tells, e.g., postfix * where to find the start of its
01816      operand.  Reset at the beginning of groups and alternatives.  */
01817   unsigned char *laststart = 0;
01818 
01819   /* Address of beginning of regexp, or inside of last group.  */
01820   unsigned char *begalt;
01821 
01822   /* Place in the uncompiled pattern (i.e., the {) to
01823      which to go back if the interval is invalid.  */
01824   const char *beg_interval;
01825 
01826   /* Address of the place where a forward jump should go to the end of
01827      the containing expression.  Each alternative of an `or' -- except the
01828      last -- ends with a forward jump of this sort.  */
01829   unsigned char *fixup_alt_jump = 0;
01830 
01831   /* Counts open-groups as they are encountered.  Remembered for the
01832      matching close-group on the compile stack, so the same register
01833      number is put in the stop_memory as the start_memory.  */
01834   regnum_t regnum = 0;
01835 
01836 #ifdef DEBUG
01837   DEBUG_PRINT1 ("\nCompiling pattern: ");
01838   if (debug)
01839     {
01840       unsigned debug_count;
01841 
01842       for (debug_count = 0; debug_count < size; debug_count++)
01843         putchar (pattern[debug_count]);
01844       putchar ('\n');
01845     }
01846 #endif /* DEBUG */
01847 
01848   /* Initialize the compile stack.  */
01849   compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
01850   if (compile_stack.stack == NULL)
01851     return REG_ESPACE;
01852 
01853   compile_stack.size = INIT_COMPILE_STACK_SIZE;
01854   compile_stack.avail = 0;
01855 
01856   /* Initialize the pattern buffer.  */
01857   bufp->syntax = syntax;
01858   bufp->fastmap_accurate = 0;
01859   bufp->not_bol = bufp->not_eol = 0;
01860 
01861   /* Set `used' to zero, so that if we return an error, the pattern
01862      printer (for debugging) will think there's no pattern.  We reset it
01863      at the end.  */
01864   bufp->used = 0;
01865 
01866   /* Always count groups, whether or not bufp->no_sub is set.  */
01867   bufp->re_nsub = 0;
01868 
01869 #if !defined (emacs) && !defined (SYNTAX_TABLE)
01870   /* Initialize the syntax table.  */
01871    init_syntax_once ();
01872 #endif
01873 
01874   if (bufp->allocated == 0)
01875     {
01876       if (bufp->buffer)
01877     { /* If zero allocated, but buffer is non-null, try to realloc
01878              enough space.  This loses if buffer's address is bogus, but
01879              that is the user's responsibility.  */
01880           RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
01881         }
01882       else
01883         { /* Caller did not allocate a buffer.  Do it for them.  */
01884           bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
01885         }
01886       if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);
01887 
01888       bufp->allocated = INIT_BUF_SIZE;
01889     }
01890 
01891   begalt = b = bufp->buffer;
01892 
01893   /* Loop through the uncompiled pattern until we're at the end.  */
01894   while (p != pend)
01895     {
01896       PATFETCH (c);
01897 
01898       switch (c)
01899         {
01900         case '^':
01901           {
01902             if (   /* If at start of pattern, it's an operator.  */
01903                    p == pattern + 1
01904                    /* If context independent, it's an operator.  */
01905                 || syntax & RE_CONTEXT_INDEP_ANCHORS
01906                    /* Otherwise, depends on what's come before.  */
01907                 || at_begline_loc_p (pattern, p, syntax))
01908               BUF_PUSH (begline);
01909             else
01910               goto normal_char;
01911           }
01912           break;
01913 
01914 
01915         case '$':
01916           {
01917             if (   /* If at end of pattern, it's an operator.  */
01918                    p == pend
01919                    /* If context independent, it's an operator.  */
01920                 || syntax & RE_CONTEXT_INDEP_ANCHORS
01921                    /* Otherwise, depends on what's next.  */
01922                 || at_endline_loc_p (p, pend, syntax))
01923                BUF_PUSH (endline);
01924              else
01925                goto normal_char;
01926            }
01927            break;
01928 
01929 
01930     case '+':
01931         case '?':
01932           if ((syntax & RE_BK_PLUS_QM)
01933               || (syntax & RE_LIMITED_OPS))
01934             goto normal_char;
01935         handle_plus:
01936         case '*':
01937           /* If there is no previous pattern... */
01938           if (!laststart)
01939             {
01940               if (syntax & RE_CONTEXT_INVALID_OPS)
01941                 FREE_STACK_RETURN (REG_BADRPT);
01942               else if (!(syntax & RE_CONTEXT_INDEP_OPS))
01943                 goto normal_char;
01944             }
01945 
01946           {
01947             /* Are we optimizing this jump?  */
01948             boolean keep_string_p = false;
01949 
01950             /* 1 means zero (many) matches is allowed.  */
01951             char zero_times_ok = 0, many_times_ok = 0;
01952 
01953             /* If there is a sequence of repetition chars, collapse it
01954                down to just one (the right one).  We can't combine
01955                interval operators with these because of, e.g., `a{2}*',
01956                which should only match an even number of `a's.  */
01957 
01958             for (;;)
01959               {
01960                 zero_times_ok |= c != '+';
01961                 many_times_ok |= c != '?';
01962 
01963                 if (p == pend)
01964                   break;
01965 
01966                 PATFETCH (c);
01967 
01968                 if (c == '*'
01969                     || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
01970                   ;
01971 
01972                 else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
01973                   {
01974                     if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
01975 
01976                     PATFETCH (c1);
01977                     if (!(c1 == '+' || c1 == '?'))
01978                       {
01979                         PATUNFETCH;
01980                         PATUNFETCH;
01981                         break;
01982                       }
01983 
01984                     c = c1;
01985                   }
01986                 else
01987                   {
01988                     PATUNFETCH;
01989                     break;
01990                   }
01991 
01992                 /* If we get here, we found another repeat character.  */
01993                }
01994 
01995             /* Star, etc. applied to an empty pattern is equivalent
01996                to an empty pattern.  */
01997             if (!laststart)
01998               break;
01999 
02000             /* Now we know whether or not zero matches is allowed
02001                and also whether or not two or more matches is allowed.  */
02002             if (many_times_ok)
02003               { /* More than one repetition is allowed, so put in at the
02004                    end a backward relative jump from `b' to before the next
02005                    jump we're going to put in below (which jumps from
02006                    laststart to after this jump).
02007 
02008                    But if we are at the `*' in the exact sequence `.*\n',
02009                    insert an unconditional jump backwards to the .,
02010                    instead of the beginning of the loop.  This way we only
02011                    push a failure point once, instead of every time
02012                    through the loop.  */
02013                 assert (p - 1 > pattern);
02014 
02015                 /* Allocate the space for the jump.  */
02016                 GET_BUFFER_SPACE (3);
02017 
02018                 /* We know we are not at the first character of the pattern,
02019                    because laststart was nonzero.  And we've already
02020                    incremented `p', by the way, to be the character after
02021                    the `*'.  Do we have to do something analogous here
02022                    for null bytes, because of RE_DOT_NOT_NULL?  */
02023                 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
02024             && zero_times_ok
02025                     && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
02026                     && !(syntax & RE_DOT_NEWLINE))
02027                   { /* We have .*\n.  */
02028                     STORE_JUMP (jump, b, laststart);
02029                     keep_string_p = true;
02030                   }
02031                 else
02032                   /* Anything else.  */
02033                   STORE_JUMP (maybe_pop_jump, b, laststart - 3);
02034 
02035                 /* We've added more stuff to the buffer.  */
02036                 b += 3;
02037               }
02038 
02039             /* On failure, jump from laststart to b + 3, which will be the
02040                end of the buffer after this jump is inserted.  */
02041             GET_BUFFER_SPACE (3);
02042             INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
02043                                        : on_failure_jump,
02044                          laststart, b + 3);
02045             pending_exact = 0;
02046             b += 3;
02047 
02048             if (!zero_times_ok)
02049               {
02050                 /* At least one repetition is required, so insert a
02051                    `dummy_failure_jump' before the initial
02052                    `on_failure_jump' instruction of the loop. This
02053                    effects a skip over that instruction the first time
02054                    we hit that loop.  */
02055                 GET_BUFFER_SPACE (3);
02056                 INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
02057                 b += 3;
02058               }
02059             }
02060       break;
02061 
02062 
02063     case '.':
02064           laststart = b;
02065           BUF_PUSH (anychar);
02066           break;
02067 
02068 
02069         case '[':
02070           {
02071             boolean had_char_class = false;
02072 
02073             if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
02074 
02075             /* Ensure that we have enough space to push a charset: the
02076                opcode, the length count, and the bitset; 34 bytes in all.  */
02077         GET_BUFFER_SPACE (34);
02078 
02079             laststart = b;
02080 
02081             /* We test `*p == '^' twice, instead of using an if
02082                statement, so we only need one BUF_PUSH.  */
02083             BUF_PUSH (*p == '^' ? charset_not : charset);
02084             if (*p == '^')
02085               p++;
02086 
02087             /* Remember the first position in the bracket expression.  */
02088             p1 = p;
02089 
02090             /* Push the number of bytes in the bitmap.  */
02091             BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
02092 
02093             /* Clear the whole map.  */
02094             bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
02095 
02096             /* charset_not matches newline according to a syntax bit.  */
02097             if ((re_opcode_t) b[-2] == charset_not
02098                 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
02099               SET_LIST_BIT ('\n');
02100 
02101             /* Read in characters and ranges, setting map bits.  */
02102             for (;;)
02103               {
02104                 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
02105 
02106                 PATFETCH (c);
02107 
02108                 /* \ might escape characters inside [...] and [^...].  */
02109                 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
02110                   {
02111                     if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
02112 
02113                     PATFETCH (c1);
02114                     SET_LIST_BIT (c1);
02115                     continue;
02116                   }
02117 
02118                 /* Could be the end of the bracket expression.  If it's
02119                    not (i.e., when the bracket expression is `[]' so
02120                    far), the ']' character bit gets set way below.  */
02121                 if (c == ']' && p != p1 + 1)
02122                   break;
02123 
02124                 /* Look ahead to see if it's a range when the last thing
02125                    was a character class.  */
02126                 if (had_char_class && c == '-' && *p != ']')
02127                   FREE_STACK_RETURN (REG_ERANGE);
02128 
02129                 /* Look ahead to see if it's a range when the last thing
02130                    was a character: if this is a hyphen not at the
02131                    beginning or the end of a list, then it's the range
02132                    operator.  */
02133                 if (c == '-'
02134                     && !(p - 2 >= pattern && p[-2] == '[')
02135                     && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
02136                     && *p != ']')
02137                   {
02138                     reg_errcode_t ret
02139                       = compile_range (&p, pend, translate, syntax, b);
02140                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
02141                   }
02142 
02143                 else if (p[0] == '-' && p[1] != ']')
02144                   { /* This handles ranges made up of characters only.  */
02145                     reg_errcode_t ret;
02146 
02147             /* Move past the `-'.  */
02148                     PATFETCH (c1);
02149 
02150                     ret = compile_range (&p, pend, translate, syntax, b);
02151                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
02152                   }
02153 
02154                 /* See if we're at the beginning of a possible character
02155                    class.  */
02156 
02157                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
02158                   { /* Leave room for the null.  */
02159                     char str[CHAR_CLASS_MAX_LENGTH + 1];
02160 
02161                     PATFETCH (c);
02162                     c1 = 0;
02163 
02164                     /* If pattern is `[[:'.  */
02165                     if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
02166 
02167                     for (;;)
02168                       {
02169                         PATFETCH (c);
02170                         if (c == ':' || c == ']' || p == pend
02171                             || c1 == CHAR_CLASS_MAX_LENGTH)
02172                           break;
02173                         str[c1++] = c;
02174                       }
02175                     str[c1] = '\0';
02176 
02177                     /* If it isn't a word bracketed by `[:' and `:]':
02178                        undo the ending character, the letters, and leave
02179                        the leading `:' and `[' (but set bits for them).  */
02180                     if (c == ':' && *p == ']')
02181                       {
02182 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
02183                         boolean is_lower = STREQ (str, "lower");
02184                         boolean is_upper = STREQ (str, "upper");
02185             wctype_t wt;
02186                         int ch;
02187 
02188             wt = wctype (str);
02189             if (wt == 0)
02190               FREE_STACK_RETURN (REG_ECTYPE);
02191 
02192                         /* Throw away the ] at the end of the character
02193                            class.  */
02194                         PATFETCH (c);
02195 
02196                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
02197 
02198                         for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
02199               {
02200                 if (iswctype (btowc (ch), wt))
02201                   SET_LIST_BIT (ch);
02202 
02203                 if (translate && (is_upper || is_lower)
02204                 && (ISUPPER (ch) || ISLOWER (ch)))
02205                   SET_LIST_BIT (ch);
02206               }
02207 
02208                         had_char_class = true;
02209 #else
02210                         int ch;
02211                         boolean is_alnum = STREQ (str, "alnum");
02212                         boolean is_alpha = STREQ (str, "alpha");
02213                         boolean is_blank = STREQ (str, "blank");
02214                         boolean is_cntrl = STREQ (str, "cntrl");
02215                         boolean is_digit = STREQ (str, "digit");
02216                         boolean is_graph = STREQ (str, "graph");
02217                         boolean is_lower = STREQ (str, "lower");
02218                         boolean is_print = STREQ (str, "print");
02219                         boolean is_punct = STREQ (str, "punct");
02220                         boolean is_space = STREQ (str, "space");
02221                         boolean is_upper = STREQ (str, "upper");
02222                         boolean is_xdigit = STREQ (str, "xdigit");
02223 
02224                         if (!IS_CHAR_CLASS (str))
02225               FREE_STACK_RETURN (REG_ECTYPE);
02226 
02227                         /* Throw away the ] at the end of the character
02228                            class.  */
02229                         PATFETCH (c);
02230 
02231                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
02232 
02233                                 for (ch = 0; ch < (1 << BYTEWIDTH); ch++)
02234                           {
02235                 /* This was split into 3 if's to
02236                    avoid an arbitrary limit in some compiler.  */
02237                             if (   (is_alnum  && ISALNUM (ch))
02238                                 || (is_alpha  && ISALPHA (ch))
02239                                 || (is_blank  && ISBLANK (ch))
02240                                 || (is_cntrl  && ISCNTRL (ch)))
02241                   SET_LIST_BIT (ch);
02242                 if (   (is_digit  && ISDIGIT (ch))
02243                                 || (is_graph  && ISGRAPH (ch))
02244                                 || (is_lower  && ISLOWER (ch))
02245                                 || (is_print  && ISPRINT (ch)))
02246                   SET_LIST_BIT (ch);
02247                 if (   (is_punct  && ISPUNCT (ch))
02248                                 || (is_space  && ISSPACE (ch))
02249                                 || (is_upper  && ISUPPER (ch))
02250                                 || (is_xdigit && ISXDIGIT (ch)))
02251                   SET_LIST_BIT (ch);
02252                 if (   translate && (is_upper || is_lower)
02253                 && (ISUPPER (ch) || ISLOWER (ch)))
02254                   SET_LIST_BIT (ch);
02255                           }
02256                         had_char_class = true;
02257 #endif  /* libc || wctype.h */
02258                       }
02259                     else
02260                       {
02261                         c1++;
02262                         while (c1--)
02263                           PATUNFETCH;
02264                         SET_LIST_BIT ('[');
02265                         SET_LIST_BIT (':');
02266                         had_char_class = false;
02267                       }
02268                   }
02269                 else
02270                   {
02271                     had_char_class = false;
02272                     SET_LIST_BIT (c);
02273                   }
02274               }
02275 
02276             /* Discard any (non)matching list bytes that are all 0 at the
02277                end of the map.  Decrease the map-length byte too.  */
02278             while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
02279               b[-1]--;
02280             b += b[-1];
02281           }
02282           break;
02283 
02284 
02285     case '(':
02286           if (syntax & RE_NO_BK_PARENS)
02287             goto handle_open;
02288           else
02289             goto normal_char;
02290 
02291 
02292         case ')':
02293           if (syntax & RE_NO_BK_PARENS)
02294             goto handle_close;
02295           else
02296             goto normal_char;
02297 
02298 
02299         case '\n':
02300           if (syntax & RE_NEWLINE_ALT)
02301             goto handle_alt;
02302           else
02303             goto normal_char;
02304 
02305 
02306     case '|':
02307           if (syntax & RE_NO_BK_VBAR)
02308             goto handle_alt;
02309           else
02310             goto normal_char;
02311 
02312 
02313         case '{':
02314            if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
02315              goto handle_interval;
02316            else
02317              goto normal_char;
02318 
02319 
02320         case '\\':
02321           if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
02322 
02323           /* Do not translate the character after the \, so that we can
02324              distinguish, e.g., \B from \b, even if we normally would
02325              translate, e.g., B to b.  */
02326           PATFETCH_RAW (c);
02327 
02328           switch (c)
02329             {
02330             case '(':
02331               if (syntax & RE_NO_BK_PARENS)
02332                 goto normal_backslash;
02333 
02334             handle_open:
02335               bufp->re_nsub++;
02336               regnum++;
02337 
02338               if (COMPILE_STACK_FULL)
02339                 {
02340                   RETALLOC (compile_stack.stack, compile_stack.size << 1,
02341                             compile_stack_elt_t);
02342                   if (compile_stack.stack == NULL) return REG_ESPACE;
02343 
02344                   compile_stack.size <<= 1;
02345                 }
02346 
02347               /* These are the values to restore when we hit end of this
02348                  group.  They are all relative offsets, so that if the
02349                  whole pattern moves because of realloc, they will still
02350                  be valid.  */
02351               COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
02352               COMPILE_STACK_TOP.fixup_alt_jump
02353                 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
02354               COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
02355               COMPILE_STACK_TOP.regnum = regnum;
02356 
02357               /* We will eventually replace the 0 with the number of
02358                  groups inner to this one.  But do not push a
02359                  start_memory for groups beyond the last one we can
02360                  represent in the compiled pattern.  */
02361               if (regnum <= MAX_REGNUM)
02362                 {
02363                   COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
02364                   BUF_PUSH_3 (start_memory, regnum, 0);
02365                 }
02366 
02367               compile_stack.avail++;
02368 
02369               fixup_alt_jump = 0;
02370               laststart = 0;
02371               begalt = b;
02372           /* If we've reached MAX_REGNUM groups, then this open
02373          won't actually generate any code, so we'll have to
02374          clear pending_exact explicitly.  */
02375           pending_exact = 0;
02376               break;
02377 
02378 
02379             case ')':
02380               if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
02381 
02382               if (COMPILE_STACK_EMPTY) {
02383                 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
02384                   goto normal_backslash;
02385         }
02386                 else FREE_STACK_RETURN (REG_ERPAREN);
02387 
02388             handle_close:
02389               if (fixup_alt_jump)
02390                 { /* Push a dummy failure point at the end of the
02391                      alternative for a possible future
02392                      `pop_failure_jump' to pop.  See comments at
02393                      `push_dummy_failure' in `re_match_2'.  */
02394                   BUF_PUSH (push_dummy_failure);
02395 
02396                   /* We allocated space for this jump when we assigned
02397                      to `fixup_alt_jump', in the `handle_alt' case below.  */
02398                   STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
02399                 }
02400 
02401               /* See similar code for backslashed left paren above.  */
02402               if (COMPILE_STACK_EMPTY) {
02403                 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
02404                   goto normal_char;
02405         }
02406                 else FREE_STACK_RETURN (REG_ERPAREN);
02407 
02408               /* Since we just checked for an empty stack above, this
02409                  ``can't happen''.  */
02410               assert (compile_stack.avail != 0);
02411               {
02412                 /* We don't just want to restore into `regnum', because
02413                    later groups should continue to be numbered higher,
02414                    as in `(ab)c(de)' -- the second group is #2.  */
02415                 regnum_t this_group_regnum;
02416 
02417                 compile_stack.avail--;
02418                 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
02419                 fixup_alt_jump
02420                   = COMPILE_STACK_TOP.fixup_alt_jump
02421                     ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
02422                     : 0;
02423                 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
02424                 this_group_regnum = COMPILE_STACK_TOP.regnum;
02425         /* If we've reached MAX_REGNUM groups, then this close
02426            won't actually generate any code, so we'll have to
02427            clear pending_exact explicitly.  */
02428         pending_exact = 0;
02429 
02430                 /* We're at the end of the group, so now we know how many
02431                    groups were inside this one.  */
02432                 if (this_group_regnum <= MAX_REGNUM)
02433                   {
02434                     unsigned char *inner_group_loc
02435                       = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
02436 
02437                     *inner_group_loc = regnum - this_group_regnum;
02438                     BUF_PUSH_3 (stop_memory, this_group_regnum,
02439                                           regnum - this_group_regnum);
02440                   }
02441               }
02442               break;
02443 
02444 
02445             case '|':                   /* `\|'.  */
02446               if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
02447                 goto normal_backslash;
02448             handle_alt:
02449               if (syntax & RE_LIMITED_OPS)
02450                 goto normal_char;
02451 
02452               /* Insert before the previous alternative a jump which
02453                  jumps to this alternative if the former fails.  */
02454               GET_BUFFER_SPACE (3);
02455               INSERT_JUMP (on_failure_jump, begalt, b + 6);
02456               pending_exact = 0;
02457               b += 3;
02458 
02459               /* The alternative before this one has a jump after it
02460                  which gets executed if it gets matched.  Adjust that
02461                  jump so it will jump to this alternative's analogous
02462                  jump (put in below, which in turn will jump to the next
02463                  (if any) alternative's such jump, etc.).  The last such
02464                  jump jumps to the correct final destination.  A picture:
02465                           _____ _____
02466                           |   | |   |
02467                           |   v |   v
02468                          a | b   | c
02469 
02470                  If we are at `b', then fixup_alt_jump right now points to a
02471                  three-byte space after `a'.  We'll put in the jump, set
02472                  fixup_alt_jump to right after `b', and leave behind three
02473                  bytes which we'll fill in when we get to after `c'.  */
02474 
02475               if (fixup_alt_jump)
02476                 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
02477 
02478               /* Mark and leave space for a jump after this alternative,
02479                  to be filled in later either by next alternative or
02480                  when know we're at the end of a series of alternatives.  */
02481               fixup_alt_jump = b;
02482               GET_BUFFER_SPACE (3);
02483               b += 3;
02484 
02485               laststart = 0;
02486               begalt = b;
02487               break;
02488 
02489 
02490             case '{':
02491               /* If \{ is a literal.  */
02492               if (!(syntax & RE_INTERVALS)
02493                      /* If we're at `\{' and it's not the open-interval
02494                         operator.  */
02495                   || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
02496                   || (p - 2 == pattern  &&  p == pend))
02497                 goto normal_backslash;
02498 
02499             handle_interval:
02500               {
02501                 /* If got here, then the syntax allows intervals.  */
02502 
02503                 /* At least (most) this many matches must be made.  */
02504                 int lower_bound = -1, upper_bound = -1;
02505 
02506                 beg_interval = p - 1;
02507 
02508                 if (p == pend)
02509                   {
02510                     if (syntax & RE_NO_BK_BRACES)
02511                       goto unfetch_interval;
02512                     else
02513                       FREE_STACK_RETURN (REG_EBRACE);
02514                   }
02515 
02516                 GET_UNSIGNED_NUMBER (lower_bound);
02517 
02518                 if (c == ',')
02519                   {
02520                     GET_UNSIGNED_NUMBER (upper_bound);
02521                     if (upper_bound < 0) upper_bound = RE_DUP_MAX;
02522                   }
02523                 else
02524                   /* Interval such as `{1}' => match exactly once. */
02525                   upper_bound = lower_bound;
02526 
02527                 if (lower_bound < 0 || upper_bound > RE_DUP_MAX
02528                     || lower_bound > upper_bound)
02529                   {
02530                     if (syntax & RE_NO_BK_BRACES)
02531                       goto unfetch_interval;
02532                     else
02533                       FREE_STACK_RETURN (REG_BADBR);
02534                   }
02535 
02536                 if (!(syntax & RE_NO_BK_BRACES))
02537                   {
02538                     if (c != '\\') FREE_STACK_RETURN (REG_EBRACE);
02539 
02540                     PATFETCH (c);
02541                   }
02542 
02543                 if (c != '}')
02544                   {
02545                     if (syntax & RE_NO_BK_BRACES)
02546                       goto unfetch_interval;
02547                     else
02548                       FREE_STACK_RETURN (REG_BADBR);
02549                   }
02550 
02551                 /* We just parsed a valid interval.  */
02552 
02553                 /* If it's invalid to have no preceding re.  */
02554                 if (!laststart)
02555                   {
02556                     if (syntax & RE_CONTEXT_INVALID_OPS)
02557                       FREE_STACK_RETURN (REG_BADRPT);
02558                     else if (syntax & RE_CONTEXT_INDEP_OPS)
02559                       laststart = b;
02560                     else
02561                       goto unfetch_interval;
02562                   }
02563 
02564                 /* If the upper bound is zero, don't want to succeed at
02565                    all; jump from `laststart' to `b + 3', which will be
02566                    the end of the buffer after we insert the jump.  */
02567                  if (upper_bound == 0)
02568                    {
02569                      GET_BUFFER_SPACE (3);
02570                      INSERT_JUMP (jump, laststart, b + 3);
02571                      b += 3;
02572                    }
02573 
02574                  /* Otherwise, we have a nontrivial interval.  When
02575                     we're all done, the pattern will look like:
02576                       set_number_at <jump count> <upper bound>
02577                       set_number_at <succeed_n count> <lower bound>
02578                       succeed_n <after jump addr> <succeed_n count>
02579                       <body of loop>
02580                       jump_n <succeed_n addr> <jump count>
02581                     (The upper bound and `jump_n' are omitted if
02582                     `upper_bound' is 1, though.)  */
02583                  else
02584                    { /* If the upper bound is > 1, we need to insert
02585                         more at the end of the loop.  */
02586                      unsigned nbytes = 10 + (upper_bound > 1) * 10;
02587 
02588                      GET_BUFFER_SPACE (nbytes);
02589 
02590                      /* Initialize lower bound of the `succeed_n', even
02591                         though it will be set during matching by its
02592                         attendant `set_number_at' (inserted next),
02593                         because `re_compile_fastmap' needs to know.
02594                         Jump to the `jump_n' we might insert below.  */
02595                      INSERT_JUMP2 (succeed_n, laststart,
02596                                    b + 5 + (upper_bound > 1) * 5,
02597                                    lower_bound);
02598                      b += 5;
02599 
02600                      /* Code to initialize the lower bound.  Insert
02601                         before the `succeed_n'.  The `5' is the last two
02602                         bytes of this `set_number_at', plus 3 bytes of
02603                         the following `succeed_n'.  */
02604                      insert_op2 (set_number_at, laststart, 5, lower_bound, b);
02605                      b += 5;
02606 
02607                      if (upper_bound > 1)
02608                        { /* More than one repetition is allowed, so
02609                             append a backward jump to the `succeed_n'
02610                             that starts this interval.
02611 
02612                             When we've reached this during matching,
02613                             we'll have matched the interval once, so
02614                             jump back only `upper_bound - 1' times.  */
02615                          STORE_JUMP2 (jump_n, b, laststart + 5,
02616                                       upper_bound - 1);
02617                          b += 5;
02618 
02619                          /* The location we want to set is the second
02620                             parameter of the `jump_n'; that is `b-2' as
02621                             an absolute address.  `laststart' will be
02622                             the `set_number_at' we're about to insert;
02623                             `laststart+3' the number to set, the source
02624                             for the relative address.  But we are
02625                             inserting into the middle of the pattern --
02626                             so everything is getting moved up by 5.
02627                             Conclusion: (b - 2) - (laststart + 3) + 5,
02628                             i.e., b - laststart.
02629 
02630                             We insert this at the beginning of the loop
02631                             so that if we fail during matching, we'll
02632                             reinitialize the bounds.  */
02633                          insert_op2 (set_number_at, laststart, b - laststart,
02634                                      upper_bound - 1, b);
02635                          b += 5;
02636                        }
02637                    }
02638                 pending_exact = 0;
02639                 beg_interval = NULL;
02640               }
02641               break;
02642 
02643             unfetch_interval:
02644               /* If an invalid interval, match the characters as literals.  */
02645                assert (beg_interval);
02646                p = beg_interval;
02647                beg_interval = NULL;
02648 
02649                /* normal_char and normal_backslash need `c'.  */
02650                PATFETCH (c);
02651 
02652                if (!(syntax & RE_NO_BK_BRACES))
02653                  {
02654                    if (p > pattern  &&  p[-1] == '\\')
02655                      goto normal_backslash;
02656                  }
02657                goto normal_char;
02658 
02659 #ifdef emacs
02660             /* There is no way to specify the before_dot and after_dot
02661                operators.  rms says this is ok.  --karl  */
02662             case '=':
02663               BUF_PUSH (at_dot);
02664               break;
02665 
02666             case 's':
02667               laststart = b;
02668               PATFETCH (c);
02669               BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
02670               break;
02671 
02672             case 'S':
02673               laststart = b;
02674               PATFETCH (c);
02675               BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
02676               break;
02677 #endif /* emacs */
02678 
02679 
02680             case 'w':
02681           if (re_syntax_options & RE_NO_GNU_OPS)
02682         goto normal_char;
02683               laststart = b;
02684               BUF_PUSH (wordchar);
02685               break;
02686 
02687 
02688             case 'W':
02689           if (re_syntax_options & RE_NO_GNU_OPS)
02690         goto normal_char;
02691               laststart = b;
02692               BUF_PUSH (notwordchar);
02693               break;
02694 
02695 
02696             case '<':
02697           if (re_syntax_options & RE_NO_GNU_OPS)
02698         goto normal_char;
02699               BUF_PUSH (wordbeg);
02700               break;
02701 
02702             case '>':
02703           if (re_syntax_options & RE_NO_GNU_OPS)
02704         goto normal_char;
02705               BUF_PUSH (wordend);
02706               break;
02707 
02708             case 'b':
02709           if (re_syntax_options & RE_NO_GNU_OPS)
02710         goto normal_char;
02711               BUF_PUSH (wordbound);
02712               break;
02713 
02714             case 'B':
02715           if (re_syntax_options & RE_NO_GNU_OPS)
02716         goto normal_char;
02717               BUF_PUSH (notwordbound);
02718               break;
02719 
02720             case '`':
02721           if (re_syntax_options & RE_NO_GNU_OPS)
02722         goto normal_char;
02723               BUF_PUSH (begbuf);
02724               break;
02725 
02726             case '\'':
02727           if (re_syntax_options & RE_NO_GNU_OPS)
02728         goto normal_char;
02729               BUF_PUSH (endbuf);
02730               break;
02731 
02732             case '1': case '2': case '3': case '4': case '5':
02733             case '6': case '7': case '8': case '9':
02734               if (syntax & RE_NO_BK_REFS)
02735                 goto normal_char;
02736 
02737               c1 = c - '0';
02738 
02739               if (c1 > regnum)
02740                 FREE_STACK_RETURN (REG_ESUBREG);
02741 
02742               /* Can't back reference to a subexpression if inside of it.  */
02743               if (group_in_compile_stack (compile_stack, (regnum_t) c1))
02744                 goto normal_char;
02745 
02746               laststart = b;
02747               BUF_PUSH_2 (duplicate, c1);
02748               break;
02749 
02750 
02751             case '+':
02752             case '?':
02753               if (syntax & RE_BK_PLUS_QM)
02754                 goto handle_plus;
02755               else
02756                 goto normal_backslash;
02757 
02758             default:
02759             normal_backslash:
02760               /* You might think it would be useful for \ to mean
02761                  not to translate; but if we don't translate it
02762                  it will never match anything.  */
02763               c = TRANSLATE (c);
02764               goto normal_char;
02765             }
02766           break;
02767 
02768 
02769     default:
02770         /* Expects the character in `c'.  */
02771     normal_char:
02772           /* If no exactn currently being built.  */
02773           if (!pending_exact
02774 
02775               /* If last exactn not at current position.  */
02776               || pending_exact + *pending_exact + 1 != b
02777 
02778               /* We have only one byte following the exactn for the count.  */
02779           || *pending_exact == (1 << BYTEWIDTH) - 1
02780 
02781               /* If followed by a repetition operator.  */
02782               || *p == '*' || *p == '^'
02783           || ((syntax & RE_BK_PLUS_QM)
02784           ? *p == '\\' && (p[1] == '+' || p[1] == '?')
02785           : (*p == '+' || *p == '?'))
02786           || ((syntax & RE_INTERVALS)
02787                   && ((syntax & RE_NO_BK_BRACES)
02788               ? *p == '{'
02789                       : (p[0] == '\\' && p[1] == '{'))))
02790         {
02791           /* Start building a new exactn.  */
02792 
02793               laststart = b;
02794 
02795           BUF_PUSH_2 (exactn, 0);
02796           pending_exact = b - 1;
02797             }
02798 
02799       BUF_PUSH (c);
02800           (*pending_exact)++;
02801       break;
02802         } /* switch (c) */
02803     } /* while p != pend */
02804 
02805 
02806   /* Through the pattern now.  */
02807 
02808   if (fixup_alt_jump)
02809     STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
02810 
02811   if (!COMPILE_STACK_EMPTY)
02812     FREE_STACK_RETURN (REG_EPAREN);
02813 
02814   /* If we don't want backtracking, force success
02815      the first time we reach the end of the compiled pattern.  */
02816   if (syntax & RE_NO_POSIX_BACKTRACKING)
02817     BUF_PUSH (succeed);
02818 
02819   free (compile_stack.stack);
02820 
02821   /* We have succeeded; set the length of the buffer.  */
02822   bufp->used = b - bufp->buffer;
02823 
02824 #ifdef DEBUG
02825   if (debug)
02826     {
02827       DEBUG_PRINT1 ("\nCompiled pattern: \n");
02828       print_compiled_pattern (bufp);
02829     }
02830 #endif /* DEBUG */
02831 
02832 #ifndef MATCH_MAY_ALLOCATE
02833   /* Initialize the failure stack to the largest possible stack.  This
02834      isn't necessary unless we're trying to avoid calling alloca in
02835      the search and match routines.  */
02836   {
02837     int num_regs = bufp->re_nsub + 1;
02838 
02839     /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
02840        is strictly greater than re_max_failures, the largest possible stack
02841        is 2 * re_max_failures failure points.  */
02842     if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
02843       {
02844     fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
02845 
02846 #ifdef emacs
02847     if (! fail_stack.stack)
02848       fail_stack.stack
02849         = (fail_stack_elt_t *) xmalloc (fail_stack.size
02850                         * sizeof (fail_stack_elt_t));
02851     else
02852       fail_stack.stack
02853         = (fail_stack_elt_t *) xrealloc (fail_stack.stack,
02854                          (fail_stack.size
02855                           * sizeof (fail_stack_elt_t)));
02856 #else /* not emacs */
02857     if (! fail_stack.stack)
02858       fail_stack.stack
02859         = (fail_stack_elt_t *) malloc (fail_stack.size
02860                        * sizeof (fail_stack_elt_t));
02861     else
02862       fail_stack.stack
02863         = (fail_stack_elt_t *) realloc (fail_stack.stack,
02864                         (fail_stack.size
02865                          * sizeof (fail_stack_elt_t)));
02866 #endif /* not emacs */
02867       }
02868 
02869     regex_grow_registers (num_regs);

int regexec ( const regex_t *  preg,
const char *  string,
size_t  nmatch,
regmatch_t  pmatch[],
int  eflags 
)

Definition at line 5595 of file regex.c.

05606 {
05607   int ret;
05608   struct re_registers regs;
05609   regex_t private_preg;
05610   int len = strlen (string);
05611   boolean want_reg_info = !preg->no_sub && nmatch > 0;
05612 
05613   private_preg = *preg;
05614 
05615   private_preg.not_bol = !!(eflags & REG_NOTBOL);
05616   private_preg.not_eol = !!(eflags & REG_NOTEOL);
05617 
05618   /* The user has told us exactly how many registers to return
05619      information about, via `nmatch'.  We have to pass that on to the
05620      matching routines.  */
05621   private_preg.regs_allocated = REGS_FIXED;
05622 
05623   if (want_reg_info)
05624     {
05625       regs.num_regs = nmatch;
05626       regs.start = TALLOC (nmatch, regoff_t);
05627       regs.end = TALLOC (nmatch, regoff_t);
05628       if (regs.start == NULL || regs.end == NULL)
05629         return (int) REG_NOMATCH;
05630     }
05631 
05632   /* Perform the searching operation.  */
05633   ret = re_search (&private_preg, string, len,
05634                    /* start: */ 0, /* range: */ len,
05635                    want_reg_info ? &regs : (struct re_registers *) 0);
05636 
05637   /* Copy the register information to the POSIX structure.  */
05638   if (want_reg_info)
05639     {
05640       if (ret >= 0)
05641         {
05642           unsigned r;
05643 
05644           for (r = 0; r < nmatch; r++)
05645             {
05646               pmatch[r].rm_so = regs.start[r];
05647               pmatch[r].rm_eo = regs.end[r];
05648             }
05649         }
05650 
05651       /* If we needed the temporary register info, free the space now.  */
05652       free (regs.start);
05653       free (regs.end);

void regfree ( regex_t *  preg ) 

Definition at line 5696 of file regex.c.

05703 {
05704   if (preg->buffer != NULL)
05705     free (preg->buffer);
05706   preg->buffer = NULL;
05707 
05708   preg->allocated = 0;
05709   preg->used = 0;
05710 
05711   if (preg->fastmap != NULL)
05712     free (preg->fastmap);
05713   preg->fastmap = NULL;
05714   preg->fastmap_accurate = 0;

static void store_op1 ( re_opcode_t  op,
unsigned char *  loc,
int  arg 
) [static]

Definition at line 2876 of file regex.c.

02885 {

static void store_op2 ( re_opcode_t  op,
unsigned char *  loc,
int  arg1,
int  arg2 
) [static]

Definition at line 2889 of file regex.c.

02898 {


Variable Documentation

const char* re_error_msgid[] [static]
Initial value:
  {
    gettext_noop ("Success"),   
    gettext_noop ("No match"),  
    gettext_noop ("Invalid regular expression"), 
    gettext_noop ("Invalid collation character"), 
    gettext_noop ("Invalid character class name"), 
    gettext_noop ("Trailing backslash"), 
    gettext_noop ("Invalid back reference"), 
    gettext_noop ("Unmatched [ or [^"), 
    gettext_noop ("Unmatched ( or \\("), 
    gettext_noop ("Unmatched \\{"), 
    gettext_noop ("Invalid content of \\{\\}"), 
    gettext_noop ("Invalid range end"), 
    gettext_noop ("Memory exhausted"), 
    gettext_noop ("Invalid preceding regular expression"), 
    gettext_noop ("Premature end of regular expression"), 
    gettext_noop ("Regular expression too big"), 
    gettext_noop ("Unmatched ) or \\)"), 
  }

Definition at line 973 of file regex.c.

int re_max_failures = 20000

Definition at line 1079 of file regex.c.

reg_syntax_t re_syntax_options

Definition at line 942 of file regex.c.

char re_syntax_table[CHAR_SET_SIZE] [static]

Definition at line 143 of file regex.c.

char reg_unset_dummy [static]

Definition at line 1447 of file regex.c.


Generated on 18 Mar 2013 for The SWORD Project by  doxygen 1.6.1