diff options
-rw-r--r-- | regcomp.c | 72 | ||||
-rw-r--r-- | regexec.c | 16 |
2 files changed, 36 insertions, 52 deletions
diff --git a/regcomp.c b/regcomp.c index e85b2351459c..99fee76b8a8c 100644 --- a/regcomp.c +++ b/regcomp.c @@ -2776,40 +2776,29 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, /* Local function for parse_bracket_exp used in _LIBC environement. Seek the collating symbol entry correspondings to NAME. - Return the index of the symbol in the SYMB_TABLE. */ + Return the index of the symbol in the SYMB_TABLE, + or -1 if not found. */ auto inline int32_t __attribute ((always_inline)) - seek_collating_symbol_entry (name, name_len) - const unsigned char *name; - size_t name_len; + seek_collating_symbol_entry (const unsigned char *name, size_t name_len) { - int32_t hash = elem_hash ((const char *) name, name_len); - int32_t elem = hash % table_size; - if (symb_table[2 * elem] != 0) - { - int32_t second = hash % (table_size - 2) + 1; - - do - { - /* First compare the hashing value. */ - if (symb_table[2 * elem] == hash - /* Compare the length of the name. */ - && name_len == extra[symb_table[2 * elem + 1]] - /* Compare the name. */ - && memcmp (name, &extra[symb_table[2 * elem + 1] + 1], - name_len) == 0) - { - /* Yep, this is the entry. */ - break; - } + int32_t elem; - /* Next entry. */ - elem += second; - } - while (symb_table[2 * elem] != 0); - } - return elem; + for (elem = 0; elem < table_size; elem++) + if (symb_table[2 * elem] != 0) + { + int32_t idx = symb_table[2 * elem + 1]; + /* Skip the name of collating element name. */ + idx += 1 + extra[idx]; + if (/* Compare the length of the name. */ + name_len == extra[idx] + /* Compare the name. */ + && memcmp (name, &extra[idx + 1], name_len) == 0) + /* Yep, this is the entry. */ + return elem; + } + return -1; } /* Local function for parse_bracket_exp used in _LIBC environment. @@ -2818,8 +2807,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, auto inline unsigned int __attribute ((always_inline)) - lookup_collation_sequence_value (br_elem) - bracket_elem_t *br_elem; + lookup_collation_sequence_value (bracket_elem_t *br_elem) { if (br_elem->type == SB_CHAR) { @@ -2847,7 +2835,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, int32_t elem, idx; elem = seek_collating_symbol_entry (br_elem->opr.name, sym_name_len); - if (symb_table[2 * elem] != 0) + if (elem != -1) { /* We found the entry. */ idx = symb_table[2 * elem + 1]; @@ -2865,7 +2853,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, /* Return the collation sequence value. */ return *(unsigned int *) (extra + idx); } - else if (symb_table[2 * elem] == 0 && sym_name_len == 1) + else if (sym_name_len == 1) { /* No valid character. Match it as a single byte character. */ @@ -2887,11 +2875,8 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, auto inline reg_errcode_t __attribute ((always_inline)) - build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem) - re_charset_t *mbcset; - int *range_alloc; - bitset_t sbcset; - bracket_elem_t *start_elem, *end_elem; + build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc, + bracket_elem_t *start_elem, bracket_elem_t *end_elem) { unsigned int ch; uint32_t start_collseq; @@ -2970,25 +2955,22 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, auto inline reg_errcode_t __attribute ((always_inline)) - build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name) - re_charset_t *mbcset; - int *coll_sym_alloc; - bitset_t sbcset; - const unsigned char *name; + build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset, + int *coll_sym_alloc, const unsigned char *name) { int32_t elem, idx; size_t name_len = strlen ((const char *) name); if (nrules != 0) { elem = seek_collating_symbol_entry (name, name_len); - if (symb_table[2 * elem] != 0) + if (elem != -1) { /* We found the entry. */ idx = symb_table[2 * elem + 1]; /* Skip the name of collating element name. */ idx += 1 + extra[idx]; } - else if (symb_table[2 * elem] == 0 && name_len == 1) + else if (name_len == 1) { /* No valid character, treat it as a normal character. */ diff --git a/regexec.c b/regexec.c index ec4ae1316ffa..5b31f5bbd7e5 100644 --- a/regexec.c +++ b/regexec.c @@ -197,7 +197,7 @@ static int group_nodes_into_DFAstates (const re_dfa_t *dfa, static int check_node_accept (const re_match_context_t *mctx, const re_token_t *node, int idx) internal_function; -static reg_errcode_t extend_buffers (re_match_context_t *mctx) +static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len) internal_function; /* Entry point for POSIX code. */ @@ -1160,7 +1160,7 @@ check_matching (re_match_context_t *mctx, int fl_longest_match, || (BE (next_char_idx >= mctx->input.valid_len, 0) && mctx->input.valid_len < mctx->input.len)) { - err = extend_buffers (mctx); + err = extend_buffers (mctx, next_char_idx + 1); if (BE (err != REG_NOERROR, 0)) { assert (err == REG_ESPACE); @@ -1738,7 +1738,7 @@ clean_state_log_if_needed (re_match_context_t *mctx, int next_state_log_idx) && mctx->input.valid_len < mctx->input.len)) { reg_errcode_t err; - err = extend_buffers (mctx); + err = extend_buffers (mctx, next_state_log_idx + 1); if (BE (err != REG_NOERROR, 0)) return err; } @@ -2792,7 +2792,7 @@ get_subexp (re_match_context_t *mctx, int bkref_node, int bkref_str_idx) if (bkref_str_off >= mctx->input.len) break; - err = extend_buffers (mctx); + err = extend_buffers (mctx, bkref_str_off + 1); if (BE (err != REG_NOERROR, 0)) return err; @@ -4102,7 +4102,7 @@ check_node_accept (const re_match_context_t *mctx, const re_token_t *node, static reg_errcode_t internal_function __attribute_warn_unused_result__ -extend_buffers (re_match_context_t *mctx) +extend_buffers (re_match_context_t *mctx, int min_len) { reg_errcode_t ret; re_string_t *pstr = &mctx->input; @@ -4111,8 +4111,10 @@ extend_buffers (re_match_context_t *mctx) if (BE (INT_MAX / 2 / sizeof (re_dfastate_t *) <= pstr->bufs_len, 0)) return REG_ESPACE; - /* Double the lengthes of the buffers. */ - ret = re_string_realloc_buffers (pstr, MIN (pstr->len, pstr->bufs_len * 2)); + /* Double the lengthes of the buffers, but allocate at least MIN_LEN. */ + ret = re_string_realloc_buffers (pstr, + MAX (min_len, + MIN (pstr->len, pstr->bufs_len * 2))); if (BE (ret != REG_NOERROR, 0)) return ret; |