diff options
Diffstat (limited to 'tests/retest.c')
-rw-r--r-- | tests/retest.c | 1681 |
1 files changed, 1681 insertions, 0 deletions
diff --git a/tests/retest.c b/tests/retest.c new file mode 100644 index 000000000000..298af0f26442 --- /dev/null +++ b/tests/retest.c @@ -0,0 +1,1681 @@ +/* + retest.c - TRE regression test program + + This software is released under a BSD-style license. + See the file LICENSE for details and copyright. + +*/ + +/* + This is just a simple test application containing various hands-written + tests for regression testing TRE. I've tried to surround TRE specific + tests inside ifdefs, so this can be used to test any POSIX compatible + regexp implementation. +*/ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <stdlib.h> +#include <stdio.h> +#include <stdarg.h> +#include <string.h> +#include <sys/types.h> +#include <locale.h> +#ifdef HAVE_MALLOC_H +#include <malloc.h> +#endif /* HAVE_MALLOC_H */ +#include <regex.h> + +#ifdef TRE_VERSION +#define HAVE_REGNEXEC 1 +#define HAVE_REGNCOMP 1 +#include "xmalloc.h" +#else /* !TRE_VERSION */ +#define xmalloc malloc +#define xfree free +#endif /* !TRE_VERSION */ +#include "tre-internal.h" + +#ifdef WRETEST +#include <wchar.h> +#define CHAR_T wchar_t +#define L(x) (L ## x) + +#define MAXSTRSIZE 1024 +static wchar_t wstr[MAXSTRSIZE]; +static wchar_t wregex[MAXSTRSIZE]; +static int woffs[MAXSTRSIZE]; + +#define tre_regexec tre_regwexec +#define tre_regnexec tre_regwnexec +#define tre_regcomp tre_regwcomp +#define tre_regncomp tre_regwncomp + +/* Iterate mbrtowc over the multi-byte sequence STR of length LEN, + store the result in BUF and memoize the successive byte offsets + in OFF. */ + +static int +mbntowc (wchar_t *buf, const char *str, size_t len, int *off) +{ + int n, wlen; +#ifdef HAVE_MBSTATE_T + mbstate_t cst; + memset(&cst, 0, sizeof(cst)); +#endif + + if (len >= MAXSTRSIZE) + { + fprintf(stderr, "Increase MAXSTRSIZE to %ld or more and recompile!\n", + (long)len + 1); + exit(EXIT_FAILURE); + } + + if (off) + { + memset(off + 1, -1, len * sizeof(int)); + *off = 0; + } + + wlen = 0; + while (len > 0) + { + n = tre_mbrtowc(buf ? buf++ : NULL, str, len, &cst); + if (n < 0) + return n; + if (n == 0) + n = 1; + str += n; + len -= n; + wlen += 1; + if (off) + *(off += n) = wlen; + } + + return(wlen); +} + +#else /* !WRETEST */ +#define CHAR_T char +#define L(x) (x) +#endif /* !WRETEST */ + +static int valid_reobj = 0; +static regex_t reobj; +static regmatch_t pmatch_global[32]; +static const CHAR_T *regex_pattern; +static int cflags_global; +static int use_regnexec = 0; +static int use_regncomp = 0; +static int avoid_eflags = 0; + +static int comp_tests = 0; +static int exec_tests = 0; +static int comp_errors = 0; +static int exec_errors = 0; + +#ifndef REG_OK +#define REG_OK 0 +#endif /* REG_OK */ + +#define END -2 + +static void +test_status(char c) +{ + static int k = 0; + printf("%c", c); + if (++k % 79 == 0) + printf("\n"); + fflush(stdout); +} + + +static int +wrap_regexec(const CHAR_T *data, size_t len, + size_t pmatch_len, regmatch_t *pmatch, int eflags) +{ + CHAR_T *buf = NULL; + int result; + + if (len == 0 && use_regnexec) + { + /* Zero length string and using tre_regnexec(), the pointer we give + should not be dereferenced at all. */ + buf = NULL; + } + else + { + /* Copy the data to a separate buffer to make a better test for + tre_regexec() and tre_regnexec(). */ + buf = xmalloc((len + !use_regnexec) * sizeof(CHAR_T)); + if (!buf) + return REG_ESPACE; + memcpy(buf, data, len * sizeof(CHAR_T)); + test_status('#'); + } + +#ifdef HAVE_REGNEXEC + if (use_regnexec) + { + if (len == 0) + result = tre_regnexec(&reobj, NULL, len, pmatch_len, pmatch, eflags); + else + result = tre_regnexec(&reobj, buf, len, pmatch_len, pmatch, eflags); + } + else +#endif /* HAVE_REGNEXEC */ + { + buf[len] = L('\0'); + result = tre_regexec(&reobj, buf, pmatch_len, pmatch, eflags); + } + + xfree(buf); + return result; +} + +static int +wrap_regcomp(regex_t *preg, const CHAR_T *data, size_t len, int cflags) +{ +#ifdef HAVE_REGNCOMP + if (use_regncomp) + return tre_regncomp(preg, data, len, cflags); + else + return tre_regcomp(preg, data, cflags); +#else /* !HAVE_REGNCOMP */ + fprintf(stderr, "%s\n", data); + return tre_regcomp(preg, data, cflags); +#endif /* !HAVE_REGNCOMP */ +} + +static int +execute(const CHAR_T *data, int len, size_t pmatch_len, regmatch_t *pmatch, + int eflags) +{ +#ifdef MALLOC_DEBUGGING + int i = 0; + int ret; + + while (1) + { + xmalloc_configure(i); + comp_tests++; + ret = wrap_regexec(data, len, pmatch_len, pmatch, eflags); + if (ret != REG_ESPACE) + { + break; + } +#ifdef REGEX_DEBUG + xmalloc_dump_leaks(); +#endif /* REGEX_DEBUG */ + i++; + } + return ret; +#else /* !MALLOC_DEBUGGING */ + return wrap_regexec(data, len, pmatch_len, pmatch, eflags); +#endif /* !MALLOC_DEBUGGING */ +} + +static int +check(va_list ap, int ret, const CHAR_T *str, + size_t pmatch_len, regmatch_t *pmatch, int eflags) +{ + int fail = 0; + + if (ret != va_arg(ap, int)) + { +#ifndef WRETEST + printf("Exec error, regex: \"%s\", cflags %d, " + "string: \"%s\", eflags %d\n", regex_pattern, cflags_global, + str, eflags); +#else /* WRETEST */ + printf("Exec error, regex: \"%ls\", cflags %d, " + "string: \"%ls\", eflags %d\n", regex_pattern, cflags_global, + str, eflags); +#endif /* WRETEST */ + printf(" got %smatch (tre_regexec returned %d)\n", ret ? "no " : "", ret); + return 1; + } + + if (ret == 0) + { + unsigned int i; + + for (i = 0; i < pmatch_len; i++) + { + int rm_so, rm_eo; + rm_so = va_arg(ap, int); + if (rm_so == END) + break; + rm_eo = va_arg(ap, int); +#ifdef WRETEST + if (rm_so >= 0) + { + int n = rm_so; + + if ((rm_so = woffs[rm_so]) < 0 || + (n = rm_eo, rm_eo = woffs[rm_eo]) < 0) + { + printf("Invalid or incomplete multi-byte sequence " + "in string %ls before byte offset %d\n", str, n); + return 1; + } + } +#endif /* WRETEST */ + if (pmatch[i].rm_so != rm_so + || pmatch[i].rm_eo != rm_eo) + { +#ifndef WRETEST + printf("Exec error, regex: \"%s\", string: \"%s\"\n", + regex_pattern, str); + printf(" group %d: expected (%d, %d) \"%.*s\", " + "got (%d, %d) \"%.*s\"\n", +#else /* WRETEST */ + printf("Exec error, regex: \"%ls\", string: \"%ls\"\n", + regex_pattern, str); + printf(" group %d: expected (%d, %d) \"%.*ls\", " + "got (%d, %d) \"%.*ls\"\n", +#endif /* WRETEST */ + i, rm_so, rm_eo, rm_eo - rm_so, str + rm_so, + (int)pmatch[i].rm_so, (int)pmatch[i].rm_eo, + (int)(pmatch[i].rm_eo - pmatch[i].rm_so), + str + pmatch[i].rm_so); + fail = 1; + } + } + + if (!(cflags_global & REG_NOSUB) && reobj.re_nsub != i - 1 + && reobj.re_nsub <= pmatch_len && pmatch) + { +#ifndef WRETEST + printf("Comp error, regex: \"%s\"\n", regex_pattern); +#else /* WRETEST */ + printf("Comp error, regex: \"%ls\"\n", regex_pattern); +#endif /* WRETEST */ + printf(" re_nsub is %d, should be %d\n", (int)reobj.re_nsub, i - 1); + fail = 1; + } + + + for (; i < pmatch_len; i++) + if (pmatch[i].rm_so != -1 || pmatch[i].rm_eo != -1) + { + if (!fail) +#ifndef WRETEST + printf("Exec error, regex: \"%s\", string: \"%s\"\n", + regex_pattern, str); +#else /* WRETEST */ + printf("Exec error, regex: \"%ls\", string: \"%ls\"\n", + regex_pattern, str); +#endif /* WRETEST */ + printf(" group %d: expected (-1, -1), got (%d, %d)\n", + i, (int)pmatch[i].rm_so, (int)pmatch[i].rm_eo); + fail = 1; + } + } + + return fail; +} + + +static void +test_nexec(const char *data, size_t len, int eflags, ...) +{ + int m; + int fail = 0; + int extra_flags[] = {0, REG_BACKTRACKING_MATCHER, REG_APPROX_MATCHER}; + size_t i; + va_list ap; + + if (!valid_reobj) + { + exec_errors++; + return; + } + +#ifdef WRETEST + { + int wlen = mbntowc(wstr, data, len, woffs); + if (wlen < 0) + { + exec_errors++; + printf("Invalid or incomplete multi-byte sequence in %s\n", data); + return; + } + wstr[wlen] = L'\0'; + len = wlen; + } +#define data wstr +#endif /* WRETEST */ + + use_regnexec = 1; + + for (i = 0; i < elementsof(extra_flags); i++) + { + int final_flags = eflags | extra_flags[i]; + + if ((final_flags & REG_BACKTRACKING_MATCHER + && tre_have_approx(&reobj)) + || (final_flags & REG_APPROX_MATCHER + && tre_have_backrefs(&reobj)) + || (final_flags & avoid_eflags)) + continue; + + /* Test with a pmatch array. */ + exec_tests++; + m = execute(data, len, elementsof(pmatch_global), pmatch_global, + final_flags); + va_start(ap, eflags); + fail |= check(ap, m, data, elementsof(pmatch_global), pmatch_global, + final_flags); + va_end(ap); + + /* Same test with a NULL pmatch. */ + exec_tests++; + m = execute(data, len, 0, NULL, final_flags); + va_start(ap, eflags); + fail |= check(ap, m, data, 0, NULL, final_flags); + va_end(ap); + } + +#ifdef WRETEST +#undef data +#endif /* WRETEST */ + + if (fail) + exec_errors++; +} + + + +static void +test_exec(const char *str, int eflags, ...) +{ + int m; + int fail = 0; + size_t len = strlen(str); + int extra_flags[] = {0, + REG_BACKTRACKING_MATCHER, + REG_APPROX_MATCHER, + REG_BACKTRACKING_MATCHER | REG_APPROX_MATCHER}; + size_t i; + va_list ap; + + if (!valid_reobj) + { + exec_errors++; + return; + } + +#ifdef WRETEST + { + int wlen = mbntowc(wstr, str, len, woffs); + if (wlen < 0) + { + exec_errors++; + printf("Invalid or incomplete multi-byte sequence in %s\n", str); + return; + } + wstr[wlen] = L'\0'; + len = wlen; + } +#define str wstr +#endif /* WRETEST */ + + for (use_regnexec = 0; use_regnexec < 2; use_regnexec++) + { + for (i = 0; i < elementsof(extra_flags); i++) + { + int final_flags = eflags | extra_flags[i]; + + if ((final_flags & REG_BACKTRACKING_MATCHER + && tre_have_approx(&reobj)) + || (final_flags & REG_APPROX_MATCHER + && tre_have_backrefs(&reobj)) + || (final_flags & avoid_eflags)) + continue; + + /* Test with a pmatch array. */ + exec_tests++; + m = execute(str, len, elementsof(pmatch_global), pmatch_global, + final_flags); + va_start(ap, eflags); + fail |= check(ap, m, str, elementsof(pmatch_global), pmatch_global, + final_flags); + va_end(ap); + + /* Same test with a NULL pmatch. */ + exec_tests++; + m = execute(str, len, 0, NULL, final_flags); + va_start(ap, eflags); + fail |= check(ap, m, str, 0, NULL, final_flags); + va_end(ap); + } + } + +#ifdef WRETEST +#undef str +#endif /* WRETEST */ + + if (fail) + exec_errors++; +} + + +static void +test_comp(const char *re, int flags, int ret) +{ + int errcode = 0; + int len = strlen(re); + + if (valid_reobj) + { + tre_regfree(&reobj); + valid_reobj = 0; + } + + comp_tests++; + +#ifdef WRETEST + { + int wlen = mbntowc(wregex, re, len, NULL); + + if (wlen < 0) + { + comp_errors++; + printf("Invalid or incomplete multi-byte sequence in %s\n", re); + return; + } + wregex[wlen] = L'\0'; + len = wlen; + } +#define re wregex +#endif /* WRETEST */ + regex_pattern = re; + cflags_global = flags; + +#ifdef MALLOC_DEBUGGING + { + static int j = 0; + int i = 0; + while (1) + { + xmalloc_configure(i); + comp_tests++; + if (j++ % 20 == 0) + test_status('.'); + errcode = wrap_regcomp(&reobj, re, len, flags); + if (errcode != REG_ESPACE) + { + test_status('*'); + break; + } +#ifdef REGEX_DEBUG + xmalloc_dump_leaks(); +#endif /* REGEX_DEBUG */ + i++; + } + } +#else /* !MALLOC_DEBUGGING */ + errcode = wrap_regcomp(&reobj, re, len, flags); +#endif /* !MALLOC_DEBUGGING */ + +#ifdef WRETEST +#undef re +#endif /* WRETEST */ + + if (errcode != ret) + { +#ifndef WRETEST + printf("Comp error, regex: \"%s\"\n", regex_pattern); +#else /* WRETEST */ + printf("Comp error, regex: \"%ls\"\n", regex_pattern); +#endif /* WRETEST */ + printf(" expected return code %d, got %d.\n", + ret, errcode); + comp_errors++; + } + + if (errcode == 0) + valid_reobj = 1; +} + + + +/* To enable tests for known bugs, set this to 1. */ +#define KNOWN_BUG 0 + +int +main(int argc, char **argv) +{ + +#ifdef WRETEST + /* Need an 8-bit locale. Or move the two tests with non-ascii + characters to the localized internationalization tests. */ + if (setlocale(LC_CTYPE, "en_US.ISO-8859-1") == NULL) + fprintf(stderr, "Could not set locale en_US.ISO-8859-1. Expect some\n" + "`Invalid or incomplete multi-byte sequence' errors.\n"); +#endif /* WRETEST */ + /* Large number of macros in one regexp. */ + test_comp("[A-Z]\\d\\s?\\d[A-Z]{2}|[A-Z]\\d{2}\\s?\\d[A-Z]{2}|[A-Z]{2}\\d" + "\\s?\\d[A-Z]{2}|[A-Z]{2}\\d{2}\\s?\\d[A-Z]{2}|[A-Z]\\d[A-Z]\\s?" + "\\d[A-Z]{2}|[A-Z]{2}\\d[A-Z]\\s?\\d[A-Z]{2}|[A-Z]{3}\\s?\\d[A-Z]" + "{2}", REG_EXTENDED, 0); + + test_comp("a{11}(b{2}c){2}", REG_EXTENDED, 0); + test_comp("a{2}{2}xb+xc*xd?x", REG_EXTENDED, 0); + test_comp("^!packet [0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3} [0-9]+", + REG_EXTENDED, 0); + test_comp("^!pfast [0-9]{1,15} ([0-9]{1,3}\\.){3}[0-9]{1,3}[0-9]{1,5}$", + REG_EXTENDED, 0); + +#if KNOWN_BUG + /* Should these match or not? */ + test_comp("(a)*-\\1b", REG_EXTENDED, 0); + test_exec("aaa-b", 0, REG_NOMATCH); + test_comp("((.*)\\1)+", REG_EXTENDED, 0); + test_exec("xxxxxx", 0, REG_NOMATCH); +#endif + +#ifdef TRE_APPROX + /* + * Approximate matching tests. + * + * The approximate matcher always searches for the best match, and returns + * the leftmost and longest one if there are several best matches. + */ + + test_comp("(fou){# ~1}", REG_EXTENDED, 0); + test_comp("(fuu){#}", REG_EXTENDED, 0); + test_comp("(fuu){# ~}", REG_EXTENDED, 0); + test_comp("(anaconda){ 1i + 1d < 1, #1}", REG_EXTENDED, 0); + test_comp("(anaconda){ 1i + 1d < 1 #1 ~10 }", REG_EXTENDED, 0); + test_comp("(anaconda){ #1, ~1, 1i + 1d < 1 }", REG_EXTENDED, 0); + + test_comp("(znacnda){ #1 ~3 1i + 1d < 1 }", REG_EXTENDED, 0); + test_exec("molasses anaconda foo bar baz smith anderson ", + 0, REG_NOMATCH); + test_comp("(znacnda){ #1 ~3 1i + 1d < 2 }", REG_EXTENDED, 0); + test_exec("molasses anaconda foo bar baz smith anderson ", + 0, REG_OK, 9, 17, 9, 17, END); + test_comp("(ananda){ 1i + 1d < 2 }", REG_EXTENDED, 0); + test_exec("molasses anaconda foo bar baz smith anderson ", + 0, REG_NOMATCH); + + test_comp("(fuu){ +3 -3 ~5}", REG_EXTENDED, 0); + test_exec("anaconda foo bar baz smith anderson", + 0, REG_OK, 9, 10, 9, 10, END); + test_comp("(fuu){ +2 -2 ~5}", REG_EXTENDED, 0); + test_exec("anaconda foo bar baz smith anderson", + 0, REG_OK, 9, 10, 9, 10, END); + test_comp("(fuu){ +3 -3 ~}", REG_EXTENDED, 0); + test_exec("anaconda foo bar baz smith anderson", + 0, REG_OK, 9, 10, 9, 10, END); + + test_comp("(laurikari){ #3, 1i + 1d < 3 }", REG_EXTENDED, 0); + + /* No cost limit. */ + test_comp("(foobar){~}", REG_EXTENDED, 0); + test_exec("xirefoabralfobarxie", 0, REG_OK, 11, 16, 11, 16, END); + + /* At most two errors. */ + test_comp("(foobar){~2}", REG_EXTENDED, 0); + test_exec("xirefoabrzlfd", 0, REG_OK, 4, 9, 4, 9, END); + test_exec("xirefoabzlfd", 0, REG_NOMATCH); + + /* At most two inserts or substitutions and max two errors total. */ + test_comp("(foobar){+2#2~2}", REG_EXTENDED, 0); + test_exec("oobargoobaploowap", 0, REG_OK, 5, 11, 5, 11, END); + + /* Find best whole word match for "foobar". */ + test_comp("\\<(foobar){~}\\>", REG_EXTENDED, 0); + test_exec("zfoobarz", 0, REG_OK, 0, 8, 0, 8, END); + test_exec("boing zfoobarz goobar woop", 0, REG_OK, 15, 21, 15, 21, END); + + /* Match whole string, allow only 1 error. */ + test_comp("^(foobar){~1}$", REG_EXTENDED, 0); + test_exec("foobar", 0, REG_OK, 0, 6, 0, 6, END); + test_exec("xfoobar", 0, REG_OK, 0, 7, 0, 7, END); + /* + This currently fails. + test_exec("foobarx", 0, REG_OK, 0, 7, 0, 7, END); + */ + test_exec("fooxbar", 0, REG_OK, 0, 7, 0, 7, END); + test_exec("foxbar", 0, REG_OK, 0, 6, 0, 6, END); + test_exec("xoobar", 0, REG_OK, 0, 6, 0, 6, END); + test_exec("foobax", 0, REG_OK, 0, 6, 0, 6, END); + test_exec("oobar", 0, REG_OK, 0, 5, 0, 5, END); + test_exec("fobar", 0, REG_OK, 0, 5, 0, 5, END); + test_exec("fooba", 0, REG_OK, 0, 5, 0, 5, END); + test_exec("xfoobarx", 0, REG_NOMATCH); + test_exec("foobarxx", 0, REG_NOMATCH); + test_exec("xxfoobar", 0, REG_NOMATCH); + test_exec("xfoxbar", 0, REG_NOMATCH); + test_exec("foxbarx", 0, REG_NOMATCH); + + /* At most one insert, two deletes, and three substitutions. + Additionally, deletes cost two and substitutes one, and total + cost must be less than 4. */ + test_comp("(foobar){+1 -2 #3, 2d + 1s < 4}", REG_EXTENDED, 0); + test_exec("3oifaowefbaoraofuiebofasebfaobfaorfeoaro", + 0, REG_OK, 26, 33, 26, 33, END); + + /* Partially approximate matches. */ + test_comp("foo(bar){~1}zap", REG_EXTENDED, 0); + test_exec("foobarzap", 0, REG_OK, 0, 9, 3, 6, END); + test_exec("fobarzap", 0, REG_NOMATCH); + test_exec("foobrzap", 0, REG_OK, 0, 8, 3, 5, END); + test_comp("^.*(dot.org){~}.*$", REG_EXTENDED, 0); + test_exec("www.cnn.com 64.236.16.20\n" + "www.slashdot.org 66.35.250.150\n" + "For useful information, use www.slashdot.org\n" + "this is demo data!\n", + 0, REG_OK, 0, 120, 93, 100, END); + + /* Approximate matching and back referencing cannot be used together. */ + test_comp("(foo{~})\\1", REG_EXTENDED, REG_BADPAT); + +#endif /* TRE_APPROX */ + + /* + * Basic tests with pure regular expressions + */ + + /* Basic string matching. */ + test_comp("foobar", REG_EXTENDED, 0); + test_exec("foobar", 0, REG_OK, 0, 6, END); + test_exec("xxxfoobarzapzot", 0, REG_OK, 3, 9, END); + test_comp("foobar", REG_EXTENDED | REG_NOSUB, 0); + test_exec("foobar", 0, REG_OK, END); + test_comp("aaaa", REG_EXTENDED, 0); + test_exec("xxaaaaaaaaaaaaaaaaa", 0, REG_OK, 2, 6, END); + + /* Test zero length matches. */ + test_comp("(a*)", REG_EXTENDED, 0); + test_exec("", 0, REG_OK, 0, 0, 0, 0, END); + + test_comp("(a*)*", REG_EXTENDED, 0); + test_exec("", 0, REG_OK, 0, 0, 0, 0, END); + + test_comp("((a*)*)*", REG_EXTENDED, 0); + test_exec("", 0, REG_OK, 0, 0, 0, 0, 0, 0, END); + test_comp("(a*bcd)*", REG_EXTENDED, 0); + test_exec("aaaaaaaaaaaabcxbcxbcxaabcxaabcx", 0, REG_OK, 0, 0, -1, -1, END); + test_exec("aaaaaaaaaaaabcxbcxbcxaabcxaabc", 0, REG_OK, 0, 0, -1, -1, END); + test_exec("aaaaaaaaaaaabcxbcdbcxaabcxaabc", 0, REG_OK, 0, 0, -1, -1, END); + test_exec("aaaaaaaaaaaabcdbcdbcxaabcxaabc", 0, REG_OK, 0, 18, 15, 18, END); + + test_comp("(a*)+", REG_EXTENDED, 0); + test_exec("-", 0, REG_OK, 0, 0, 0, 0, END); + + /* This test blows up the backtracking matcher. */ + avoid_eflags = REG_BACKTRACKING_MATCHER; + test_comp("((a*)*b)*b", REG_EXTENDED, 0); + test_exec("aaaaaaaaaaaaaaaaaaaaaaaaab", 0, REG_OK, + 25, 26, -1, -1, -1, -1, END); + avoid_eflags = 0; + + test_comp("", 0, 0); + test_exec("", 0, REG_OK, 0, 0, END); + test_exec("foo", 0, REG_OK, 0, 0, END); + + /* Test for submatch addressing which requires arbitrary lookahead. */ + test_comp("(a*)aaaaaa", REG_EXTENDED, 0); + test_exec("aaaaaaaaaaaaaaax", 0, REG_OK, 0, 15, 0, 9, END); + + /* Test leftmost and longest matching and some tricky submatches. */ + test_comp("(a*)(a*)", REG_EXTENDED, 0); + test_exec("aaaa", 0, REG_OK, 0, 4, 0, 4, 4, 4, END); + test_comp("(abcd|abc)(d?)", REG_EXTENDED, 0); + test_exec("abcd", 0, REG_OK, 0, 4, 0, 4, 4, 4, END); + test_comp("(abc|abcd)(d?)", REG_EXTENDED, 0); + test_exec("abcd", 0, REG_OK, 0, 4, 0, 4, 4, 4, END); + test_comp("(abc|abcd)(d?)e", REG_EXTENDED, 0); + test_exec("abcde", 0, REG_OK, 0, 5, 0, 4, 4, 4, END); + test_comp("(abcd|abc)(d?)e", REG_EXTENDED, 0); + test_exec("abcde", 0, REG_OK, 0, 5, 0, 4, 4, 4, END); + test_comp("a(bc|bcd)(d?)", REG_EXTENDED, 0); + test_exec("abcd", 0, REG_OK, 0, 4, 1, 4, 4, 4, END); + test_comp("a(bcd|bc)(d?)", REG_EXTENDED, 0); + test_exec("abcd", 0, REG_OK, 0, 4, 1, 4, 4, 4, END); + test_comp("a*(a?bc|bcd)(d?)", REG_EXTENDED, 0); + test_exec("aaabcd", 0, REG_OK, 0, 6, 3, 6, 6, 6, END); + test_comp("a*(bcd|a?bc)(d?)", REG_EXTENDED, 0); + test_exec("aaabcd", 0, REG_OK, 0, 6, 3, 6, 6, 6, END); + test_comp("(a|(a*b*))*", REG_EXTENDED, 0); + test_exec("", 0, REG_OK, 0, 0, 0, 0, 0, 0, END); + test_exec("a", 0, REG_OK, 0, 1, 0, 1, -1, -1, END); + test_exec("aa", 0, REG_OK, 0, 2, 0, 2, 0, 2, END); + test_exec("aaa", 0, REG_OK, 0, 3, 0, 3, 0, 3, END); + test_exec("bbb", 0, REG_OK, 0, 3, 0, 3, 0, 3, END); + test_exec("aaabbb", 0, REG_OK, 0, 6, 0, 6, 0, 6, END); + test_exec("bbbaaa", 0, REG_OK, 0, 6, 3, 6, 3, 6, END); + test_comp("((a*b*)|a)*", REG_EXTENDED, 0); + test_exec("", 0, REG_OK, 0, 0, 0, 0, 0, 0, END); + test_exec("a", 0, REG_OK, 0, 1, 0, 1, 0, 1, END); + test_exec("aa", 0, REG_OK, 0, 2, 0, 2, 0, 2, END); + test_exec("aaa", 0, REG_OK, 0, 3, 0, 3, 0, 3, END); + test_exec("bbb", 0, REG_OK, 0, 3, 0, 3, 0, 3, END); + test_exec("aaabbb", 0, REG_OK, 0, 6, 0, 6, 0, 6, END); + test_exec("bbbaaa", 0, REG_OK, 0, 6, 3, 6, 3, 6, END); + test_comp("a.*(.*b.*(.*c.*).*d.*).*e.*(.*f.*).*g", REG_EXTENDED, 0); + test_exec("aabbccddeeffgg", 0, REG_OK, 0, 14, 3, 9, 5, 7, 11, 13, END); + test_comp("(wee|week)(night|knights)s*", REG_EXTENDED, 0); + test_exec("weeknights", 0, REG_OK, 0, 10, 0, 3, 3, 10, END); + test_exec("weeknightss", 0, REG_OK, 0, 11, 0, 3, 3, 10, END); + test_comp("a*", REG_EXTENDED, 0); + test_exec("aaaaaaaaaa", 0, REG_OK, 0, 10, END); + test_comp("aa*", REG_EXTENDED, 0); + test_exec("aaaaaaaaaa", 0, REG_OK, 0, 10, END); + test_comp("aaa*", REG_EXTENDED, 0); + test_exec("aaaaaaaaaa", 0, REG_OK, 0, 10, END); + test_comp("aaaa*", REG_EXTENDED, 0); + test_exec("aaaaaaaaaa", 0, REG_OK, 0, 10, END); + + /* Test clearing old submatch data with nesting parentheses + and iteration. */ + test_comp("((a)|(b))*c", REG_EXTENDED, 0); + test_exec("aaabc", 0, REG_OK, 0, 5, 3, 4, -1, -1, 3, 4, END); + test_exec("aaaac", 0, REG_OK, 0, 5, 3, 4, 3, 4, -1, -1, END); + test_comp("foo((bar)*)*zot", REG_EXTENDED, 0); + test_exec("foozot", 0, REG_OK, 0, 6, 3, 3, -1, -1, END); + test_exec("foobarzot", 0, REG_OK, 0, 9, 3, 6, 3, 6, END); + test_exec("foobarbarzot", 0, REG_OK, 0, 12, 3, 9, 6, 9, END); + + test_comp("foo((zup)*|(bar)*|(zap)*)*zot", REG_EXTENDED, 0); + test_exec("foobarzapzot", 0, REG_OK, + 0, 12, 6, 9, -1, -1, -1, -1, 6, 9, END); + test_exec("foobarbarzapzot", 0, REG_OK, + 0, 15, 9, 12, -1, -1, -1, -1, 9, 12, END); + test_exec("foozupzot", 0, REG_OK, + 0, 9, 3, 6, 3, 6, -1, -1, -1, -1, END); + test_exec("foobarzot", 0, REG_OK, + 0, 9, 3, 6, -1, -1, 3, 6, -1, -1, END); + test_exec("foozapzot", 0, REG_OK, + 0, 9, 3, 6, -1, -1, -1, -1, 3, 6, END); + test_exec("foozot", 0, REG_OK, + 0, 6, 3, 3, -1, -1, -1, -1, -1, -1, END); + + + /* Test case where, e.g., Perl and Python regexp functions, and many + other backtracking matchers, fail to produce the longest match. + It is not exactly a bug since Perl does not claim to find the + longest match, but a confusing feature and, in my opinion, a bad + design choice because the union operator is traditionally defined + to be commutative (with respect to the language denoted by the RE). */ + test_comp("(a|ab)(blip)?", REG_EXTENDED, 0); + test_exec("ablip", 0, REG_OK, 0, 5, 0, 1, 1, 5, END); + test_exec("ab", 0, REG_OK, 0, 2, 0, 2, -1, -1, END); + test_comp("(ab|a)(blip)?", REG_EXTENDED, 0); + test_exec("ablip", 0, REG_OK, 0, 5, 0, 1, 1, 5, END); + test_exec("ab", 0, REG_OK, 0, 2, 0, 2, -1, -1, END); + + /* Test more submatch addressing. */ + test_comp("((a|b)*)a(a|b)*", REG_EXTENDED, 0); + test_exec("aaaaabaaaba", 0, REG_OK, 0, 11, 0, 10, 9, 10, -1, -1, END); + test_exec("aaaaabaaab", 0, REG_OK, 0, 10, 0, 8, 7, 8, 9, 10, END); + test_exec("caa", 0, REG_OK, 1, 3, 1, 2, 1, 2, -1, -1, END); + test_comp("((a|aba)*)(ababbaba)((a|b)*)", REG_EXTENDED, 0); + test_exec("aabaababbabaaababbab", 0, REG_OK, + 0, 20, 0, 4, 1, 4, 4, 12, 12, 20, 19, 20, END); + test_exec("aaaaababbaba", 0, REG_OK, + 0, 12, 0, 4, 3, 4, 4, 12, 12, 12, -1, -1, END); + test_comp("((a|aba|abb|bba|bab)*)(ababbababbabbbabbbbbbabbaba)((a|b)*)", + REG_EXTENDED, 0); + test_exec("aabaabbbbabababaababbababbabbbabbbbbbabbabababbababababbabababa", + 0, REG_OK, 0, 63, 0, 16, 13, 16, 16, 43, 43, 63, 62, 63, END); + + /* Test for empty subexpressions. */ + test_comp("", 0, 0); + test_exec("", 0, REG_OK, 0, 0, END); + test_exec("foo", 0, REG_OK, 0, 0, END); + test_comp("(a|)", REG_EXTENDED, 0); + test_exec("a", 0, REG_OK, 0, 1, 0, 1, END); + test_exec("b", 0, REG_OK, 0, 0, 0, 0, END); + test_exec("", 0, REG_OK, 0, 0, 0, 0, END); + test_comp("a|", REG_EXTENDED, 0); + test_exec("a", 0, REG_OK, 0, 1, END); + test_exec("b", 0, REG_OK, 0, 0, END); + test_exec("", 0, REG_OK, 0, 0, END); + test_comp("|a", REG_EXTENDED, 0); + test_exec("a", 0, REG_OK, 0, 1, END); + test_exec("b", 0, REG_OK, 0, 0, END); + test_exec("", 0, REG_OK, 0, 0, END); + + /* Miscellaneous tests. */ + test_comp("(a*)b(c*)", REG_EXTENDED, 0); + test_exec("abc", 0, REG_OK, 0, 3, 0, 1, 2, 3, END); + test_exec("***abc***", 0, REG_OK, 3, 6, 3, 4, 5, 6, END); + test_comp("(a)", REG_EXTENDED, 0); + test_exec("a", 0, REG_OK, 0, 1, 0, 1, END); + test_comp("((a))", REG_EXTENDED, 0); + test_exec("a", 0, REG_OK, 0, 1, 0, 1, 0, 1, END); + test_comp("(((a)))", REG_EXTENDED, 0); + test_exec("a", 0, REG_OK, 0, 1, 0, 1, 0, 1, 0, 1, END); + test_comp("((((((((((((((((((((a))))))))))))))))))))", REG_EXTENDED, 0); + test_exec("a", 0, REG_OK, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, END); + + test_comp("ksntoeaiksntoeaikstneoaiksnteoaiksntoeaiskntoeaiskntoekainstoei" + "askntoeakisntoeksaitnokesantiksoentaikosentaiksoentaiksnoeaiskn" + "teoaksintoekasitnoeksaitkosetniaksoetnaisknoetakistoeksintokesa" + "nitksoentaisknoetaisknoetiaksotneaikstoekasitoeskatioksentaikso" + "enatiksoetnaiksonateiksoteaeskanotisknetaiskntoeasknitoskenatis" + "konetaisknoteai", 0, 0); + + test_comp("((aab)|(aac)|(aa*))c", REG_EXTENDED, 0); + test_exec("aabc", 0, REG_OK, 0, 4, 0, 3, 0, 3, -1, -1, -1, -1, END); + test_exec("aacc", 0, REG_OK, 0, 4, 0, 3, -1, -1, 0, 3, -1, -1, END); + test_exec("aaac", 0, REG_OK, 0, 4, 0, 3, -1, -1, -1, -1, 0, 3, END); + + test_comp("^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", + REG_EXTENDED, 0); + test_exec("foo!bar!bas", 0, REG_OK, + 0, 11, 0, 11, -1, -1, -1, -1, 4, 8, 8, 11, END); + test_comp("^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", + REG_EXTENDED, 0); + test_exec("foo!bar!bas", 0, REG_OK, + 0, 11, -1, -1, -1, -1, 4, 8, 8, 11, END); + test_comp("^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", + REG_EXTENDED, 0); + test_exec("foo!bar!bas", 0, REG_OK, + 0, 11, 0, 11, -1, -1, -1, -1, 4, 8, 8, 11, END); + + test_comp("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", + REG_EXTENDED, 0); + test_exec("Muammar Quathafi", 0, REG_OK, 0, 16, -1, -1, 11, 13, END); + + test_comp("(Ab|cD)*", REG_EXTENDED | REG_ICASE, 0); + test_exec("aBcD", 0, REG_OK, 0, 4, 2, 4, END); + + test_comp("a**", REG_EXTENDED, REG_BADRPT); + test_comp("a*+", REG_EXTENDED, REG_BADRPT); + test_comp("a+*", REG_EXTENDED, REG_BADRPT); + test_comp("a++", REG_EXTENDED, REG_BADRPT); + test_comp("a?+", REG_EXTENDED, REG_BADRPT); + test_comp("a?*", REG_EXTENDED, REG_BADRPT); + test_comp("a{1,2}*", REG_EXTENDED, REG_BADRPT); + test_comp("a{1,2}+", REG_EXTENDED, REG_BADRPT); + + /* + * Many of the following tests were mostly inspired by (or copied from) the + * libhackerlab posix test suite by Tom Lord. + */ + + test_comp("a", 0, 0); + test_exec("a", 0, REG_OK, 0, 1, END); + test_comp("\\.", 0, 0); + test_exec(".", 0, REG_OK, 0, 1, END); + test_comp("\\[", 0, 0); + test_exec("[", 0, REG_OK, 0, 1, END); + test_comp("\\\\", 0, 0); + test_exec("\\", 0, REG_OK, 0, 1, END); + test_comp("\\*", 0, 0); + test_exec("*", 0, REG_OK, 0, 1, END); + test_comp("\\^", 0, 0); + test_exec("^", 0, REG_OK, 0, 1, END); + test_comp("\\$", 0, 0); + test_exec("$", 0, REG_OK, 0, 1, END); + + test_comp("\\", 0, REG_EESCAPE); + + test_comp("x\\.", 0, 0); + test_exec("x.", 0, REG_OK, 0, 2, END); + test_comp("x\\[", 0, 0); + test_exec("x[", 0, REG_OK, 0, 2, END); + test_comp("x\\\\", 0, 0); + test_exec("x\\", 0, REG_OK, 0, 2, END); + test_comp("x\\*", 0, 0); + test_exec("x*", 0, REG_OK, 0, 2, END); + test_comp("x\\^", 0, 0); + test_exec("x^", 0, REG_OK, 0, 2, END); + test_comp("x\\$", 0, 0); + test_exec("x$", 0, REG_OK, 0, 2, END); + + test_comp("x\\", 0, REG_EESCAPE); + + test_comp(".", 0, 0); + test_exec("a", 0, REG_OK, 0, 1, END); + test_exec("\n", 0, REG_OK, 0, 1, END); + + test_comp("(+|?)", 0, 0); + test_exec("(+|?)", 0, REG_OK, 0, 5, END); + test_exec("+|?", 0, REG_NOMATCH); + test_exec("(+)", 0, REG_NOMATCH); + test_exec("+", 0, REG_NOMATCH); + + + /* + * Test bracket expressions. + */ + + test_comp("[", 0, REG_EBRACK); + test_comp("[]", 0, REG_EBRACK); + test_comp("[^]", 0, REG_EBRACK); + + test_comp("[]x]", 0, 0); + test_exec("]", 0, REG_OK, 0, 1, END); + test_exec("x", 0, REG_OK, 0, 1, END); + + test_comp("[.]", 0, 0); + test_exec(".", 0, REG_OK, 0, 1, END); + test_exec("a", 0, REG_NOMATCH); + + test_comp("[*]", 0, 0); + test_exec("*", 0, REG_OK, 0, 1, END); + + test_comp("[[]", 0, 0); + test_exec("[", 0, REG_OK, 0, 1, END); + + test_comp("[\\]", 0, 0); + test_exec("\\", 0, REG_OK, 0, 1, END); + + test_comp("[-x]", 0, 0); + test_exec("-", 0, REG_OK, 0, 1, END); + test_exec("x", 0, REG_OK, 0, 1, END); + test_comp("[x-]", 0, 0); + test_exec("-", 0, REG_OK, 0, 1, END); + test_exec("x", 0, REG_OK, 0, 1, END); + test_comp("[-]", 0, 0); + test_exec("-", 0, REG_OK, 0, 1, END); + + test_comp("[abc]", 0, 0); + test_exec("a", 0, REG_OK, 0, 1, END); + test_exec("b", 0, REG_OK, 0, 1, END); + test_exec("c", 0, REG_OK, 0, 1, END); + test_exec("d", 0, REG_NOMATCH); + test_exec("xa", 0, REG_OK, 1, 2, END); + test_exec("xb", 0, REG_OK, 1, 2, END); + test_exec("xc", 0, REG_OK, 1, 2, END); + test_exec("xd", 0, REG_NOMATCH); + test_comp("x[abc]", 0, 0); + test_exec("xa", 0, REG_OK, 0, 2, END); + test_exec("xb", 0, REG_OK, 0, 2, END); + test_exec("xc", 0, REG_OK, 0, 2, END); + test_exec("xd", 0, REG_NOMATCH); + test_comp("[^abc]", 0, 0); + test_exec("a", 0, REG_NOMATCH); + test_exec("b", 0, REG_NOMATCH); + test_exec("c", 0, REG_NOMATCH); + test_exec("d", 0, REG_OK, 0, 1, END); + test_exec("xa", 0, REG_OK, 0, 1, END); + test_exec("xb", 0, REG_OK, 0, 1, END); + test_exec("xc", 0, REG_OK, 0, 1, END); + test_exec("xd", 0, REG_OK, 0, 1, END); + test_comp("x[^abc]", 0, 0); + test_exec("xa", 0, REG_NOMATCH); + test_exec("xb", 0, REG_NOMATCH); + test_exec("xc", 0, REG_NOMATCH); + test_exec("xd", 0, REG_OK, 0, 2, END); + + test_comp("[()+?*\\]+", REG_EXTENDED, 0); + test_exec("x\\*?+()x", 0, REG_OK, 1, 7, END); + + /* Standard character classes. */ + test_comp("[[:alnum:]]+", REG_EXTENDED, 0); + test_exec("%abc123890XYZ=", 0, REG_OK, 1, 13, END); + test_comp("[[:cntrl:]]+", REG_EXTENDED, 0); + test_exec("%\n\t\015\f ", 0, REG_OK, 1, 5, END); + test_comp("[[:lower:]]+", REG_EXTENDED, 0); + test_exec("AbcdE", 0, REG_OK, 1, 4, END); + test_comp("[[:lower:]]+", REG_EXTENDED | REG_ICASE, 0); + test_exec("AbcdE", 0, REG_OK, 0, 5, END); + test_comp("[[:space:]]+", REG_EXTENDED, 0); + test_exec("x \t\f\nx", 0, REG_OK, 1, 5, END); + test_comp("[[:alpha:]]+", REG_EXTENDED, 0); + test_exec("%abC123890xyz=", 0, REG_OK, 1, 4, END); + test_comp("[[:digit:]]+", REG_EXTENDED, 0); + test_exec("%abC123890xyz=", 0, REG_OK, 4, 10, END); + test_comp("[^[:digit:]]+", REG_EXTENDED, 0); + test_exec("%abC123890xyz=", 0, REG_OK, 0, 4, END); + test_comp("[[:print:]]+", REG_EXTENDED, 0); + test_exec("\n %abC12\f", 0, REG_OK, 1, 8, END); + test_comp("[[:upper:]]+", REG_EXTENDED, 0); + test_exec("\n aBCDEFGHIJKLMNOPQRSTUVWXYz", 0, REG_OK, 3, 27, END); + test_comp("[[:upper:]]+", REG_EXTENDED | REG_ICASE, 0); + test_exec("\n aBCDEFGHIJKLMNOPQRSTUVWXYz", 0, REG_OK, 2, 28, END); +#ifdef HAVE_ISWBLANK +#ifdef HAVE_ISBLANK + test_comp("[[:blank:]]+", REG_EXTENDED, 0); + test_exec("\na \t b", 0, REG_OK, 2, 5, END); +#endif /* HAVE_ISBLANK */ +#endif /* HAVE_ISWBLANK */ + test_comp("[[:graph:]]+", REG_EXTENDED, 0); + test_exec("\n %abC12\f", 0, REG_OK, 2, 8, END); + test_comp("[[:punct:]]+", REG_EXTENDED, 0); + test_exec("a~!@#$%^&*()_+=-`[]{};':\"|\\,./?>< ", + 0, REG_OK, 1, 33, END); + test_comp("[[:xdigit:]]+", REG_EXTENDED, 0); + test_exec("-0123456789ABCDEFabcdef", 0, REG_OK, 1, 23, END); + test_comp("[[:bogus-character-class-name:]", REG_EXTENDED, REG_ECTYPE); + + + /* Range expressions (assuming that the C locale is being used). */ + test_comp("[a-z]+", REG_EXTENDED, 0); + test_exec("ABCabcxyzABC", 0, REG_OK, 3, 9, END); + test_comp("[z-a]+", REG_EXTENDED, REG_ERANGE); + test_comp("[a-b-c]", 0, REG_ERANGE); + test_comp("[a-a]+", REG_EXTENDED, 0); + test_exec("zaaaaab", 0, REG_OK, 1, 6, END); + test_comp("[--Z]+", REG_EXTENDED, 0); + test_exec("!ABC-./XYZ~", 0, REG_OK, 1, 10, END); + test_comp("[*--]", 0, 0); + test_exec("-", 0, REG_OK, 0, 1, END); + test_exec("*", 0, REG_OK, 0, 1, END); + test_comp("[*--Z]+", REG_EXTENDED, 0); + test_exec("!+*,---ABC", 0, REG_OK, 1, 7, END); + test_comp("[a-]+", REG_EXTENDED, 0); + test_exec("xa-a--a-ay", 0, REG_OK, 1, 9, END); + + /* REG_ICASE and character sets. */ + test_comp("[a-c]*", REG_ICASE | REG_EXTENDED, 0); + test_exec("cABbage", 0, REG_OK, 0, 5, END); + test_comp("[^a-c]*", REG_ICASE | REG_EXTENDED, 0); + test_exec("tObAcCo", 0, REG_OK, 0, 2, END); + test_comp("[A-C]*", REG_ICASE | REG_EXTENDED, 0); + test_exec("cABbage", 0, REG_OK, 0, 5, END); + test_comp("[^A-C]*", REG_ICASE | REG_EXTENDED, 0); + test_exec("tObAcCo", 0, REG_OK, 0, 2, END); + + /* Complex character sets. */ + test_comp("[[:digit:]a-z#$%]+", REG_EXTENDED, 0); + test_exec("__abc#lmn012$x%yz789*", 0, REG_OK, 2, 20, END); + test_comp("[[:digit:]a-z#$%]+", REG_ICASE | REG_EXTENDED, 0); + test_exec("__abcLMN012x%#$yz789*", 0, REG_OK, 2, 20, END); + test_comp("[^[:digit:]a-z#$%]+", REG_EXTENDED, 0); + test_exec("abc#lmn012$x%yz789--@*,abc", 0, REG_OK, 18, 23, END); + test_comp("[^[:digit:]a-z#$%]+", REG_ICASE | REG_EXTENDED, 0); + test_exec("abc#lmn012$x%yz789--@*,abc", 0, REG_OK, 18, 23, END); + test_comp("[^[:digit:]#$%[:xdigit:]]+", REG_ICASE | REG_EXTENDED, 0); + test_exec("abc#lmn012$x%yz789--@*,abc", 0, REG_OK, 4, 7, END); + test_comp("[^-]+", REG_EXTENDED, 0); + test_exec("---afd*(&,ml---", 0, REG_OK, 3, 12, END); + test_comp("[^--Z]+", REG_EXTENDED, 0); + test_exec("---AFD*(&,ml---", 0, REG_OK, 6, 12, END); + test_comp("[^--Z]+", REG_ICASE | REG_EXTENDED, 0); + test_exec("---AFD*(&,ml---", 0, REG_OK, 6, 10, END); + + /* Unsupported things (equivalence classes and multicharacter collating + elements) */ + test_comp("[[.foo.]]", 0, REG_ECOLLATE); + test_comp("[[=foo=]]", 0, REG_ECOLLATE); + test_comp("[[..]]", 0, REG_ECOLLATE); + test_comp("[[==]]", 0, REG_ECOLLATE); + test_comp("[[.]]", 0, REG_ECOLLATE); + test_comp("[[=]]", 0, REG_ECOLLATE); + test_comp("[[.]", 0, REG_ECOLLATE); + test_comp("[[=]", 0, REG_ECOLLATE); + test_comp("[[.", 0, REG_ECOLLATE); + test_comp("[[=", 0, REG_ECOLLATE); + + + + /* Miscellaneous tests. */ + test_comp("abc\\(\\(de\\)\\(fg\\)\\)hi", 0, 0); + test_exec("xabcdefghiy", 0, REG_OK, 1, 10, 4, 8, 4, 6, 6, 8, END); + + test_comp("abc*def", 0, 0); + test_exec("xabdefy", 0, REG_OK, 1, 6, END); + test_exec("xabcdefy", 0, REG_OK, 1, 7, END); + test_exec("xabcccccccdefy", 0, REG_OK, 1, 13, END); + + test_comp("abc\\(def\\)*ghi", 0, 0); + test_exec("xabcghiy", 0, REG_OK, 1, 7, -1, -1, END); + test_exec("xabcdefghi", 0, REG_OK, 1, 10, 4, 7, END); + test_exec("xabcdefdefdefghi", 0, REG_OK, 1, 16, 10, 13, END); + + test_comp("a?", REG_EXTENDED, REG_OK); + test_exec("aaaaa", 0, REG_OK, 0, 1, END); + test_exec("xaaaaa", 0, REG_OK, 0, 0, END); + test_comp("a+", REG_EXTENDED, REG_OK); + test_exec("aaaaa", 0, REG_OK, 0, 5, END); + test_exec("xaaaaa", 0, REG_OK, 1, 6, END); + + + /* + * Test anchors and their behaviour with the REG_NEWLINE compilation + * flag and the REG_NOTBOL, REG_NOTEOL execution flags. + */ + + /* Normally, `^' matches the empty string at beginning of input. + If REG_NOTBOL is used, `^' won't match the zero length string. */ + test_comp("^abc", 0, 0); + test_exec("abcdef", 0, REG_OK, 0, 3, END); + test_exec("abcdef", REG_NOTBOL, REG_NOMATCH); + test_exec("xyzabcdef", 0, REG_NOMATCH); + test_exec("xyzabcdef", REG_NOTBOL, REG_NOMATCH); + test_exec("\nabcdef", 0, REG_NOMATCH); + test_exec("\nabcdef", REG_NOTBOL, REG_NOMATCH); + + /* Normally, `$' matches the empty string at end of input. + If REG_NOTEOL is used, `$' won't match the zero length string. */ + test_comp("abc$", 0, 0); + test_exec("defabc", 0, REG_OK, 3, 6, END); + test_exec("defabc", REG_NOTEOL, REG_NOMATCH); + test_exec("defabcxyz", 0, REG_NOMATCH); + test_exec("defabcxyz", REG_NOTEOL, REG_NOMATCH); + test_exec("defabc\n", 0, REG_NOMATCH); + test_exec("defabc\n", REG_NOTEOL, REG_NOMATCH); + + test_comp("^abc$", 0, 0); + test_exec("abc", 0, REG_OK, 0, 3, END); + test_exec("abc", REG_NOTBOL, REG_NOMATCH); + test_exec("abc", REG_NOTEOL, REG_NOMATCH); + test_exec("abc", REG_NOTBOL | REG_NOTEOL, REG_NOMATCH); + test_exec("\nabc\n", 0, REG_NOMATCH); + test_exec("defabc\n", 0, REG_NOMATCH); + test_exec("\nabcdef", 0, REG_NOMATCH); + test_exec("abcdef", 0, REG_NOMATCH); + test_exec("defabc", 0, REG_NOMATCH); + test_exec("abc\ndef", 0, REG_NOMATCH); + test_exec("def\nabc", 0, REG_NOMATCH); + + /* If REG_NEWLINE is used, `^' matches the empty string immediately after + a newline, regardless of whether execution flags contain REG_NOTBOL. + Similarly, if REG_NEWLINE is used, `$' matches the empty string + immediately before a newline, regardless of execution flags. */ + test_comp("^abc", REG_NEWLINE, 0); + test_exec("abcdef", 0, REG_OK, 0, 3, END); + test_exec("abcdef", REG_NOTBOL, REG_NOMATCH); + test_exec("xyzabcdef", 0, REG_NOMATCH); + test_exec("xyzabcdef", REG_NOTBOL, REG_NOMATCH); + test_exec("\nabcdef", 0, REG_OK, 1, 4, END); + test_exec("\nabcdef", REG_NOTBOL, 0, 1, 4, END); + test_comp("abc$", REG_NEWLINE, 0); + test_exec("defabc", 0, REG_OK, 3, 6, END); + test_exec("defabc", REG_NOTEOL, REG_NOMATCH); + test_exec("defabcxyz", 0, REG_NOMATCH); + test_exec("defabcxyz", REG_NOTEOL, REG_NOMATCH); + test_exec("defabc\n", 0, REG_OK, 3, 6, END); + test_exec("defabc\n", REG_NOTEOL, 0, 3, 6, END); + test_comp("^abc$", REG_NEWLINE, 0); + test_exec("abc", 0, REG_OK, 0, 3, END); + test_exec("abc", REG_NOTBOL, REG_NOMATCH); + test_exec("abc", REG_NOTEOL, REG_NOMATCH); + test_exec("abc", REG_NOTBOL | REG_NOTEOL, REG_NOMATCH); + test_exec("\nabc\n", 0, REG_OK, 1, 4, END); + test_exec("defabc\n", 0, REG_NOMATCH); + test_exec("\nabcdef", 0, REG_NOMATCH); + test_exec("abcdef", 0, REG_NOMATCH); + test_exec("abcdef", REG_NOTBOL, REG_NOMATCH); + test_exec("defabc", 0, REG_NOMATCH); + test_exec("defabc", REG_NOTEOL, REG_NOMATCH); + test_exec("abc\ndef", 0, REG_OK, 0, 3, END); + test_exec("abc\ndef", REG_NOTBOL, REG_NOMATCH); + test_exec("abc\ndef", REG_NOTEOL, 0, 0, 3, END); + test_exec("abc\ndef", REG_NOTBOL | REG_NOTEOL, REG_NOMATCH); + test_exec("def\nabc", 0, REG_OK, 4, 7, END); + test_exec("def\nabc", REG_NOTBOL, 0, 4, 7, END); + test_exec("def\nabc", REG_NOTEOL, REG_NOMATCH); + test_exec("def\nabc", REG_NOTBOL | REG_NOTEOL, REG_NOMATCH); + + /* With BRE syntax, `^' has a special meaning only at the beginning of the + RE or the beginning of a parenthesized subexpression. */ + test_comp("a\\{0,1\\}^bc", 0, 0); + test_exec("bc", 0, REG_NOMATCH); + test_exec("^bc", 0, REG_OK, 0, 3, END); + test_exec("abc", 0, REG_NOMATCH); + test_exec("a^bc", 0, REG_OK, 0, 4, END); + test_comp("a\\{0,1\\}\\(^bc\\)", 0, 0); + test_exec("bc", 0, REG_OK, 0, 2, 0, 2, END); + test_exec("^bc", 0, REG_NOMATCH); + test_exec("abc", 0, REG_NOMATCH); + test_exec("a^bc", 0, REG_NOMATCH); + test_comp("(^a", 0, 0); + test_exec("(^a", 0, REG_OK, 0, 3, END); + + /* With BRE syntax, `$' has a special meaning only at the end of the + RE or the end of a parenthesized subexpression. */ + test_comp("ab$c\\{0,1\\}", 0, 0); + test_exec("ab", 0, REG_NOMATCH); + test_exec("ab$", 0, REG_OK, 0, 3, END); + test_exec("abc", 0, REG_NOMATCH); + test_exec("ab$c", 0, REG_OK, 0, 4, END); + test_comp("\\(ab$\\)c\\{0,1\\}", 0, 0); + test_exec("ab", 0, REG_OK, 0, 2, 0, 2, END); + test_exec("ab$", 0, REG_NOMATCH); + test_exec("abc", 0, REG_NOMATCH); + test_exec("ab$c", 0, REG_NOMATCH); + test_comp("a$)", 0, 0); + test_exec("a$)", 0, REG_OK, 0, 3, END); + + /* Miscellaneous tests for `^' and `$'. */ + test_comp("foo^$", REG_EXTENDED, 0); + test_exec("foo", 0, REG_NOMATCH); + test_comp("x$\n^y", REG_EXTENDED | REG_NEWLINE, 0); + test_exec("foo\nybarx\nyes\n", 0, REG_OK, 8, 11, END); + test_comp("^$", 0, 0); + test_exec("x", 0, REG_NOMATCH); + test_exec("", 0, REG_OK, 0, 0, END); + test_exec("\n", 0, REG_NOMATCH); + test_comp("^$", REG_NEWLINE, 0); + test_exec("x", 0, REG_NOMATCH); + test_exec("", 0, REG_OK, 0, 0, END); + test_exec("\n", 0, REG_OK, 0, 0, END); + + /* REG_NEWLINE causes `.' not to match newlines. */ + test_comp(".*", 0, 0); + test_exec("ab\ncd", 0, REG_OK, 0, 5, END); + test_comp(".*", REG_NEWLINE, 0); + test_exec("ab\ncd", 0, REG_OK, 0, 2, END); + + /* + * Tests for nonstandard syntax extensions. + */ + + /* Zero width assertions. */ + test_comp("\\<x", REG_EXTENDED, 0); + test_exec("aax xaa", 0, REG_OK, 4, 5, END); + test_exec("xaa", 0, REG_OK, 0, 1, END); + test_comp("x\\>", REG_EXTENDED, 0); + test_exec("axx xaa", 0, REG_OK, 2, 3, END); + test_exec("aax", 0, REG_OK, 2, 3, END); + test_comp("\\bx", REG_EXTENDED, 0); + test_exec("axx xaa", 0, REG_OK, 4, 5, END); + test_exec("aax", 0, REG_NOMATCH); + test_exec("xax", 0, REG_OK, 0, 1, END); + test_comp("x\\b", REG_EXTENDED, 0); + test_exec("axx xaa", 0, REG_OK, 2, 3, END); + test_exec("aax", 0, REG_OK, 2, 3, END); + test_exec("xaa", 0, REG_NOMATCH); + test_comp("\\Bx", REG_EXTENDED, 0); + test_exec("aax xxa", 0, REG_OK, 2, 3, END); + test_comp("\\Bx\\b", REG_EXTENDED, 0); + test_exec("aax xxx", 0, REG_OK, 2, 3, END); + test_comp("\\<.", REG_EXTENDED, 0); + test_exec(";xaa", 0, REG_OK, 1, 2, END); + + /* Shorthands for character classes. */ + test_comp("\\w+", REG_EXTENDED, 0); + test_exec(",.(a23_Nt-öo)", 0, REG_OK, 3, 9, END); + test_comp("\\d+", REG_EXTENDED, 0); + test_exec("uR120_4=v4", 0, REG_OK, 2, 5, END); + test_comp("\\D+", REG_EXTENDED, 0); + test_exec("120d_=vA4s", 0, REG_OK, 3, 8, END); + + /* Quoted special characters. */ + test_comp("\\t", REG_EXTENDED, 0); + test_comp("\\e", REG_EXTENDED, 0); + + /* Test the \x1B and \x{263a} extensions for specifying 8 bit and wide + characters in hexadecimal. */ + test_comp("\\x41", REG_EXTENDED, 0); + test_exec("ABC", 0, REG_OK, 0, 1, END); + test_comp("\\x5", REG_EXTENDED, 0); + test_exec("\005", 0, REG_OK, 0, 1, END); + test_comp("\\x5r", REG_EXTENDED, 0); + test_exec("\005r", 0, REG_OK, 0, 2, END); + test_comp("\\x", REG_EXTENDED, 0); + test_nexec("\000", 1, 0, REG_OK, 0, 1, END); + test_comp("\\xr", REG_EXTENDED, 0); + test_nexec("\000r", 2, 0, REG_OK, 0, 2, END); + test_comp("\\x{41}", REG_EXTENDED, 0); + test_exec("ABC", 0, REG_OK, 0, 1, END); + test_comp("\\x{5}", REG_EXTENDED, 0); + test_exec("\005", 0, REG_OK, 0, 1, END); + test_comp("\\x{5}r", REG_EXTENDED, 0); + test_exec("\005r", 0, REG_OK, 0, 2, END); + test_comp("\\x{}", REG_EXTENDED, 0); + test_nexec("\000", 1, 0, REG_OK, 0, 1, END); + test_comp("\\x{}r", REG_EXTENDED, 0); + test_nexec("\000r", 2, 0, REG_OK, 0, 2, END); + + /* Tests for (?inrU-inrU) and (?inrU-inrU:) */ + test_comp("foo(?i)bar", REG_EXTENDED, 0); + test_exec("fooBaR", 0, REG_OK, 0, 6, END); + test_comp("foo(?i)bar|zap", REG_EXTENDED, 0); + test_exec("fooBaR", 0, REG_OK, 0, 6, END); + test_exec("foozap", 0, REG_OK, 0, 6, END); + test_exec("foozAp", 0, REG_OK, 0, 6, END); + test_exec("zap", 0, REG_NOMATCH); + test_comp("foo(?-i:zap)zot", REG_EXTENDED | REG_ICASE, 0); + test_exec("FoOzapZOt", 0, REG_OK, 0, 9, END); + test_exec("FoOzApZOt", 0, REG_NOMATCH); + test_comp("foo(?i:bar|zap)", REG_EXTENDED, 0); + test_exec("foozap", 0, REG_OK, 0, 6, END); + test_exec("foobar", 0, REG_OK, 0, 6, END); + test_exec("foobAr", 0, REG_OK, 0, 6, END); + test_exec("fooZaP", 0, REG_OK, 0, 6, END); + test_comp("foo(?U:o*)(o*)", REG_EXTENDED, 0); + test_exec("foooo", 0, REG_OK, 0, 5, 3, 5, END); + + /* Test comment syntax. */ + test_comp("foo(?# This here is a comment. )bar", REG_EXTENDED, 0); + test_exec("foobar", 0, REG_OK, 0, 6, END); + + /* Tests for \Q and \E. */ + test_comp("\\((\\Q)?:\\<[^$\\E)", REG_EXTENDED, 0); + test_exec("()?:\\<[^$", 0, REG_OK, 0, 9, 1, 9, END); + test_comp("\\Qabc\\E.*", REG_EXTENDED, 0); + test_exec("abcdef", 0, REG_OK, 0, 6, END); + test_comp("\\Qabc\\E.*|foo", REG_EXTENDED, 0); + test_exec("parabc123wxyz", 0, REG_OK, 3, 13, END); + test_exec("fooabc123wxyz", 0, REG_OK, 0, 3, END); + + /* + * Test bounded repetitions. + */ + + test_comp("a{0,0}", REG_EXTENDED, REG_OK); + test_exec("aaa", 0, REG_OK, 0, 0, END); + test_comp("a{0,1}", REG_EXTENDED, REG_OK); + test_exec("aaa", 0, REG_OK, 0, 1, END); + test_comp("a{1,1}", REG_EXTENDED, REG_OK); + test_exec("aaa", 0, REG_OK, 0, 1, END); + test_comp("a{1,3}", REG_EXTENDED, REG_OK); + test_exec("xaaaaa", 0, REG_OK, 1, 4, END); + test_comp("a{0,3}", REG_EXTENDED, REG_OK); + test_exec("aaaaa", 0, REG_OK, 0, 3, END); + test_comp("a{0,}", REG_EXTENDED, REG_OK); + test_exec("", 0, REG_OK, 0, 0, END); + test_exec("a", 0, REG_OK, 0, 1, END); + test_exec("aa", 0, REG_OK, 0, 2, END); + test_exec("aaa", 0, REG_OK, 0, 3, END); + test_comp("a{1,}", REG_EXTENDED, REG_OK); + test_exec("", 0, REG_NOMATCH); + test_exec("a", 0, REG_OK, 0, 1, END); + test_exec("aa", 0, REG_OK, 0, 2, END); + test_exec("aaa", 0, REG_OK, 0, 3, END); + test_comp("a{2,}", REG_EXTENDED, REG_OK); + test_exec("", 0, REG_NOMATCH); + test_exec("a", 0, REG_NOMATCH); + test_exec("aa", 0, REG_OK, 0, 2, END); + test_exec("aaa", 0, REG_OK, 0, 3, END); + test_comp("a{3,}", REG_EXTENDED, REG_OK); + test_exec("", 0, REG_NOMATCH); + test_exec("a", 0, REG_NOMATCH); + test_exec("aa", 0, REG_NOMATCH); + test_exec("aaa", 0, REG_OK, 0, 3, END); + test_exec("aaaa", 0, REG_OK, 0, 4, END); + test_exec("aaaaa", 0, REG_OK, 0, 5, END); + test_exec("aaaaaa", 0, REG_OK, 0, 6, END); + test_exec("aaaaaaa", 0, REG_OK, 0, 7, END); + + test_comp("a{5,10}", REG_EXTENDED, REG_OK); + test_comp("a{6,6}", REG_EXTENDED, REG_OK); + test_exec("aaaaaaaaaaaa", 0, REG_OK, 0, 6, END); + test_exec("xxaaaaaaaaaaaa", 0, REG_OK, 2, 8, END); + test_exec("xxaaaaa", 0, REG_NOMATCH); + test_comp("a{5,6}", REG_EXTENDED, REG_OK); + test_exec("aaaaaaaaaaaa", 0, REG_OK, 0, 6, END); + test_exec("xxaaaaaaaaaaaa", 0, REG_OK, 2, 8, END); + test_exec("xxaaaaa", 0, REG_OK, 2, 7, END); + test_exec("xxaaaa", 0, REG_NOMATCH); + + /* Trickier ones... */ + test_comp("([ab]{5,10})*b", REG_EXTENDED, REG_OK); + test_exec("bbbbbabaaaaab", 0, REG_OK, 0, 13, 5, 12, END); + test_exec("bbbbbbaaaaab", 0, REG_OK, 0, 12, 5, 11, END); + test_exec("bbbbbbaaaab", 0, REG_OK, 0, 11, 0, 10, END); + test_exec("bbbbbbaaab", 0, REG_OK, 0, 10, 0, 9, END); + test_exec("bbbbbbaab", 0, REG_OK, 0, 9, 0, 8, END); + test_exec("bbbbbbab", 0, REG_OK, 0, 8, 0, 7, END); + + test_comp("([ab]*)(ab[ab]{5,10})ba", REG_EXTENDED, REG_OK); + test_exec("abbabbbabaabbbbbbbbbbbbbabaaaabab", 0, REG_OK, + 0, 10, 0, 0, 0, 8, END); + test_exec("abbabbbabaabbbbbbbbbbbbabaaaaabab", 0, REG_OK, + 0, 32, 0, 23, 23, 30, END); + test_exec("abbabbbabaabbbbbbbbbbbbabaaaabab", 0, REG_OK, + 0, 24, 0, 10, 10, 22, END); + test_exec("abbabbbabaabbbbbbbbbbbba", 0, REG_OK, + 0, 24, 0, 10, 10, 22, END); + + /* Test repeating something that has submatches inside. */ + test_comp("(a){0,5}", REG_EXTENDED, 0); + test_exec("", 0, REG_OK, 0, 0, -1, -1, END); + test_exec("a", 0, REG_OK, 0, 1, 0, 1, END); + test_exec("aa", 0, REG_OK, 0, 2, 1, 2, END); + test_exec("aaa", 0, REG_OK, 0, 3, 2, 3, END); + test_exec("aaaa", 0, REG_OK, 0, 4, 3, 4, END); + test_exec("aaaaa", 0, REG_OK, 0, 5, 4, 5, END); + test_exec("aaaaaa", 0, REG_OK, 0, 5, 4, 5, END); + + test_comp("(a){2,3}", REG_EXTENDED, 0); + test_exec("", 0, REG_NOMATCH); + test_exec("a", 0, REG_NOMATCH); + test_exec("aa", 0, REG_OK, 0, 2, 1, 2, END); + test_exec("aaa", 0, REG_OK, 0, 3, 2, 3, END); + test_exec("aaaa", 0, REG_OK, 0, 3, 2, 3, END); + + test_comp("\\(a\\)\\{4\\}", 0, 0); + test_exec("aaaa", 0, REG_OK, 0, 4, 3, 4, END); + + test_comp("\\(a*\\)\\{2\\}", 0, 0); + test_exec("a", 0, REG_OK, 0, 1, 1, 1, END); + + test_comp("((..)|(.)){2}", REG_EXTENDED, 0); + test_exec("aa", 0, REG_OK, 0, 2, 1, 2, -1, -1, 1, 2, END); + + /* Nested repeats. */ + test_comp("(.){2}{3}", REG_EXTENDED, 0); + test_exec("xxxxx", 0, REG_NOMATCH); + test_exec("xxxxxx", 0, REG_OK, 0, 6, 5, 6, END); + test_comp("(..){2}{3}", REG_EXTENDED, 0); + test_exec("xxxxxxxxxxx", 0, REG_NOMATCH); + test_exec("xxxxxxxxxxxx", 0, REG_OK, 0, 12, 10, 12, END); + test_comp("((..){2}.){3}", REG_EXTENDED, 0); + test_exec("xxxxxxxxxxxxxx", 0, REG_NOMATCH); + test_exec("xxxxxxxxxxxxxxx", 0, REG_OK, 0, 15, 10, 15, 12, 14, END); + test_comp("((..){1,2}.){3}", REG_EXTENDED, 0); + test_exec("xxxxxxxx", 0, REG_NOMATCH); + test_exec("xxxxxxxxx", 0, REG_OK, 0, 9, 6, 9, 6, 8, END); + test_exec("xxxxxxxxxx", 0, REG_OK, 0, 9, 6, 9, 6, 8, END); + test_exec("xxxxxxxxxxx", 0, REG_OK, 0, 11, 8, 11, 8, 10, END); + test_comp("a{2}{2}x", REG_EXTENDED, 0); + test_exec("", 0, REG_NOMATCH); + test_exec("x", 0, REG_NOMATCH); + test_exec("ax", 0, REG_NOMATCH); + test_exec("aax", 0, REG_NOMATCH); + test_exec("aaax", 0, REG_NOMATCH); + test_exec("aaaax", 0, REG_OK, 0, 5, END); + test_exec("aaaaax", 0, REG_OK, 1, 6, END); + test_exec("aaaaaax", 0, REG_OK, 2, 7, END); + test_exec("aaaaaaax", 0, REG_OK, 3, 8, END); + test_exec("aaaaaaaax", 0, REG_OK, 4, 9, END); + + /* Repeats with iterations inside. */ + test_comp("([a-z]+){2,5}", REG_EXTENDED, 0); + test_exec("a\n", 0, REG_NOMATCH); + test_exec("aa\n", 0, REG_OK, 0, 2, 1, 2, END); + + /* Multiple repeats in one regexp. */ + test_comp("a{3}b{3}", REG_EXTENDED, 0); + test_exec("aaabbb", 0, REG_OK, 0, 6, END); + test_exec("aaabbbb", 0, REG_OK, 0, 6, END); + test_exec("aaaabbb", 0, REG_OK, 1, 7, END); + test_exec("aabbb", 0, REG_NOMATCH); + test_exec("aaabb", 0, REG_NOMATCH); + + /* Test that different types of repetitions work correctly when used + in the same regexp. */ + test_comp("a{2}{2}xb+xc*xd?x", REG_EXTENDED, 0); + test_exec("aaaaxbxcxdx", 0, REG_OK, 0, 11, END); + test_exec("aaaxbxcxdx", 0, REG_NOMATCH); + test_exec("aabxcxdx", 0, REG_NOMATCH); + test_exec("aaaacxdx", 0, REG_NOMATCH); + test_exec("aaaaxbdx", 0, REG_NOMATCH); + test_comp("^!packet [0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3} [0-9]+", + REG_EXTENDED, 0); + test_exec("!packet 10.0.2.4 12765 ei voittoa", 0, REG_OK, 0, 22, END); + + /* + * Back referencing tests. + */ + test_comp("([a-z]*) \\1", REG_EXTENDED, 0); + test_exec("foobar foobar", 0, REG_OK, 0, 13, 0, 6, END); + + /* Searching for a leftmost longest square (repeated string) */ + test_comp("(.*)\\1", REG_EXTENDED, 0); + test_exec("foobarfoobar", 0, REG_OK, 0, 12, 0, 6, END); + + test_comp("a(b)*c\\1", REG_EXTENDED, 0); + test_exec("acb", 0, REG_OK, 0, 2, -1, -1, END); + test_exec("abbcbbb", 0, REG_OK, 0, 5, 2, 3, END); + test_exec("abbdbd", 0, REG_NOMATCH); + + test_comp("([a-c]*)\\1", REG_EXTENDED, 0); + test_exec("abcacdef", 0, REG_OK, 0, 0, 0, 0, END); + test_exec("abcabcabcd", 0, REG_OK, 0, 6, 0, 3, END); + + test_comp("\\(a*\\)*\\(x\\)\\(\\1\\)", 0, 0); + test_exec("x", 0, REG_OK, 0, 1, 0, 0, 0, 1, 1, 1, END); +#if KNOWN_BUG + test_exec("ax", 0, REG_OK, 0, 2, 1, 1, 1, 2, 2, 2, END); +#endif + + test_comp("(a)\\1{1,2}", REG_EXTENDED, 0); + test_exec("aabc", 0, REG_OK, 0, 2, 0, 1, END); + + test_comp("((.*)\\1)+", REG_EXTENDED, 0); + test_exec("aa", 0, REG_OK, 0, 2, 0, 2, 0, 1, END); + +#if KNOWN_BUG + test_comp("()(\\1\\1)*", REG_EXTENDED, 0); + test_exec("", 0, REG_OK, 0, 0, 0, 0, 0, 0, END); +#endif + + /* Check that back references work with REG_NOSUB. */ + test_comp("(o)\\1", REG_EXTENDED | REG_NOSUB, 0); + test_exec("foobar", 0, REG_OK, END); + test_comp("(o)\\1", REG_EXTENDED, 0); + test_exec("foobar", 0, REG_OK, 1, 3, 1, 2, END); + test_comp("(o)\\1", REG_EXTENDED, 0); + test_exec("fobar", 0, REG_NOMATCH); + + test_comp("\\1foo", REG_EXTENDED, REG_ESUBREG); + test_comp("\\1foo(bar)", REG_EXTENDED, 0); + + /* Back reference with zero-width assertion. */ + test_comp("(.)\\1$", REG_EXTENDED, 0); + test_exec("foox", 0, REG_NOMATCH); + test_exec("foo", 0, REG_OK, 1, 3, 1, 2, END); + + /* Back references together with {}. */ + test_comp("([0-9]{5})\\1", REG_EXTENDED, 0); + test_exec("12345", 0, REG_NOMATCH); + test_exec("1234512345", 0, REG_OK, 0, 10, 0, 5, END); + test_comp("([0-9]{4})\\1", REG_EXTENDED, 0); + test_exec("1234", 0, REG_NOMATCH); + test_exec("12341234", 0, REG_OK, 0, 8, 0, 4, END); + + /* + * Test minimal repetitions (non-greedy repetitions) + */ + avoid_eflags = REG_BACKTRACKING_MATCHER | REG_APPROX_MATCHER; + + /* Basic .*/ + test_comp(".*?", REG_EXTENDED, 0); + test_exec("abcd", 0, REG_OK, 0, 0, END); + test_comp(".+?", REG_EXTENDED, 0); + test_exec("abcd", 0, REG_OK, 0, 1, END); + test_comp(".??", REG_EXTENDED, 0); + test_exec("abcd", 0, REG_OK, 0, 0, END); + test_comp(".{2,5}?", REG_EXTENDED, 0); + test_exec("abcd", 0, REG_OK, 0, 2, END); + + /* More complicated. */ + test_comp("<b>(.*?)</b>", REG_EXTENDED, 0); + test_exec("<b>text1</b><b>text2</b>", 0, REG_OK, 0, 12, 3, 8, END); + test_comp("a(.*?)(foo|bar|zap)", REG_EXTENDED, 0); + test_exec("hubba wooga-booga zabar gafoo wazap", 0, REG_OK, + 4, 23, 5, 20, 20, 23, END); + + /* Test REG_UNGREEDY. */ + test_comp(".*", REG_EXTENDED | REG_UNGREEDY, 0); + test_exec("abcd", 0, REG_OK, 0, 0, END); + test_comp(".*?", REG_EXTENDED | REG_UNGREEDY, 0); + test_exec("abcd", 0, REG_OK, 0, 4, END); + + avoid_eflags = 0; + + + /* + * Error reporting tests. + */ + + test_comp("\\", REG_EXTENDED, REG_EESCAPE); + test_comp("\\\\", REG_EXTENDED, REG_OK); + test_exec("\\", 0, REG_OK, 0, 1, END); + test_comp("(", REG_EXTENDED, REG_EPAREN); + test_comp("(aaa", REG_EXTENDED, REG_EPAREN); + test_comp(")", REG_EXTENDED, REG_OK); + test_exec(")", 0, REG_OK, 0, 1, END); + test_comp("a{1", REG_EXTENDED, REG_EBRACE); + test_comp("a{1,x}", REG_EXTENDED, REG_BADBR); + test_comp("a{1x}", REG_EXTENDED, REG_BADBR); + test_comp("a{1,0}", REG_EXTENDED, REG_BADBR); + test_comp("a{x}", REG_EXTENDED, REG_BADBR); + test_comp("a{}", REG_EXTENDED, REG_BADBR); + + + test_comp("\\", 0, REG_EESCAPE); + test_comp("\\(", 0, REG_EPAREN); + test_comp("\\)", 0, REG_EPAREN); + test_comp("a\\{1", 0, REG_EBRACE); + test_comp("a\\{1,x\\}", 0, REG_BADBR); + test_comp("a\\{1x\\}", 0, REG_BADBR); + test_comp("a\\{1,0\\}", 0, REG_BADBR); + test_comp("a\\{x\\}", 0, REG_BADBR); + test_comp("a\\{\\}", 0, REG_BADBR); + + + + + /* + * Internationalization tests. + */ + + /* This same test with the correct locale is below. */ + test_comp("µ¡+", REG_EXTENDED, 0); + test_exec("¤³¤Î¾Þ¤Ï¡¢µ¡¡¦ÍøÊØÀ¡¦¥»¥", 0, REG_OK, 10, 13, END); + +#if !defined(WIN32) && !defined(__OpenBSD__) + if (setlocale(LC_CTYPE, "en_US.ISO-8859-1") != NULL) + { + printf("\nTesting LC_CTYPE en_US.ISO-8859-1\n"); + test_comp("aBCdeFghiJKlmnoPQRstuvWXyZåäö", REG_ICASE, 0); + test_exec("abCDefGhiJKlmNoPqRStuVwXyzÅÄÖ", 0, REG_OK, 0, 29, END); + } + +#ifdef TRE_MULTIBYTE + if (setlocale(LC_CTYPE, "ja_JP.eucjp") != NULL) + { + printf("\nTesting LC_CTYPE ja_JP.eucjp\n"); + /* I tried to make a test where implementations not aware of multibyte + character sets will fail. I have no idea what the japanese text here + means, I took it from http://www.ipsec.co.jp/. */ + test_comp("µ¡+", REG_EXTENDED, 0); + test_exec("¤³¤Î¾Þ¤Ï¡¢µ¡¡¦ÍøÊØÀ¡¦¥»¥", 0, REG_OK, 10, 12, END); + + test_comp("a", REG_EXTENDED, 0); + test_nexec("foo\000bar", 7, 0, REG_OK, 5, 6, END); + test_comp("c$", REG_EXTENDED, 0); + test_exec("abc", 0, REG_OK, 2, 3, END); + } +#endif /* TRE_MULTIBYTE */ +#endif + + tre_regfree(&reobj); + + printf("\n"); + if (comp_errors || exec_errors) + printf("%d (%d + %d) out of %d tests FAILED!\n", + comp_errors + exec_errors, comp_errors, exec_errors, + comp_tests + exec_tests); + else + printf("All %d tests passed.\n", comp_tests + exec_tests); + + +#ifdef MALLOC_DEBUGGING + if (xmalloc_dump_leaks()) + return 1; +#endif /* MALLOC_DEBUGGING */ + + return comp_errors || exec_errors; +} + +/* EOF */ |