# HG changeset patch # User Matti Hamalainen # Date 1579927661 -7200 # Node ID 1e7e3f96632e0fb0c9519251154ebca3c18737ea # Parent 9fcb0098f302490d5911f10fa9e1a2e76aa1875f And some more work. diff -r 9fcb0098f302 -r 1e7e3f96632e tests.c --- a/tests.c Sat Jan 25 05:46:40 2020 +0200 +++ b/tests.c Sat Jan 25 06:47:41 2020 +0200 @@ -568,7 +568,16 @@ th_regex_char_t *str; size_t nmatches; int flags; -} test_regex_def; +} test_regex_def1; + + +typedef struct +{ + th_regex_char_t *pattern; + th_regex_char_t *str; + size_t nmatches; + int flags; +} test_regex_def2; void test_regex_print_matches(const th_regex_char_t *str, const th_regex_match_t *matches) @@ -586,7 +595,7 @@ } -void test_regex_list(const test_regex_def *list, const th_regex_char_t *pattern) +void test_regex_list1(const test_regex_def1 *list, const th_regex_char_t *pattern) { th_regex_t *expr = NULL; int res; @@ -602,7 +611,7 @@ th_regex_dump(stdout, 1, expr); - for (const test_regex_def *def = list; def->str != NULL; def++) + for (const test_regex_def1 *def = list; def->str != NULL; def++) { th_regex_match_t *matches = NULL; size_t nmatches; @@ -629,6 +638,50 @@ th_regex_free(expr); } +void test_regex_list2(const test_regex_def2 *list) +{ + printf("========================================\n"); + + for (const test_regex_def2 *def = list; def->str != NULL; def++) + { + th_regex_t *expr = NULL; + th_regex_match_t *matches = NULL; + size_t nmatches; + int res; + + printf("Compiling pattern \"%s\"\n", def->pattern); + if ((res = th_regex_compile(&expr, def->pattern)) != THERR_OK) + { + THERR("Regex compilation failed: %s\n", + th_error_str(res)); + goto out; + } + + th_regex_dump(stdout, 1, expr); + + printf("----------------------------------------\n"); + + if ((res = th_regex_match(expr, def->str, + &nmatches, &matches, -1, def->flags)) != THERR_OK) + { + THERR("Regex match returned error: %s\n", + th_error_str(res)); + goto out; + } + + printf("'%s': matched %" PRIu_SIZE_T " time(s), testresult=%s\n", + def->str, + nmatches, + def->nmatches == nmatches ? "YES" : "NO"); + + test_regex_print_matches(def->str, matches); + +out: + th_regex_free_matches(matches); + th_regex_free(expr); + } +} + #endif @@ -906,7 +959,7 @@ #if 0 { - static const test_regex_def tlist[] = + static const test_regex_def1 tlist[] = { { "abcfoabccg" , 1, 0 }, { "abcbcfoabccg" , 1, 0 }, @@ -915,11 +968,11 @@ { NULL , 0, 0 } }; - test_regex_list(tlist, "a(bc){1,2}fo[oab]*cc?g"); + test_regex_list1(tlist, "a(bc){1,2}fo[oab]*cc?g"); } { - static const test_regex_def tlist[] = + static const test_regex_def1 tlist[] = { { "abcfoabccg" , 1, 0 }, { "abcbcfoabccg" , 1, 0 }, @@ -928,11 +981,11 @@ { NULL , 0, 0 } }; - test_regex_list(tlist, "^a(bc){1,2}fo[oab]*cc?g"); + test_regex_list1(tlist, "^a(bc){1,2}fo[oab]*cc?g"); } { - static const test_regex_def tlist[] = + static const test_regex_def1 tlist[] = { { "cg" , 1, 0 }, { "g" , 1, 0 }, @@ -941,12 +994,12 @@ { NULL , 0, 0 } }; - test_regex_list(tlist, "g$"); + test_regex_list1(tlist, "g$"); } #endif { - static const test_regex_def tlist[] = + static const test_regex_def1 tlist[] = { // { "zoobar" , 1, 0 }, { "zoo lol bar" , 1, 0 }, @@ -954,7 +1007,8 @@ { NULL , 0, 0 } }; - test_regex_list(tlist, "zoo.*?bar"); + test_regex_list1(tlist, "zoo.*?bar"); +// test_regex_list(tlist, "zoo.*?bar"); } } #endif diff -r 9fcb0098f302 -r 1e7e3f96632e th_regex.c --- a/th_regex.c Sat Jan 25 05:46:40 2020 +0200 +++ b/th_regex.c Sat Jan 25 06:47:41 2020 +0200 @@ -10,8 +10,15 @@ #ifdef TH_EXPERIMENTAL_REGEX_DEBUG -# define DBG_RE_MATCH(...) do { if (th_dbg_re_flags) fprintf(stdout, __VA_ARGS__); } while (0) BOOL th_dbg_re_flags = FALSE; + +# define DBG_RE_MATCH(...) do { \ + if (th_dbg_re_flags) \ + { \ + th_regex_dump_indent(stdout, level); \ + fprintf(stdout, __VA_ARGS__); \ + } \ + } while (0) #else # define DBG_RE_MATCH(...) #endif @@ -707,6 +714,62 @@ } +static void th_regex_dump_indent(FILE *fh, const int level) +{ + for (int indent = 0; indent < level; indent++) + fprintf(fh, " "); +} + + +static void th_regex_dump_node(FILE *fh, const th_regex_node_t *node) +{ + fprintf(fh, + "%s %s ", + re_match_modes[node->mode], + re_match_types[node->type]); + + if (node->mode == TH_RE_MATCH_COUNT) + { + fprintf(fh, "min=%" PRId_SSIZE_T ", max=%" PRId_SSIZE_T " : ", + node->repeatMin, node->repeatMax); + } + + switch (node->type) + { + case TH_RE_TYPE_CHAR: + fprintf(fh, "'%c'", node->match.chr); + break; + + case TH_RE_TYPE_STR: + fprintf(fh, "\"%s\"", node->match.str); + break; + + case TH_RE_TYPE_ANY_CHAR: + fprintf(fh, "."); + break; + + case TH_RE_TYPE_LIST: + case TH_RE_TYPE_LIST_REVERSE: + fprintf(fh, "[ "); + for (size_t n = 0; n < node->match.list.nitems; n++) + { + const th_regex_list_item_t *li = &node->match.list.items[n]; + if (li->type) + { + fprintf(fh, "'%c-%c' ", li->start, li->end); + } + else + { + for (size_t i = 0; i < li->nchars; i++) + fprintf(fh, "'%c' ", li->chars[i]); + } + } + fprintf(fh, "]"); + break; + } +} + + void th_regex_dump(FILE *fh, const int level, const th_regex_t *expr) { if (expr != NULL) @@ -715,58 +778,17 @@ { th_regex_node_t *node = &expr->nodes[nnode]; - for (int indent = 0; indent < level; indent++) - fprintf(fh, " "); - - fprintf(fh, "[%" PRIu_SIZE_T " / %" PRIu_SIZE_T "] %s %s ", - nnode + 1, expr->nnodes, - re_match_modes[node->mode], - re_match_types[node->type]); + th_regex_dump_indent(fh, level); - if (node->mode == TH_RE_MATCH_COUNT) - { - fprintf(fh, "min=%" PRId_SSIZE_T ", max=%" PRId_SSIZE_T " : ", - node->repeatMin, node->repeatMax); - } - - switch (node->type) - { - case TH_RE_TYPE_CHAR: - fprintf(fh, "'%c'\n", node->match.chr); - break; - - case TH_RE_TYPE_STR: - fprintf(fh, "\"%s\"\n", node->match.str); - break; + fprintf(fh, + "[%" PRIu_SIZE_T "/%" PRIu_SIZE_T "] ", + nnode + 1, expr->nnodes); - case TH_RE_TYPE_ANY_CHAR: - fprintf(fh, ".\n"); - break; + th_regex_dump_node(fh, node); + fprintf(fh, "\n"); - case TH_RE_TYPE_LIST: - case TH_RE_TYPE_LIST_REVERSE: - fprintf(fh, "[ "); - for (size_t n = 0; n < node->match.list.nitems; n++) - { - const th_regex_list_item_t *li = &node->match.list.items[n]; - if (li->type) - { - fprintf(fh, "'%c-%c' ", li->start, li->end); - } - else - { - for (size_t i = 0; i < li->nchars; i++) - fprintf(fh, "'%c' ", li->chars[i]); - } - } - fprintf(fh, "]\n"); - break; - - case TH_RE_TYPE_SUBEXPR: - fprintf(fh, "\n"); - th_regex_dump(fh, level + 1, node->match.expr); - break; - } + if (node->type == TH_RE_TYPE_SUBEXPR) + th_regex_dump(fh, level + 1, node->match.expr); } } } @@ -801,7 +823,8 @@ size_t *poffs, const th_regex_t *expr, const size_t startnode, - const int flags + const int flags, + const int level ); @@ -809,7 +832,8 @@ const th_regex_char_t *haystack, size_t *poffs, const th_regex_node_t *node, - const int flags + const int flags, + const int level ) { th_regex_char_t cch; @@ -818,7 +842,7 @@ switch (node->type) { case TH_RE_TYPE_SUBEXPR: - res = th_regex_match_expr(haystack, poffs, node->match.expr, 0, flags); + res = th_regex_match_expr(haystack, poffs, node->match.expr, 0, flags, level + 1); break; case TH_RE_TYPE_LIST: @@ -880,7 +904,8 @@ size_t *poffs, const th_regex_t *expr, const size_t startnode, - const int flags + const int flags, + const int level ) { BOOL res = TRUE; @@ -890,24 +915,28 @@ { const th_regex_node_t *node = &expr->nodes[nnode]; - DBG_RE_MATCH(" expr [%" PRIu_SIZE_T "/%" PRIu_SIZE_T "]: %s %s '%s'\n", - nnode + 1, expr->nnodes, - re_match_modes[node->mode], re_match_types[node->type], +#ifdef TH_EXPERIMENTAL_REGEX_DEBUG + th_regex_dump_indent(stdout, level); + fprintf(stdout, + "[%" PRIu_SIZE_T "/%" PRIu_SIZE_T "] ", + nnode + 1, expr->nnodes); + + th_regex_dump_node(stdout, node); + + fprintf(stdout, " <-> \"%s\"\n", haystack + soffs); +#endif switch (node->mode) { case TH_RE_MATCH_ONCE: - res = th_regex_match_one(haystack, &soffs, node, flags); + res = th_regex_match_one(haystack, &soffs, node, flags, level); break; case TH_RE_MATCH_COUNT: { ssize_t count = 0; - DBG_RE_MATCH(" min=%" PRId_SSIZE_T ", max=%" PRId_SSIZE_T "\n", - node->repeatMin, node->repeatMax); - do { BOOL match; @@ -922,7 +951,7 @@ size_t noffs = toffs; for (tnode = nnode; match && tnode < expr->nnodes && haystack[toffs] != 0; ) { - match = th_regex_match_one(haystack, &toffs, &expr->nodes[tnode], flags); + match = th_regex_match_one(haystack, &toffs, &expr->nodes[tnode], flags, level + 1); if (match) tnode++; } @@ -980,6 +1009,7 @@ const int flags) { size_t nmatches = 0; + int level = 0; (void) flags; if (pnmatches != NULL) @@ -998,10 +1028,7 @@ { size_t coffs = soffs; - DBG_RE_MATCH("\nTRY_MATCH @ startoffs=%" PRIu_SIZE_T ": '%s'\n", - soffs, haystack + soffs); - - if (th_regex_match_expr(haystack, &coffs, expr, 0, flags)) + if (th_regex_match_expr(haystack, &coffs, expr, 0, flags, level)) { // A match was found, increase count nmatches++; @@ -1012,6 +1039,7 @@ if (pmatches != NULL) { + // Add the match region to the list th_regex_match_t *match = th_malloc0(sizeof(th_regex_match_t)); if (match == NULL) return THERR_MALLOC;