# HG changeset patch # User Matti Hamalainen # Date 1579944959 -7200 # Node ID 2c9260f5cf4470069dfd377a43c2498bafc4e373 # Parent 91c43398c6fcca1fdb369e52d8a939048ce4e628 Tweedle. diff -r 91c43398c6fc -r 2c9260f5cf44 tests.c --- a/tests.c Sat Jan 25 08:01:17 2020 +0200 +++ b/tests.c Sat Jan 25 11:35:59 2020 +0200 @@ -609,7 +609,8 @@ goto out; } - th_regex_dump(stdout, 1, expr); + if (th_verbosity > 0) + th_regex_dump(stdout, 1, expr); for (const test_regex_def1 *def = list; def->str != NULL; def++) { @@ -638,6 +639,7 @@ th_regex_free(expr); } + void test_regex_list2(const test_regex_def2 *list) { printf("========================================\n"); @@ -657,7 +659,8 @@ goto out; } - th_regex_dump(stdout, 1, expr); + if (th_verbosity > 0) + th_regex_dump(stdout, 1, expr); printf("----------------------------------------\n"); diff -r 91c43398c6fc -r 2c9260f5cf44 th_regex.c --- a/th_regex.c Sat Jan 25 08:01:17 2020 +0200 +++ b/th_regex.c Sat Jan 25 11:35:59 2020 +0200 @@ -813,8 +813,10 @@ if (item->type == 0) { for (size_t n = 0; n < item->nchars; n++) - if (item->chars[n] == cch) - return TRUE; + { + if (item->chars[n] == cch) + return TRUE; + } } else { @@ -829,7 +831,7 @@ static BOOL th_regex_match_expr( const th_regex_char_t *haystack, - size_t *poffs, + size_t *offs, const th_regex_t *expr, const size_t startnode, const int flags, @@ -839,7 +841,7 @@ static BOOL th_regex_match_one( const th_regex_char_t *haystack, - size_t *poffs, + size_t *offs, const th_regex_node_t *node, const int flags, const int level @@ -851,12 +853,12 @@ switch (node->type) { case TH_RE_TYPE_SUBEXPR: - res = th_regex_match_expr(haystack, poffs, node->match.expr, 0, flags, level + 1); + res = th_regex_match_expr(haystack, offs, node->match.expr, 0, flags, level + 1); break; case TH_RE_TYPE_LIST: case TH_RE_TYPE_LIST_REVERSE: - if ((cch = haystack[*poffs]) == 0) + if ((cch = haystack[*offs]) == 0) res = FALSE; else { @@ -865,27 +867,27 @@ if (node->type == TH_RE_TYPE_LIST_REVERSE) res = !res; - (*poffs)++; + (*offs)++; } break; case TH_RE_TYPE_ANY_CHAR: - if ((cch = haystack[*poffs]) == 0) + if ((cch = haystack[*offs]) == 0) res = FALSE; else { res = TRUE; - (*poffs)++; + (*offs)++; } break; case TH_RE_TYPE_CHAR: - if ((cch = haystack[*poffs]) == 0) + if ((cch = haystack[*offs]) == 0) res = FALSE; else { res = (cch == node->match.chr); - (*poffs)++; + (*offs)++; } break; @@ -893,9 +895,9 @@ res = TRUE; for (th_regex_char_t *str = node->match.str; res && *str != 0; - str++, (*poffs)++) + str++, (*offs)++) { - if (haystack[*poffs] != *str) + if (haystack[*offs] != *str) res = FALSE; } break; @@ -905,9 +907,73 @@ } +static BOOL th_regex_match_count( + const th_regex_char_t *haystack, + size_t *offs, + const th_regex_t *expr, + const th_regex_node_t *node, + size_t *nnode, + const int flags, + const int level + ) +{ + size_t toffs = *offs, noffs; + BOOL res, match = FALSE; + ssize_t count = 0; + + if (node->repeatMin > 0) + do + { + noffs = toffs; + match = th_regex_match_one(haystack, &toffs, node, flags, level); + if (match) + { + count++; + } + else + toffs = noffs; + + if (node->repeatMin >= 0 && + count >= node->repeatMin && + node->repeatMax > 0 && + count >= node->repeatMax) + break; + + } while (match && toffs > noffs); + + if (count > 0 || node->repeatMin == 0) + { + DBG_RE_MATCH("count=%" PRId_SSIZE_T " \"%s\"\n", + count, haystack + toffs); + + match = th_regex_match_expr(haystack, &toffs, expr, *nnode + 1, flags, level + 1); + + DBG_RE_MATCH("rest expr match=%s \"%s\"\n", + match ? "YES" : "NO", haystack + toffs); + } + + if (match) + { + *offs = toffs; + *nnode = expr->nnodes; + } + + res = match && + ( + (node->repeatMax > 0 && count >= node->repeatMax) || + (node->repeatMin >= 0 && count >= node->repeatMin) + ); + + DBG_RE_MATCH("RESULT: match=%s, res=%s\n", + match ? "YES" : "NO", res ? "YES" : "NO"); + + return res; +} + + static BOOL th_regex_match_expr( const th_regex_char_t *haystack, - size_t *poffs, + size_t *offs, const th_regex_t *expr, const size_t startnode, const int flags, @@ -915,7 +981,7 @@ ) { BOOL res = TRUE; - size_t soffs = *poffs; + size_t soffs = *offs; for (size_t nnode = startnode; res && nnode < expr->nnodes; nnode++) { @@ -943,60 +1009,7 @@ break; case TH_RE_MATCH_COUNT: - { - ssize_t count = 0, ncount = 0; - - do - { - BOOL match; - size_t toffs = soffs, noffs; - - DBG_RE_MATCH(" ROUND #%" PRIu_SIZE_T ": '%s'\n", - count, haystack + toffs); - - res = FALSE; - match = TRUE; - do - { - noffs = toffs; - match = th_regex_match_one(haystack, &toffs, node, flags, level + 1); - if (match) - { - ncount++; - - if (node->repeatMin >= 0 && ncount >= node->repeatMin) - break; - } - } while (match && haystack[toffs] != 0 && toffs > noffs); - - DBG_RE_MATCH(" ROUND #%" PRIu_SIZE_T " END: match=%s \"%s\"\n", - ncount, match ? "YES" : "NO", haystack + toffs); - - if (ncount > 0) - { - match = th_regex_match_expr(haystack, &toffs, expr, nnode + 1, flags, level + 2); - } - - DBG_RE_MATCH(" ROUND #%" PRIu_SIZE_T " END: match=%s \"%s\"\n", - count, match ? "YES" : "NO", haystack + toffs); - - if (match) - { - // Node matched - count++; - soffs = toffs; - nnode = expr->nnodes; - - res = - (node->repeatMax > 0 && ncount >= node->repeatMax) || - (node->repeatMin >= 0 && ncount >= node->repeatMin); - } - - } while (!res); - - DBG_RE_MATCH(" RESULT: count=%" PRId_SSIZE_T ", ncount=%" PRId_SSIZE_T ", done=%s\n", - count, ncount, res ? "YES" : "NO"); - } + res = th_regex_match_count(haystack, &soffs, expr, node, &nnode, flags, level); break; case TH_RE_MATCH_ANCHOR_START: @@ -1010,7 +1023,7 @@ } if (res) - *poffs = soffs; + *offs = soffs; return res; }