Mercurial > hg > th-libs
diff th_regex.c @ 666:e1d27caf0dbd
More work on regex stuff.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Mon, 27 Jan 2020 12:43:39 +0200 |
parents | c5aa9ada1051 |
children | 039aa00cbfbf |
line wrap: on
line diff
--- a/th_regex.c Mon Jan 27 07:51:07 2020 +0200 +++ b/th_regex.c Mon Jan 27 12:43:39 2020 +0200 @@ -942,55 +942,87 @@ const int level ) { - size_t toffs = *offs, noffs; - BOOL res, match = FALSE; + size_t toffs = *offs; + BOOL res = FALSE, rest = FALSE; ssize_t count = 0; - if (node->repeatMin > 0) do { - noffs = toffs; - match = th_regex_match_one(haystack, &toffs, node, flags, level); - if (match) + // Attempt to match the repeated node once + size_t poffs; + BOOL matched; + + poffs = toffs; + if ((matched = th_regex_match_one(haystack, &poffs, node, flags, level))) { + // Matched, increase count count++; +// DBG_RE_PRINT("#%" PRId_SSIZE_T "\n", count); + } else - toffs = noffs; + { + // No match, backtrack + poffs = toffs; +// DBG_RE_PRINT("nope\n"); + if (rest) + break; + } - if (node->repeatMin >= 0 && - count >= node->repeatMin && - node->repeatMax > 0 && - count >= node->repeatMax) - break; + // Attempt to match rest of the expression if matched + // or if required repeats are 0 + if (matched || node->repeatMin == 0) + { + size_t qoffs = poffs; + DBG_RE_PRINT("try rest '%s'\n", haystack + qoffs); + if (th_regex_match_expr(haystack, &qoffs, expr, *nnode + 1, flags, level + 1)) + { + // Matched - } while (match && toffs > noffs); + // Check min repeats and if we are "not greedy". + if (count >= node->repeatMin && node->repeatMax == 1) + res = TRUE; + + // Check max repeats + if (node->repeatMax > 0 && count >= node->repeatMax) + res = TRUE; - if (count > 0 || node->repeatMin == 0) - { - DBG_RE_PRINT("count=%" PRId_SSIZE_T " \"%s\"\n", - count, haystack + toffs); + DBG_RE_PRINT("yes: res=%s count=%" PRId_SSIZE_T " [%" PRId_SSIZE_T " .. %" PRId_SSIZE_T "]\n", res ? "yes" : "no", count, node->repeatMin, node->repeatMax); + toffs = qoffs; + } + else + { + // Rest of expression did not match + DBG_RE_PRINT("no\n"); + toffs = poffs; + } - match = th_regex_match_expr(haystack, &toffs, expr, *nnode + 1, flags, level + 1); + rest = TRUE; + } + else + { + DBG_RE_PRINT("no match and repeatmin>0\n"); + break; + } - DBG_RE_PRINT("rest expr match=%s \"%s\"\n", - match ? "YES" : "NO", haystack + toffs); - } +// DBG_RE_PRINT("res=%d [%" PRIu_SIZE_T "='%c']\n", res, toffs, haystack[toffs]); + + } while (!res && haystack[toffs] != 0); - if (match) + // Check min repeats and if we are "not greedy". + if (count >= node->repeatMin || + (node->repeatMax > 0 && count >= node->repeatMax)) + res = TRUE; + + if (res) { *offs = toffs; *nnode = expr->nnodes; } - res = match && - ( - (node->repeatMax > 0 && count >= node->repeatMax) || - (node->repeatMin >= 0 && count >= node->repeatMin) - ); - - DBG_RE_PRINT("RESULT: match=%s, res=%s\n", - match ? "YES" : "NO", res ? "YES" : "NO"); + DBG_RE_PRINT("RESULT: %s : offs=%" PRIu_SIZE_T "='%s'\n", + res ? "YES" : "NO", + *offs, haystack + *offs); return res; }