# HG changeset patch # User Matti Hamalainen # Date 1580137626 -7200 # Node ID 039aa00cbfbf51c50c4be8f39eafc9c45eb4ac0a # Parent e1d27caf0dbd4fc585f16a7ba762dd0fd761b61b Work on regex matcher. diff -r e1d27caf0dbd -r 039aa00cbfbf th_regex.c --- a/th_regex.c Mon Jan 27 12:43:39 2020 +0200 +++ b/th_regex.c Mon Jan 27 17:07:06 2020 +0200 @@ -942,81 +942,81 @@ const int level ) { - size_t toffs = *offs; - BOOL res = FALSE, rest = FALSE; + size_t toffs = *offs, last_offs = *offs; ssize_t count = 0; do { // Attempt to match the repeated node once - size_t poffs; - BOOL matched; + size_t poffs = toffs; + if (th_regex_match_one(haystack, &poffs, node, flags, level)) + { + // Matched, increase count of repeats + count++; + //DBG_RE_PRINT("#%" PRId_SSIZE_T "\n", count); + + // poffs should now be at position + 1 from match + } + else + { + // Did not match, get out if repeatMin > 0 + if (node->repeatMin > 0) + break; + } + + // Attempt to match rest of the expression + size_t qoffs1 = poffs, qoffs2 = toffs; + DBG_RE_PRINT("try rest '%s' :: '%s'\n", haystack + qoffs1, haystack + qoffs2); + if (th_regex_match_expr(haystack, &qoffs1, expr, *nnode + 1, flags, level + 1)) + { + // Matched + toffs = last_offs = qoffs1; - poffs = toffs; - if ((matched = th_regex_match_one(haystack, &poffs, node, flags, level))) + DBG_RE_PRINT(" yes1: count=%" PRId_SSIZE_T " [%" PRId_SSIZE_T " .. %" PRId_SSIZE_T "]\n", count, node->repeatMin, node->repeatMax); + + // Check min repeats and if we are "not greedy". + if (count >= node->repeatMin && node->repeatMax == 1) + break; + + // Check max repeats + if (node->repeatMax > 0 && count >= node->repeatMax) + break; + } + else + if (node->repeatMin == 0 && + th_regex_match_expr(haystack, &qoffs2, expr, *nnode + 1, flags, level + 1)) { - // Matched, increase count - count++; -// DBG_RE_PRINT("#%" PRId_SSIZE_T "\n", count); + // Matched + toffs = last_offs = qoffs2; + + DBG_RE_PRINT(" yes2: count=%" PRId_SSIZE_T " [%" PRId_SSIZE_T " .. %" PRId_SSIZE_T "]\n", count, node->repeatMin, node->repeatMax); + + // Check min repeats and if we are "not greedy". + if (count >= node->repeatMin && node->repeatMax == 1) + break; + + // Check max repeats + if (node->repeatMax > 0 && count >= node->repeatMax) + break; } else { - // No match, backtrack - poffs = toffs; -// DBG_RE_PRINT("nope\n"); - if (rest) - break; + // Rest of expression did not match, try again + DBG_RE_PRINT(" no\n"); + toffs = poffs; } - // Attempt to match rest of the expression if matched - // or if required repeats are 0 - if (matched || node->repeatMin == 0) - { - size_t qoffs = poffs; - DBG_RE_PRINT("try rest '%s'\n", haystack + qoffs); - if (th_regex_match_expr(haystack, &qoffs, expr, *nnode + 1, flags, level + 1)) - { - // Matched - // Check min repeats and if we are "not greedy". - if (count >= node->repeatMin && node->repeatMax == 1) - res = TRUE; - - // Check max repeats - if (node->repeatMax > 0 && count >= node->repeatMax) - res = TRUE; + } while (haystack[toffs] != 0); - DBG_RE_PRINT("yes: res=%s count=%" PRId_SSIZE_T " [%" PRId_SSIZE_T " .. %" PRId_SSIZE_T "]\n", res ? "yes" : "no", count, node->repeatMin, node->repeatMax); - toffs = qoffs; - } - else - { - // Rest of expression did not match - DBG_RE_PRINT("no\n"); - toffs = poffs; - } - - rest = TRUE; - } - else - { - DBG_RE_PRINT("no match and repeatmin>0\n"); - break; - } - -// DBG_RE_PRINT("res=%d [%" PRIu_SIZE_T "='%c']\n", res, toffs, haystack[toffs]); - - } while (!res && haystack[toffs] != 0); - - // Check min repeats and if we are "not greedy". - if (count >= node->repeatMin || - (node->repeatMax > 0 && count >= node->repeatMax)) - res = TRUE; + // Check results + BOOL res = count >= node->repeatMin || + (node->repeatMax > 0 && count >= node->repeatMax); if (res) { - *offs = toffs; + *offs = last_offs; *nnode = expr->nnodes; }