changeset 667:039aa00cbfbf

Work on regex matcher.
author Matti Hamalainen <ccr@tnsp.org>
date Mon, 27 Jan 2020 17:07:06 +0200
parents e1d27caf0dbd
children 48e8820bc625
files th_regex.c
diffstat 1 files changed, 57 insertions(+), 57 deletions(-) [+]
line wrap: on
line diff
--- a/th_regex.c	Mon Jan 27 12:43:39 2020 +0200
+++ b/th_regex.c	Mon Jan 27 17:07:06 2020 +0200
@@ -942,81 +942,81 @@
     const int level
     )
 {
-    size_t toffs = *offs;
-    BOOL res = FALSE, rest = FALSE;
+    size_t toffs = *offs, last_offs = *offs;
     ssize_t count = 0;
 
     do
     {
         // Attempt to match the repeated node once
-        size_t poffs;
-        BOOL matched;
+        size_t poffs = toffs;
+        if (th_regex_match_one(haystack, &poffs, node, flags, level))
+        {
+            // Matched, increase count of repeats
+            count++;
+            //DBG_RE_PRINT("#%" PRId_SSIZE_T "\n", count);
+
+            // poffs should now be at position + 1 from match
+        }
+        else
+        {
+            // Did not match, get out if repeatMin > 0
+            if (node->repeatMin > 0)
+                break;
+        }
+
+        // Attempt to match rest of the expression
+        size_t qoffs1 = poffs, qoffs2 = toffs;
+        DBG_RE_PRINT("try rest '%s' :: '%s'\n", haystack + qoffs1, haystack + qoffs2);
+        if (th_regex_match_expr(haystack, &qoffs1, expr, *nnode + 1, flags, level + 1))
+        {
+            // Matched
+            toffs = last_offs = qoffs1;
 
-        poffs = toffs;
-        if ((matched = th_regex_match_one(haystack, &poffs, node, flags, level)))
+            DBG_RE_PRINT("  yes1: count=%" PRId_SSIZE_T " [%" PRId_SSIZE_T " .. %" PRId_SSIZE_T "]\n", count, node->repeatMin, node->repeatMax);
+
+            // Check min repeats and if we are "not greedy".
+            if (count >= node->repeatMin && node->repeatMax == 1)
+                break;
+
+            // Check max repeats
+            if (node->repeatMax > 0 && count >= node->repeatMax)
+                break;
+        }
+        else
+        if (node->repeatMin == 0 &&
+            th_regex_match_expr(haystack, &qoffs2, expr, *nnode + 1, flags, level + 1))
         {
-            // Matched, increase count
-            count++;
-//            DBG_RE_PRINT("#%" PRId_SSIZE_T "\n", count);
+            // Matched
+            toffs = last_offs = qoffs2;
+
+            DBG_RE_PRINT("  yes2: count=%" PRId_SSIZE_T " [%" PRId_SSIZE_T " .. %" PRId_SSIZE_T "]\n", count, node->repeatMin, node->repeatMax);
+
+            // Check min repeats and if we are "not greedy".
+            if (count >= node->repeatMin && node->repeatMax == 1)
+                break;
+
+            // Check max repeats
+            if (node->repeatMax > 0 && count >= node->repeatMax)
+                break;
 
         }
         else
         {
-            // No match, backtrack
-            poffs = toffs;
-//            DBG_RE_PRINT("nope\n");
-            if (rest)
-                break;
+            // Rest of expression did not match, try again
+            DBG_RE_PRINT("  no\n");
+            toffs = poffs;
         }
 
-        // Attempt to match rest of the expression if matched
-        // or if required repeats are 0
-        if (matched || node->repeatMin == 0)
-        {
-            size_t qoffs = poffs;
-            DBG_RE_PRINT("try rest '%s'\n", haystack + qoffs);
-            if (th_regex_match_expr(haystack, &qoffs, expr, *nnode + 1, flags, level + 1))
-            {
-                // Matched
 
-                // Check min repeats and if we are "not greedy".
-                if (count >= node->repeatMin && node->repeatMax == 1)
-                    res = TRUE;
-
-                // Check max repeats
-                if (node->repeatMax > 0 && count >= node->repeatMax)
-                    res = TRUE;
+    } while (haystack[toffs] != 0);
 
-                DBG_RE_PRINT("yes: res=%s count=%" PRId_SSIZE_T " [%" PRId_SSIZE_T " .. %" PRId_SSIZE_T "]\n", res ? "yes" : "no", count, node->repeatMin, node->repeatMax);
-                toffs = qoffs;
-            }
-            else
-            {
-                // Rest of expression did not match
-                DBG_RE_PRINT("no\n");
-                toffs = poffs;
-            }
-
-            rest = TRUE;
-        }
-        else
-        {
-            DBG_RE_PRINT("no match and repeatmin>0\n");
-            break;
-        }
-
-//        DBG_RE_PRINT("res=%d [%" PRIu_SIZE_T "='%c']\n", res, toffs, haystack[toffs]);
-
-    } while (!res && haystack[toffs] != 0);
-
-    // Check min repeats and if we are "not greedy".
-    if (count >= node->repeatMin ||
-        (node->repeatMax > 0 && count >= node->repeatMax))
-        res = TRUE;
+    // Check results
+    BOOL res = count >= node->repeatMin ||
+        (node->repeatMax > 0 && count >= node->repeatMax);
 
     if (res)
     {
-        *offs = toffs;
+        *offs = last_offs;
         *nnode = expr->nnodes;
     }