diff th_regex.c @ 666:e1d27caf0dbd

More work on regex stuff.
author Matti Hamalainen <ccr@tnsp.org>
date Mon, 27 Jan 2020 12:43:39 +0200
parents c5aa9ada1051
children 039aa00cbfbf
line wrap: on
line diff
--- a/th_regex.c	Mon Jan 27 07:51:07 2020 +0200
+++ b/th_regex.c	Mon Jan 27 12:43:39 2020 +0200
@@ -942,55 +942,87 @@
     const int level
     )
 {
-    size_t toffs = *offs, noffs;
-    BOOL res, match = FALSE;
+    size_t toffs = *offs;
+    BOOL res = FALSE, rest = FALSE;
     ssize_t count = 0;
 
-    if (node->repeatMin > 0)
     do
     {
-        noffs = toffs;
-        match = th_regex_match_one(haystack, &toffs, node, flags, level);
-        if (match)
+        // Attempt to match the repeated node once
+        size_t poffs;
+        BOOL matched;
+
+        poffs = toffs;
+        if ((matched = th_regex_match_one(haystack, &poffs, node, flags, level)))
         {
+            // Matched, increase count
             count++;
+//            DBG_RE_PRINT("#%" PRId_SSIZE_T "\n", count);
+
         }
         else
-            toffs = noffs;
+        {
+            // No match, backtrack
+            poffs = toffs;
+//            DBG_RE_PRINT("nope\n");
+            if (rest)
+                break;
+        }
 
-        if (node->repeatMin >= 0 &&
-            count >= node->repeatMin &&
-            node->repeatMax > 0 &&
-            count >= node->repeatMax)
-            break;
+        // Attempt to match rest of the expression if matched
+        // or if required repeats are 0
+        if (matched || node->repeatMin == 0)
+        {
+            size_t qoffs = poffs;
+            DBG_RE_PRINT("try rest '%s'\n", haystack + qoffs);
+            if (th_regex_match_expr(haystack, &qoffs, expr, *nnode + 1, flags, level + 1))
+            {
+                // Matched
 
-    } while (match && toffs > noffs);
+                // Check min repeats and if we are "not greedy".
+                if (count >= node->repeatMin && node->repeatMax == 1)
+                    res = TRUE;
+
+                // Check max repeats
+                if (node->repeatMax > 0 && count >= node->repeatMax)
+                    res = TRUE;
 
-    if (count > 0 || node->repeatMin == 0)
-    {
-        DBG_RE_PRINT("count=%" PRId_SSIZE_T " \"%s\"\n",
-            count, haystack + toffs);
+                DBG_RE_PRINT("yes: res=%s count=%" PRId_SSIZE_T " [%" PRId_SSIZE_T " .. %" PRId_SSIZE_T "]\n", res ? "yes" : "no", count, node->repeatMin, node->repeatMax);
+                toffs = qoffs;
+            }
+            else
+            {
+                // Rest of expression did not match
+                DBG_RE_PRINT("no\n");
+                toffs = poffs;
+            }
 
-        match = th_regex_match_expr(haystack, &toffs, expr, *nnode + 1, flags, level + 1);
+            rest = TRUE;
+        }
+        else
+        {
+            DBG_RE_PRINT("no match and repeatmin>0\n");
+            break;
+        }
 
-        DBG_RE_PRINT("rest expr match=%s \"%s\"\n",
-            match ? "YES" : "NO", haystack + toffs);
-    }
+//        DBG_RE_PRINT("res=%d [%" PRIu_SIZE_T "='%c']\n", res, toffs, haystack[toffs]);
+
+    } while (!res && haystack[toffs] != 0);
 
-    if (match)
+    // Check min repeats and if we are "not greedy".
+    if (count >= node->repeatMin ||
+        (node->repeatMax > 0 && count >= node->repeatMax))
+        res = TRUE;
+
+    if (res)
     {
         *offs = toffs;
         *nnode = expr->nnodes;
     }
 
-    res = match &&
-        (
-        (node->repeatMax > 0 && count >= node->repeatMax) ||
-        (node->repeatMin >= 0 && count >= node->repeatMin)
-        );
-
-    DBG_RE_PRINT("RESULT: match=%s, res=%s\n",
-        match ? "YES" : "NO", res ? "YES" : "NO");
+    DBG_RE_PRINT("RESULT: %s : offs=%" PRIu_SIZE_T "='%s'\n",
+        res ? "YES" : "NO",
+        *offs, haystack + *offs);
 
     return res;
 }