changeset 643:a2bf1ea05b05

Cleanups.
author Matti Hamalainen <ccr@tnsp.org>
date Fri, 24 Jan 2020 12:06:43 +0200
parents 3a35db5c1873
children 562de49f8b4b
files tests.c th_regex.c
diffstat 2 files changed, 32 insertions(+), 46 deletions(-) [+]
line wrap: on
line diff
--- a/tests.c	Fri Jan 24 09:39:09 2020 +0200
+++ b/tests.c	Fri Jan 24 12:06:43 2020 +0200
@@ -577,7 +577,6 @@
     int res;
 
     printf("========================================\n");
-    printf("pattern '%s'\n", pattern);
     if ((res = th_regex_compile(&reg, pattern)) != THERR_OK)
     {
         THERR("Regex compilation failed: %s\n",
@@ -590,6 +589,7 @@
         th_regex_match_t *matches = NULL;
         size_t nmatches;
 
+        printf("----------------------------------------\n");
         if ((res = th_regex_match(reg, def->str,
             &nmatches, &matches, -1, def->flags)) != THERR_OK)
         {
@@ -598,6 +598,7 @@
             goto out;
         }
 
+        printf("\npattern '%s'\n", pattern);
         printf("  '%s': matched %" PRIu_SIZE_T " time(s), testresult=%s\n",
             def->str,
             nmatches,
@@ -879,7 +880,7 @@
     if (test_set_start("Regular expressions"))
     {
 #ifdef TH_EXPERIMENTAL_REGEX_DEBUG
-        th_dbg_re_flags = TH_DBG_RE_MATCH;
+        th_dbg_re_flags = th_verbosity > 0 ? TH_DBG_RE_MATCH : 0;
 #endif
 
 #if 0
@@ -890,8 +891,6 @@
                 printf("result: %s\n", th_error_str(res));
             th_regex_free(reg);
         }
-
-        //
         {
             static const test_regex_def tlist[] =
             {
--- a/th_regex.c	Fri Jan 24 09:39:09 2020 +0200
+++ b/th_regex.c	Fri Jan 24 12:06:43 2020 +0200
@@ -19,8 +19,7 @@
 static const char *re_match_modes[] =
 {
     "ONCE",
-    "COUNT GREEDY",
-    "COUNT NONGREEDY",
+    "COUNT",
     "ANCHOR START",
     "ANCHOR END",
 };
@@ -83,8 +82,7 @@
 enum
 {
     TH_RE_MATCH_ONCE,
-    TH_RE_MATCH_COUNT_GREEDY,
-    TH_RE_MATCH_COUNT_NONGREEDY,
+    TH_RE_MATCH_COUNT,
 
     TH_RE_MATCH_ANCHOR_START,
     TH_RE_MATCH_ANCHOR_END,
@@ -423,33 +421,21 @@
 
                 if (cch == '?')
                 {
-                    // Check if previous was a count
-                    if (pnode->mode == TH_RE_MATCH_COUNT_NONGREEDY)
-                    {
-                        res = THERR_INVALID_DATA;
-                        goto exit;
-                    }
-                    else
-                    if (pnode->mode != TH_RE_MATCH_COUNT_GREEDY)
-                    {
-                        // Previous token is optional (repeat 0-1 times)
-                        pnode->repeatMin = 0;
-                        pnode->repeatMax = 1;
-                    }
-
-                    pnode->mode = TH_RE_MATCH_COUNT_NONGREEDY;
+                    // Previous token is optional (repeat 0-1 times) (non-greedy matching)
+                    pnode->mode = TH_RE_MATCH_COUNT;
+                    pnode->repeatMin = 0;
+                    pnode->repeatMax = 1;
                 }
                 else
                 {
                     // Check if previous was a count ("**", "*+", etc.)
-                    if (pnode->mode == TH_RE_MATCH_COUNT_GREEDY ||
-                        pnode->mode == TH_RE_MATCH_COUNT_NONGREEDY)
+                    if (pnode->mode == TH_RE_MATCH_COUNT)
                     {
                         res = THERR_INVALID_DATA;
                         goto exit;
                     }
 
-                    pnode->mode = TH_RE_MATCH_COUNT_GREEDY;
+                    pnode->mode = TH_RE_MATCH_COUNT;
 
                     if (cch == '*')
                     {
@@ -483,7 +469,7 @@
                     ctx.offs - start)) != THERR_OK)
                     goto exit;
 
-                pnode->mode = TH_RE_MATCH_COUNT_GREEDY;
+                pnode->mode = TH_RE_MATCH_COUNT;
 
                 if (th_regex_find_next(tmp, 0, &start, ','))
                 {
@@ -744,18 +730,17 @@
         const th_regex_node_t *node = &expr->nodes[nnode];
 
         DBG_RE_MATCH("  expr [%" PRIu_SIZE_T "/%" PRIu_SIZE_T "]: %s %s '%s'\n",
-            nnode, expr->nnodes,
+            nnode + 1, expr->nnodes,
             re_match_modes[node->mode], re_match_types[node->type],
-            haystack + *poffs);
+            haystack + soffs);
 
         switch (node->mode)
         {
             case TH_RE_MATCH_ONCE:
-                res = th_regex_match_one(haystack, poffs, node, flags);
+                res = th_regex_match_one(haystack, &soffs, node, flags);
                 break;
 
-            case TH_RE_MATCH_COUNT_GREEDY:
-            case TH_RE_MATCH_COUNT_NONGREEDY:
+            case TH_RE_MATCH_COUNT:
                 {
                     ssize_t count = 0;
 
@@ -765,26 +750,29 @@
                     do
                     {
                         BOOL match;
-                        size_t toffs = *poffs, tnode = nnode;
+                        size_t toffs = soffs, tnode;
 
+                        DBG_RE_MATCH("    START '%s'\n", haystack + toffs);
                         do {
-                            match = th_regex_match_one(haystack, &toffs, node, flags);
-                            if (match && haystack[toffs] != 0)
+                            match = TRUE;
+                            for (tnode = nnode; match && tnode < expr->nnodes && haystack[toffs] != 0; )
                             {
-                                for (tnode = nnode + 1; match && tnode < expr->nnodes && haystack[toffs] != 0; tnode++)
-                                {
-                                    size_t noffs = toffs;
-                                    match = th_regex_match_one(haystack, &toffs, &expr->nodes[tnode], flags);
-                                    DBG_RE_MATCH("    '%s': %d\n", haystack + noffs, match);
-                                }
+                                match = th_regex_match_one(haystack, &toffs, &expr->nodes[tnode], flags);
+                                if (match)
+                                    tnode++;
                             }
+                            DBG_RE_MATCH("    '%s': %d (tnode=%" PRIu_SIZE_T ")\n", haystack + toffs, match, tnode);
+                            if (node->repeatMin >= 0 && match)
+                                break;
                         } while (!match && haystack[toffs] != 0);
 
+                        DBG_RE_MATCH("    END '%s': %d (tnode=%" PRIu_SIZE_T ")\n", haystack + toffs, match, tnode);
+
                         if (match)
                         {
                             // Node matched
                             count++;
-                            *poffs = soffs;
+                            soffs = toffs;
                             nnode = tnode;
                             res = (node->repeatMax > 0 && count >= node->repeatMax);
                         }
@@ -801,18 +789,17 @@
                 }
                 break;
 
-
             case TH_RE_MATCH_ANCHOR_START:
-                res = (*poffs == 0);
+                res = (soffs == 0);
                 break;
 
             case TH_RE_MATCH_ANCHOR_END:
-                res = (haystack[*poffs] == 0);
+                res = (haystack[soffs] == 0);
                 break;
         }
     }
 
-    if (!res)
+    if (res)
         *poffs = soffs;
 
     return res;