changeset 640:9e1f9e1d1487

Aaand some more work. Still just a broken concept.
author Matti Hamalainen <ccr@tnsp.org>
date Fri, 24 Jan 2020 09:13:24 +0200
parents 8c957ad9d4c3
children 9a1ed82abefd
files tests.c th_regex.c th_regex.h
diffstat 3 files changed, 271 insertions(+), 294 deletions(-) [+]
line wrap: on
line diff
--- a/tests.c	Thu Jan 23 11:38:28 2020 +0200
+++ b/tests.c	Fri Jan 24 09:13:24 2020 +0200
@@ -565,15 +565,15 @@
 
 typedef struct
 {
-    th_regex_char *str;
+    th_regex_char_t *str;
     size_t nmatches;
     int flags;
 } test_regex_def;
 
 
-void test_regex_list(const test_regex_def *list, const th_regex_char *pattern)
+void test_regex_list(const test_regex_def *list, const th_regex_char_t *pattern)
 {
-    th_regex_ctx *reg = NULL;
+    th_regex_t *reg = NULL;
     int res;
 
     printf("========================================\n");
@@ -587,7 +587,7 @@
 
     for (const test_regex_def *def = list; def->str != NULL; def++)
     {
-        th_regex_match_node *matches = NULL;
+        th_regex_match_t *matches = NULL;
         size_t nmatches;
 
         if ((res = th_regex_match(reg, def->str,
@@ -603,8 +603,8 @@
             nmatches,
             def->nmatches == nmatches ? "YES" : "NO");
 
-        for (th_regex_match_node *m = matches;
-            m != NULL; m = (th_regex_match_node *) m->node.next)
+        for (th_regex_match_t *m = matches;
+            m != NULL; m = (th_regex_match_t *) m->node.next)
         {
             char *tmp = th_strndup(def->str + m->start, m->len);
             printf("      match [%" PRIu_SIZE_T " ++ %" PRIu_SIZE_T "]: '%s'\n",
@@ -878,18 +878,18 @@
 #ifdef TH_EXPERIMENTAL_REGEX
     if (test_set_start("Regular expressions"))
     {
-        th_regex_ctx *reg = NULL;
-        int res;
-
 #ifdef TH_EXPERIMENTAL_REGEX_DEBUG
         th_dbg_re_flags = TH_DBG_RE_MATCH;
 #endif
 
 #if 0
-        res = th_regex_compile(&reg, "z*k+abba fabboa? k{4} [gz]{1,2} foo(bar|zoo)?");
-        if (res != THERR_OK)
-            printf("result: %s\n", th_error_str(res));
-        th_regex_free(reg);
+        {
+            th_regex_t *reg = NULL;
+            int res = th_regex_compile(&reg, "z*k+abba fabboa? k{4} [gz]{1,2} foo(bar|zoo)?");
+            if (res != THERR_OK)
+                printf("result: %s\n", th_error_str(res));
+            th_regex_free(reg);
+        }
 
         //
         {
--- a/th_regex.c	Thu Jan 23 11:38:28 2020 +0200
+++ b/th_regex.c	Fri Jan 24 09:13:24 2020 +0200
@@ -10,10 +10,30 @@
 
 
 #ifdef TH_EXPERIMENTAL_REGEX_DEBUG
+#    define DBG_RE_COMPILE(...) do { if (th_dbg_re_flags & TH_DBG_RE_COMPILE) fprintf(stdout, __VA_ARGS__); } while (0)
+#    define DBG_RE_FREE(...)    do { if (th_dbg_re_flags & TH_DBG_RE_FREE) fprintf(stdout, __VA_ARGS__); } while (0)
+#    define DBG_RE_MATCH(...)   do { if (th_dbg_re_flags & TH_DBG_RE_MATCH) fprintf(stdout, __VA_ARGS__); } while (0)
+
 int th_dbg_re_flags = 0;
-#    define DBG_RE_COMPILE(...) do { if (th_dbg_re_flags & TH_DBG_RE_COMPILE) fprintf(stderr, __VA_ARGS__); } while (0)
-#    define DBG_RE_FREE(...)    do { if (th_dbg_re_flags & TH_DBG_RE_FREE) fprintf(stderr, __VA_ARGS__); } while (0)
-#    define DBG_RE_MATCH(...)   do { if (th_dbg_re_flags & TH_DBG_RE_MATCH) fprintf(stderr, __VA_ARGS__); } while (0)
+
+static const char *re_match_modes[] =
+{
+    "ONCE",
+    "COUNT GREEDY",
+    "COUNT NONGREEDY",
+    "ANCHOR START",
+    "ANCHOR END",
+};
+
+static const char *re_match_types[] =
+{
+    "CHAR",
+    "ANY",
+    "LIST",
+    "LIST REVERSE",
+    "SUBEXPR",
+};
+
 #else
 #    define DBG_RE_COMPILE(...)
 #    define DBG_RE_FREE(...)
@@ -23,11 +43,49 @@
 
 #ifdef TH_EXPERIMENTAL_REGEX
 
+typedef struct
+{
+    int type;
+    th_regex_char_t start, end;
+
+    size_t nchars;
+    th_regex_char_t *chars;
+} th_regex_list_item_t;
+
+
+typedef struct
+{
+    size_t nitems, itemssize;
+    th_regex_list_item_t *items;
+} th_regex_list_t;
+
+
+typedef struct
+{
+    int mode, type;
+    ssize_t repeatMin, repeatMax;
+
+    struct {
+        th_regex_char_t chr;
+        th_regex_list_t list;
+        struct th_regex_t *expr;
+    } match;
+} th_regex_node_t;
+
+
+struct th_regex_t
+{
+    size_t nnodes, nodessize;
+    th_regex_node_t *nodes;
+};
+
+
 enum
 {
     TH_RE_MATCH_ONCE,
     TH_RE_MATCH_COUNT_GREEDY,
     TH_RE_MATCH_COUNT_NONGREEDY,
+
     TH_RE_MATCH_ANCHOR_START,
     TH_RE_MATCH_ANCHOR_END,
 };
@@ -43,68 +101,47 @@
 };
 
 
-#ifdef TH_EXPERIMENTAL_REGEX_DEBUG
-static const char *re_match_modes[] =
+typedef struct
 {
-    "ONCE",
-    "COUNT GREEDY",
-    "COUNT NONGREEDY",
-    "ANCHOR_START",
-    "ANCHOR_END",
-};
+    const th_regex_char_t *pattern;
+    size_t offs;
 
-static const char *re_match_types[] =
-{
-    "CHAR",
-    "ANY",
-    "LIST",
-    "LIST_REVERSE",
-    "SUBEXPR",
-};
-#endif
+    th_regex_t *data;
+
+    size_t nstack, stacksize;
+    th_regex_t **stack;
+} th_regex_parse_ctx_t;
 
 
-typedef struct
+static void th_regex_node_init(th_regex_node_t *node)
 {
-    const th_regex_char *pattern;
-    size_t offs;
-
-    th_regex_ctx *data;
-
-    size_t nstack, stacksize;
-    th_regex_ctx **stack;
-} th_regex_parse_ctx;
-
-
-static void th_regex_node_init(th_regex_node *node)
-{
-    memset(node, 0, sizeof(th_regex_node));
+    memset(node, 0, sizeof(th_regex_node_t));
     node->mode = TH_RE_MATCH_ONCE;
 }
 
 
-static int th_regex_strndup(th_regex_char **pdst,
-    const th_regex_char *src, const size_t len)
+static int th_regex_strndup(th_regex_char_t **pdst,
+    const th_regex_char_t *src, const size_t len)
 {
     if (pdst == NULL)
         return THERR_NULLPTR;
 
-    if (UINTPTR_MAX / sizeof(th_regex_char) < len + 1)
+    if (UINTPTR_MAX / sizeof(th_regex_char_t) < len + 1)
         return THERR_BOUNDS;
 
-    if ((*pdst = (th_regex_char *)
-        th_malloc((len + 1) * sizeof(th_regex_char))) == NULL)
+    if ((*pdst = (th_regex_char_t *)
+        th_malloc((len + 1) * sizeof(th_regex_char_t))) == NULL)
         return THERR_MALLOC;
 
-    memcpy(*pdst, src, len * sizeof(th_regex_char));
+    memcpy(*pdst, src, len * sizeof(th_regex_char_t));
     (*pdst)[len] = 0;
 
     return THERR_OK;
 }
 
 
-static int th_regex_ctx_get_prev_node(
-    th_regex_parse_ctx *ctx, th_regex_node **pnode)
+static int th_regex_parse_ctx_get_prev_node(
+    th_regex_parse_ctx_t *ctx, th_regex_node_t **pnode)
 {
     if (ctx->data != NULL && ctx->data->nnodes > 0)
     {
@@ -116,14 +153,14 @@
 }
 
 
-static int th_regex_ctx_push(th_regex_parse_ctx *ctx)
+static int th_regex_parse_ctx_push(th_regex_parse_ctx_t *ctx)
 {
     if (ctx->stack == NULL || ctx->nstack + 1 >= ctx->stacksize)
     {
         ctx->stacksize += 16;
 
         if ((ctx->stack = th_realloc(ctx->stack,
-            ctx->stacksize * sizeof(th_regex_node *))) == NULL)
+            ctx->stacksize * sizeof(th_regex_node_t *))) == NULL)
             return THERR_MALLOC;
     }
 
@@ -135,7 +172,7 @@
 }
 
 
-static int th_regex_ctx_pop(th_regex_parse_ctx *ctx, th_regex_ctx **data)
+static int th_regex_parse_ctx_pop(th_regex_parse_ctx_t *ctx, th_regex_t **data)
 {
     if (ctx->nstack > 0)
     {
@@ -149,13 +186,13 @@
 }
 
 
-static int th_regex_ctx_node_commit(th_regex_parse_ctx *ctx, th_regex_node *node)
+static int th_regex_parse_ctx_node_commit(th_regex_parse_ctx_t *ctx, th_regex_node_t *node)
 {
-    th_regex_ctx *data = ctx->data;
+    th_regex_t *data = ctx->data;
 
     if (data == NULL)
     {
-        if ((data = ctx->data = th_malloc0(sizeof(th_regex_ctx))) == NULL)
+        if ((data = ctx->data = th_malloc0(sizeof(th_regex_t))) == NULL)
             return THERR_MALLOC;
     }
 
@@ -163,11 +200,11 @@
     {
         data->nodessize += 16;
         if ((data->nodes = th_realloc(data->nodes,
-            data->nodessize * sizeof(th_regex_node))) == NULL)
+            data->nodessize * sizeof(th_regex_node_t))) == NULL)
             return THERR_MALLOC;
     }
 
-    memcpy(&data->nodes[data->nnodes], node, sizeof(th_regex_node));
+    memcpy(&data->nodes[data->nnodes], node, sizeof(th_regex_node_t));
     data->nnodes++;
 
     DBG_RE_COMPILE(
@@ -181,9 +218,9 @@
 }
 
 
-static BOOL th_regex_find_next(const th_regex_char *str,
+static BOOL th_regex_find_next(const th_regex_char_t *str,
     const size_t start, size_t *offs,
-    const th_regex_char delim)
+    const th_regex_char_t delim)
 {
     for (*offs = start; str[*offs] != 0; (*offs)++)
     {
@@ -194,10 +231,10 @@
 }
 
 
-static BOOL th_regex_parse_ssize_t(const th_regex_char *str,
+static BOOL th_regex_parse_ssize_t(const th_regex_char_t *str,
     ssize_t *value)
 {
-    th_regex_char ch;
+    th_regex_char_t ch;
     BOOL neg;
 
     if (*str == '-')
@@ -227,31 +264,31 @@
 }
 
 
-static void th_regex_list_item_init(th_regex_list_item *item)
+static void th_regex_list_item_init(th_regex_list_item_t *item)
 {
-    memset(item, 0, sizeof(th_regex_list_item));
+    memset(item, 0, sizeof(th_regex_list_item_t));
 }
 
 
-static int th_regex_list_add_item(th_regex_list *list, th_regex_list_item *item)
+static int th_regex_list_add_item(th_regex_list_t *list, th_regex_list_item_t *item)
 {
     if (list->items == NULL || list->nitems + 1 >= list->itemssize)
     {
         list->itemssize += 16;
 
         if ((list->items = th_realloc(list->items,
-            list->itemssize * sizeof(th_regex_list_item))) == NULL)
+            list->itemssize * sizeof(th_regex_list_item_t))) == NULL)
             return THERR_MALLOC;
     }
 
-    memcpy(list->items + list->nitems, item, sizeof(th_regex_list_item));
+    memcpy(&list->items[list->nitems], item, sizeof(th_regex_list_item_t));
     list->nitems++;
 
     return THERR_OK;
 }
 
 
-static void th_regex_list_free(th_regex_list *list)
+static void th_regex_list_free(th_regex_list_t *list)
 {
     if (list != NULL)
     {
@@ -264,11 +301,11 @@
 }
 
 
-static int th_regex_parse_list(const th_regex_char *str,
-    const size_t slen, th_regex_list *list)
+static int th_regex_parse_list(const th_regex_char_t *str,
+    const size_t slen, th_regex_list_t *list)
 {
-    th_regex_char *tmp = NULL;
-    th_regex_list_item item;
+    th_regex_char_t *tmp = NULL;
+    th_regex_list_item_t item;
     int res;
 
     if ((res = th_regex_strndup(&tmp, str, slen)) != THERR_OK)
@@ -277,7 +314,7 @@
     // Handle ranges like [A-Z]
     for (size_t offs = 0; offs < slen; offs++)
     {
-        th_regex_char
+        th_regex_char_t
             *prev = (offs > 0) ? tmp + offs - 1 : NULL,
             *curr = tmp + offs,
             *next = (offs + 1 < slen) ? tmp + offs + 1 : NULL;
@@ -319,14 +356,14 @@
 
     for (size_t offs = 0; offs < slen; offs++)
     {
-        th_regex_char curr = tmp[offs];
+        th_regex_char_t curr = tmp[offs];
         if (curr != 0)
             item.nchars++;
     }
 
     if (item.nchars > 0)
     {
-        if ((item.chars = th_malloc(sizeof(th_regex_char) * item.nchars)) == NULL)
+        if ((item.chars = th_malloc(sizeof(th_regex_char_t) * item.nchars)) == NULL)
         {
             res = THERR_MALLOC;
             goto out;
@@ -334,7 +371,7 @@
 
         for (size_t offs = 0, n = 0; offs < slen; offs++)
         {
-            th_regex_char curr = tmp[offs];
+            th_regex_char_t curr = tmp[offs];
             if (curr != 0)
             {
                 item.chars[n] = curr;
@@ -355,12 +392,12 @@
 }
 
 
-int th_regex_compile(th_regex_ctx **pexpr, const th_regex_char *pattern)
+int th_regex_compile(th_regex_t **pexpr, const th_regex_char_t *pattern)
 {
     int res = THERR_OK;
-    th_regex_parse_ctx ctx;
-    th_regex_node node, *pnode;
-    th_regex_char *tmp = NULL;
+    th_regex_parse_ctx_t ctx;
+    th_regex_node_t node, *pnode;
+    th_regex_char_t *tmp = NULL;
     size_t start;
 
     if (pexpr == NULL || pattern == NULL)
@@ -374,14 +411,14 @@
 
     for (; ctx.pattern[ctx.offs] != 0; ctx.offs++)
     {
-        th_regex_char cch = ctx.pattern[ctx.offs];
+        th_regex_char_t cch = ctx.pattern[ctx.offs];
         DBG_RE_COMPILE("[%" PRIu_SIZE_T "] '%c'\n", ctx.offs, cch);
         switch (cch)
         {
             case '?':
             case '*':
             case '+':
-                if ((res = th_regex_ctx_get_prev_node(&ctx, &pnode)) != THERR_OK)
+                if ((res = th_regex_parse_ctx_get_prev_node(&ctx, &pnode)) != THERR_OK)
                     goto exit;
 
                 if (cch == '?')
@@ -433,7 +470,7 @@
 
                 th_free(tmp);
 
-                if ((res = th_regex_ctx_get_prev_node(&ctx, &pnode)) != THERR_OK ||
+                if ((res = th_regex_parse_ctx_get_prev_node(&ctx, &pnode)) != THERR_OK ||
                     (res = th_regex_strndup(&tmp, ctx.pattern + start,
                     ctx.offs - start)) != THERR_OK)
                     goto exit;
@@ -471,7 +508,7 @@
 
             case '(':
                 // Start of subpattern
-                if ((res = th_regex_ctx_push(&ctx)) != THERR_OK)
+                if ((res = th_regex_parse_ctx_push(&ctx)) != THERR_OK)
                     goto exit;
                 break;
 
@@ -480,8 +517,8 @@
                 th_regex_node_init(&node);
                 node.type = TH_RE_TYPE_SUBEXPR;
 
-                if ((res = th_regex_ctx_pop(&ctx, &node.match.expr)) != THERR_OK ||
-                    (res = th_regex_ctx_node_commit(&ctx, &node)) != THERR_OK)
+                if ((res = th_regex_parse_ctx_pop(&ctx, &node.match.expr)) != THERR_OK ||
+                    (res = th_regex_parse_ctx_node_commit(&ctx, &node)) != THERR_OK)
                     goto exit;
                 break;
 
@@ -490,7 +527,7 @@
                 th_regex_node_init(&node);
                 node.mode = TH_RE_MATCH_ANCHOR_START;
 
-                if ((res = th_regex_ctx_node_commit(&ctx, &node)) != THERR_OK)
+                if ((res = th_regex_parse_ctx_node_commit(&ctx, &node)) != THERR_OK)
                     goto exit;
                 break;
 
@@ -499,7 +536,7 @@
                 th_regex_node_init(&node);
                 node.mode = TH_RE_MATCH_ANCHOR_END;
 
-                if ((res = th_regex_ctx_node_commit(&ctx, &node)) != THERR_OK)
+                if ((res = th_regex_parse_ctx_node_commit(&ctx, &node)) != THERR_OK)
                     goto exit;
                 break;
 
@@ -513,17 +550,18 @@
                     goto exit;
                 }
 
-                // XXX TODO Parse/support ranges [0-9a-zA-Z_-]
+                th_regex_node_init(&node);
+                if (ctx.pattern[start] == '^')
+                {
+                    node.type = TH_RE_TYPE_LIST_REVERSE;
+                    start++;
+                }
+                else
+                    node.type = TH_RE_TYPE_LIST;
 
-                th_regex_node_init(&node);
-                node.type = (ctx.pattern[start] == '^') ?
-                    TH_RE_TYPE_LIST_REVERSE : TH_RE_TYPE_LIST;
-                node.match.list.nchars = ctx.offs - start;
-                if ((res = th_regex_strndup(&node.match.list.chars,
-                    ctx.pattern + start, node.match.list.nchars)) != THERR_OK)
-                    goto exit;
-
-                if ((res = th_regex_ctx_node_commit(&ctx, &node)) != THERR_OK)
+                if ((res = th_regex_parse_list(ctx.pattern + start,
+                    ctx.offs - start, &node.match.list) != THERR_OK) ||
+                    (res = th_regex_parse_ctx_node_commit(&ctx, &node)) != THERR_OK)
                     goto exit;
                 break;
 
@@ -532,7 +570,7 @@
                 th_regex_node_init(&node);
                 node.type = TH_RE_TYPE_ANY_CHAR;
 
-                if ((res = th_regex_ctx_node_commit(&ctx, &node)) != THERR_OK)
+                if ((res = th_regex_parse_ctx_node_commit(&ctx, &node)) != THERR_OK)
                     goto exit;
                 break;
 
@@ -545,14 +583,7 @@
                     res = THERR_INVALID_DATA;
                     goto exit;
                 }
-
-                th_regex_node_init(&node);
-                node.type = TH_RE_TYPE_CHAR;
-                node.match.chr = ctx.pattern[ctx.offs];
-
-                if ((res = th_regex_ctx_node_commit(&ctx, &node)) != THERR_OK)
-                    goto exit;
-                break;
+                // fall-through
 
             default:
                 // Given character must match
@@ -560,7 +591,7 @@
                 node.type = TH_RE_TYPE_CHAR;
                 node.match.chr = ctx.pattern[ctx.offs];
 
-                if ((res = th_regex_ctx_node_commit(&ctx, &node)) != THERR_OK)
+                if ((res = th_regex_parse_ctx_node_commit(&ctx, &node)) != THERR_OK)
                     goto exit;
                 break;
         }
@@ -573,45 +604,24 @@
 }
 
 
-void th_regex_free(th_regex_ctx *expr)
+void th_regex_free(th_regex_t *expr)
 {
     if (expr != NULL)
     {
-        for (size_t n = 0; n < expr->nnodes; n++)
+        for (size_t nnode = 0; nnode < expr->nnodes; nnode++)
         {
-            th_regex_node *node = &expr->nodes[n];
+            th_regex_node_t *node = &expr->nodes[nnode];
 
             DBG_RE_FREE(
                 "node [%" PRIu_SIZE_T " / %" PRIu_SIZE_T "]: "
-                "mode=%d, type=%d, rmin=%" PRId_SSIZE_T ", rmax=%" PRId_SSIZE_T "\n",
-                n, expr->nnodes,
-                node->mode, node->type,
+                "mode=%s, type=%s, rmin=%" PRId_SSIZE_T ", rmax=%" PRId_SSIZE_T "\n",
+                nnode, expr->nnodes,
+                re_match_modes[node->mode],
+                re_match_types[node->type],
                 node->repeatMin, node->repeatMax);
 
-            switch (node->type)
-            {
-                case TH_RE_TYPE_SUBEXPR:
-                    DBG_RE_FREE("  SUBEXPR: %p vs %p\n",
-                        (void *) expr, (void *) node->match.expr);
-                    th_regex_free(node->match.expr);
-                    break;
-
-                case TH_RE_TYPE_LIST:
-                case TH_RE_TYPE_LIST_REVERSE:
-                    DBG_RE_FREE("  list='%s'\n", node->match.list.chars);
-                    th_free(node->match.list.chars);
-                    break;
-
-#ifdef TH_EXPERIMENTAL_REGEX_DEBUG
-                case TH_RE_TYPE_ANY_CHAR:
-                    DBG_RE_FREE("  any char\n");
-                    break;
-
-                case TH_RE_TYPE_CHAR:
-                    DBG_RE_FREE("  char='%c'\n", node->match.chr);
-                    break;
-#endif
-            }
+            th_regex_free(node->match.expr);
+            th_regex_list_free(&node->match.list);
         }
 
         th_free(expr->nodes);
@@ -619,12 +629,13 @@
 }
 
 
-static BOOL th_regex_do_match_list(const th_regex_list *list, const th_regex_char cch)
+static BOOL th_regex_match_list(const th_regex_list_t *list, const th_regex_char_t cch)
 {
     // Could be optimized, perhaps .. sort match.chars, binary search etc?
     for (size_t nitem = 0; nitem < list->nitems; nitem++)
     {
-        const th_regex_list_item *item = &list->items[nitem];
+        const th_regex_list_item_t *item = &list->items[nitem];
+
         if (item->type == 0)
         {
             for (size_t n = 0; n < item->nchars; n++)
@@ -642,167 +653,170 @@
 }
 
 
-static BOOL th_regex_do_match_expr(const th_regex_ctx *expr,
-    const th_regex_char *haystack, size_t *offs, const int flags);
+static BOOL th_regex_match_expr(
+    const th_regex_char_t *haystack,
+    size_t *poffs,
+    const th_regex_t *expr,
+    const size_t startnode,
+    const int flags
+    );
 
 
-static BOOL th_regex_do_match_node(const th_regex_char *haystack,
-    size_t *offs, const th_regex_node *node, const int flags)
+static BOOL th_regex_match_one(
+    const th_regex_char_t *haystack,
+    size_t *poffs,
+    const th_regex_node_t *node,
+    const int flags
+    )
 {
-    th_regex_char cch;
+    th_regex_char_t cch;
     BOOL res = FALSE;
 
-    DBG_RE_MATCH("    node_START [%s]: '%s': ",
-        re_match_types[node->type], haystack + *offs);
-
     switch (node->type)
     {
         case TH_RE_TYPE_SUBEXPR:
-            DBG_RE_MATCH("subexpr ..\n");
-            res = th_regex_do_match_expr(node->match.expr, haystack, offs, flags);
-            return res;
+            // push current context in stack
+            // new context for subexpr
+            res = th_regex_match_expr(haystack, poffs, node->match.expr, 0, flags);
             break;
 
         case TH_RE_TYPE_LIST:
         case TH_RE_TYPE_LIST_REVERSE:
-            DBG_RE_MATCH("[%s]", node->match.list.chars);
-            if ((cch = haystack[*offs]) == 0)
-                goto out;
-
-            // Could be optimized, perhaps .. sort match.chars, binary search etc?
-            // XXX TODO Ranges are not supported yet
-            for (size_t n = 0; n < node->match.list.nchars; n++)
-            if (node->match.list.chars[n] == cch)
+            if ((cch = haystack[*poffs]) == 0)
+                res = FALSE;
+            else
             {
-                res = TRUE;
-                break;
-            }
+                res = th_regex_match_list(&node->match.list, cch);
 
-            if (node->type == TH_RE_TYPE_LIST_REVERSE)
-                res = !res;
+                if (node->type == TH_RE_TYPE_LIST_REVERSE)
+                    res = !res;
 
-            (*offs)++;
+                (*poffs)++;
+            }
             break;
 
         case TH_RE_TYPE_ANY_CHAR:
-            if ((cch = haystack[*offs]) == 0)
-                goto out;
-
-            res = TRUE;
-            (*offs)++;
+            if ((cch = haystack[*poffs]) == 0)
+                res = FALSE;
+            else
+            {
+                res = TRUE;
+                (*poffs)++;
+            }
             break;
 
         case TH_RE_TYPE_CHAR:
-            DBG_RE_MATCH("'%c'", node->match.chr);
-            if ((cch = haystack[*offs]) == 0)
-                goto out;
-
-            res = (cch == node->match.chr);
-            (*offs)++;
+            if ((cch = haystack[*poffs]) == 0)
+                res = FALSE;
+            else
+            {
+                res = (cch == node->match.chr);
+                (*poffs)++;
+            }
             break;
     }
 
-out:
-    DBG_RE_MATCH(", match=%s\n",
-        res ? "YES" : "NO");
-
     return res;
 }
 
 
-static BOOL th_regex_do_match_expr(const th_regex_ctx *expr,
-    const th_regex_char *haystack, size_t *offs, const int flags)
+static BOOL th_regex_match_expr(
+    const th_regex_char_t *haystack,
+    size_t *poffs,
+    const th_regex_t *expr,
+    const size_t startnode,
+    const int flags
+    )
 {
-    for (size_t n = 0; n < expr->nnodes; n++)
+    BOOL res = TRUE;
+    size_t soffs = *poffs;
+
+    for (size_t nnode = startnode; res && nnode < expr->nnodes; nnode++)
     {
-        const th_regex_node *node = &expr->nodes[n];
-        size_t soffs;
+        const th_regex_node_t *node = &expr->nodes[nnode];
 
-        DBG_RE_MATCH("  expr [%" PRIu_SIZE_T "/%" PRIu_SIZE_T "]: %s ",
-            n, expr->nnodes, re_match_modes[node->mode]);
+        DBG_RE_MATCH("  expr [%" PRIu_SIZE_T "/%" PRIu_SIZE_T "]: %s %s '%s'\n",
+            nnode, expr->nnodes,
+            re_match_modes[node->mode], re_match_types[node->type],
+            haystack + *poffs);
 
         switch (node->mode)
         {
             case TH_RE_MATCH_ONCE:
-                {
-                    DBG_RE_MATCH("\n");
-                    soffs = *offs;
-                    if (!th_regex_do_match_node(haystack, &soffs, node, flags))
-                        return FALSE;
-                    *offs = soffs;
-                }
+                res = th_regex_match_one(haystack, poffs, node, flags);
                 break;
 
             case TH_RE_MATCH_COUNT_GREEDY:
             case TH_RE_MATCH_COUNT_NONGREEDY:
                 {
-                    BOOL done = FALSE;
                     ssize_t count = 0;
 
-                    DBG_RE_MATCH("min=%" PRId_SSIZE_T ", max=%" PRId_SSIZE_T "\n", node->repeatMin, node->repeatMax);
+                    DBG_RE_MATCH("    COUNT min=%" PRId_SSIZE_T ", max=%" PRId_SSIZE_T "\n",
+                        node->repeatMin, node->repeatMax);
 
                     do
                     {
                         BOOL match;
-                        soffs = *offs;
+                        size_t toffs = *poffs, tnode = nnode;
 
-                        match = th_regex_do_match_node(haystack, &soffs, node, flags);
-                        for (size_t qn = n + 1; qn < expr->nnodes && haystack[soffs] != 0; qn++)
-                        {
-                            const th_regex_node *next = &expr->nodes[qn];
-                            do {
-                              match = th_regex_do_match_node(haystack, &soffs, next, flags);
-                            } while (haystack[soffs] != 0 && !match);
-                        }
+                        do {
+                            match = th_regex_match_one(haystack, &toffs, node, flags);
+                            if (match && haystack[toffs] != 0)
+                            {
+                                for (tnode = nnode + 1; match && tnode < expr->nnodes && haystack[toffs] != 0; tnode++)
+                                {
+                                    size_t noffs = toffs;
+                                    match = th_regex_match_one(haystack, &toffs, &expr->nodes[tnode], flags);
+                                    DBG_RE_MATCH("    '%s': %d\n", haystack + noffs, match);
+                                }
+                            }
+                        } while (!match && haystack[toffs] != 0);
 
                         if (match)
                         {
                             // Node matched
                             count++;
-                            done = (node->repeatMax > 0 && count >= node->repeatMax);
-                            *offs = soffs;
+                            *poffs = soffs;
+                            nnode = tnode;
+                            res = (node->repeatMax > 0 && count >= node->repeatMax);
                         }
                         else
                         {
                             // Node did not match, check if we got the minimum if set
-                            done = (node->repeatMin >= 0 && count >= node->repeatMin);
-                            soffs = *offs;
-                            break;
+                            res = (node->repeatMin >= 0 && count >= node->repeatMin);
                         }
-                    } while (!done);
 
-                    DBG_RE_MATCH("RESULT: %" PRId_SSIZE_T " = %s\n", count, done ? "YES" : "NO");
+                    } while (!res);
 
-                    if (!done)
-                        return FALSE;
-
-                    *offs = soffs;
+                    DBG_RE_MATCH("    RESULT: count=%" PRId_SSIZE_T ", done=%s\n",
+                        count, res ? "YES" : "NO");
                 }
                 break;
 
+
             case TH_RE_MATCH_ANCHOR_START:
-                DBG_RE_MATCH("offs=%" PRIu_SIZE_T "\n", *offs);
-                if (*offs != 0)
-                    return FALSE;
+                res = (*poffs == 0);
                 break;
 
             case TH_RE_MATCH_ANCHOR_END:
-                DBG_RE_MATCH("is end=%d\n", haystack[*offs]);
-                if (haystack[*offs] != 0)
-                    return FALSE;
+                res = (haystack[*poffs] == 0);
                 break;
         }
     }
 
-    return TRUE;
+    if (!res)
+        *poffs = soffs;
+
+    return res;
 }
 
 
-int th_regex_match(const th_regex_ctx *expr, const th_regex_char *haystack,
-    size_t *pnmatches, th_regex_match_node **pmatches, const size_t maxmatches,
+int th_regex_match(const th_regex_t *expr, const th_regex_char_t *haystack,
+    size_t *pnmatches, th_regex_match_t **pmatches, const size_t maxmatches,
     const int flags)
 {
     size_t nmatches = 0;
+    (void) flags;
 
     if (pnmatches != NULL)
         *pnmatches = 0;
@@ -818,13 +832,12 @@
     // ..
     for (size_t soffs = 0; haystack[soffs] != 0; )
     {
-        BOOL matched;
         size_t coffs = soffs;
 
         DBG_RE_MATCH("\nTRY_MATCH @ startoffs=%" PRIu_SIZE_T ": '%s'\n",
             soffs, haystack + soffs);
 
-        if ((matched = th_regex_do_match_expr(expr, haystack, &coffs, flags)))
+        if (th_regex_match_expr(haystack, &coffs, expr, 0, flags))
         {
             // A match was found, increase count
             nmatches++;
@@ -835,7 +848,7 @@
 
             if (pmatches != NULL)
             {
-                th_regex_match_node *match = th_malloc0(sizeof(th_regex_match_node));
+                th_regex_match_t *match = th_malloc0(sizeof(th_regex_match_t));
                 if (match == NULL)
                     return THERR_MALLOC;
 
@@ -866,14 +879,14 @@
 }
 
 
-static void th_regex_free_match(th_regex_match_node *node)
+static void th_regex_free_match(th_regex_match_t *node)
 {
     (void) node;
     // Nothing to do here at the moment
 }
 
 
-void th_regex_free_matches(th_regex_match_node *matches)
+void th_regex_free_matches(th_regex_match_t *matches)
 {
     th_llist_free_func_node((th_llist_t *) matches,
         (void (*)(th_llist_t *)) th_regex_free_match);
--- a/th_regex.h	Thu Jan 23 11:38:28 2020 +0200
+++ b/th_regex.h	Fri Jan 24 09:13:24 2020 +0200
@@ -22,10 +22,26 @@
 //
 // Definitions
 //
+
 // Character type
-typedef char th_regex_char;
+typedef char th_regex_char_t;
+
+
+// Structure containing the compiled regular expression
+struct th_regex_t;
+typedef struct th_regex_t th_regex_t;
 
 
+// Linked list structure for returned match sequences
+typedef struct
+{
+    th_llist_t node;
+    size_t start, len;
+} th_regex_match_t;
+
+
+// Flags for th_regex_match()
+// (not actually used or supported yet :D)
 enum
 {
     TH_REF_CASEFOLD          = 0x0001,
@@ -33,59 +49,6 @@
 };
 
 
-typedef struct
-{
-    int type;
-    th_regex_char start, end;
-
-    size_t nchars;
-    th_regex_char *chars;
-} th_regex_list_item;
-
-
-typedef struct
-{
-    size_t nitems, itemssize;
-    th_regex_list_item *items;
-} th_regex_list;
-
-
-struct th_regex_ctx;
-
-typedef struct
-{
-    int mode, type;
-    ssize_t repeatMin, repeatMax;
-
-    union
-    {
-        th_regex_char chr;
-
-        struct
-        {
-            size_t nchars;
-            th_regex_char *chars;
-        } list;
-
-        struct th_regex_ctx *expr;
-    } match;
-} th_regex_node;
-
-
-typedef struct th_regex_ctx
-{
-    size_t nnodes, nodessize;
-    th_regex_node *nodes;
-} th_regex_ctx;
-
-
-typedef struct
-{
-    th_llist_t node;
-    size_t start, len;
-} th_regex_match_node;
-
-
 #ifdef TH_EXPERIMENTAL_REGEX_DEBUG
 enum
 {
@@ -101,13 +64,14 @@
 //
 // Functions
 //
-int      th_regex_compile(th_regex_ctx **pexpr, const th_regex_char *pattern);
-void     th_regex_free(th_regex_ctx *expr);
+int      th_regex_compile(th_regex_t **pexpr, const th_regex_char_t *pattern);
+void     th_regex_free(th_regex_t *expr);
 
-int      th_regex_match(const th_regex_ctx *expr, const th_regex_char *haystack,
-         size_t *pnmatches, th_regex_match_node **pmatches, const size_t maxmatches,
+int      th_regex_match(const th_regex_t *expr, const th_regex_char_t *haystack,
+         size_t *pnmatches, th_regex_match_t **pmatches, const size_t maxmatches,
          const int flags);
-void     th_regex_free_matches(th_regex_match_node *matches);
+
+void     th_regex_free_matches(th_regex_match_t *matches);
 
 
 #ifdef __cplusplus