changeset 605:566e6ef41f9d

Initial commit of the highly experimental and unfinished regular expression parsing and matching code + unfinished test harness stuff. Not to be used.
author Matti Hamalainen <ccr@tnsp.org>
date Thu, 16 Jan 2020 01:09:00 +0200
parents e789accfca87
children f92f1b3d7fd4
files Makefile.gen tests.c th_regex.c th_regex.h
diffstat 4 files changed, 818 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/Makefile.gen	Thu Jan 16 00:49:42 2020 +0200
+++ b/Makefile.gen	Thu Jan 16 01:09:00 2020 +0200
@@ -8,6 +8,7 @@
 CFLAGS += -DTH_USE_INTERNAL_SPRINTF=1
 CFLAGS += -DTH_PRINTF_DEBUG=1
 CFLAGS += -DTH_USE_OPT_ARG=1
+CFLAGS += -DTH_EXPERIMENTAL_REGEX=1
 
 THLIBS  = ./
 
@@ -17,7 +18,7 @@
 THLIBS_A=$(OBJPATH)thlibs.a
 THLIBS_OBJ=th_util.o th_config.o th_string.o th_ioctx.o \
 	th_args.o th_crypto.o th_datastruct.o th_network.o \
-	th_file.o
+	th_file.o th_regex.o
 
 
 TESTS_OBJ=tests.o
--- a/tests.c	Thu Jan 16 00:49:42 2020 +0200
+++ b/tests.c	Thu Jan 16 01:09:00 2020 +0200
@@ -4,6 +4,7 @@
 #include "th_string.h"
 #include "th_crypto.h"
 #include "th_config.h"
+#include "th_regex.h"
 
 
 #define SET_BUF_SIZE           128
@@ -558,6 +559,47 @@
 }
 
 
+typedef struct
+{
+    th_regex_char *str;
+    int flags;
+    BOOL result;
+} test_regex_def;
+
+
+void test_regex_list(const th_regex_char *pattern, const test_regex_def *list)
+{
+    th_regex_ctx *reg = NULL;
+    int res;
+
+
+    printf("pattern '%s'\n", pattern);
+    if ((res = th_regex_compile(&reg, pattern)) != THERR_OK)
+    {
+        THERR("Regex compilation failed: %s\n",
+            th_error_str(res));
+        goto out;
+    }
+
+    for (const test_regex_def *def = list; def->str != NULL; def++)
+    {
+        BOOL matched = FALSE;
+
+        if ((res = th_regex_match(reg, def->str, &matched, NULL, -1, def->flags)) != THERR_OK)
+        {
+            THERR("Regex match returned error: %s\n",
+                th_error_str(res));
+            goto out;
+        }
+
+        printf("  '%s': %s\n", def->str, matched ? "YES" : "NO");
+    }
+
+out:
+    th_regex_free(reg);
+}
+
+
 int main(int argc, char *argv[])
 {
     size_t i1, i2, i3, i4;
@@ -809,6 +851,29 @@
     }
 
     //
+    // Regular expressions
+    //
+    if (test_set_start("Regular expressions"))
+    {
+        th_regex_ctx *reg = NULL;
+        int res;
+
+        res = th_regex_compile(&reg, "z*k+abba fabboa? k{4} [gz]{1,2} foo(bar|zoo)?");
+        if (res != THERR_OK)
+            printf("result: %s\n", th_error_str(res));
+        th_regex_free(reg);
+
+        test_regex_def tst1[] =
+        {
+            { "abcfoabccg"        , 0, TRUE },
+            { "abcbcfoabccg"      , 0, TRUE },
+            { NULL, 0, FALSE }
+        };
+
+        test_regex_list("^a(bc){1,2}fo[oab]*cc?g", tst1);
+    }
+
+    //
     // Print summary and exit
     //
     tprint(1,
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/th_regex.c	Thu Jan 16 01:09:00 2020 +0200
@@ -0,0 +1,673 @@
+/*
+ * Simple regular expression matching functionality
+ * Programmed and designed by Matti 'ccr' Hamalainen
+ * (C) Copyright 2020 Tecnic Software productions (TNSP)
+ *
+ * Please read file 'COPYING' for information on license and distribution.
+ */
+#ifdef TH_EXPERIMENTAL_REGEX
+#include "th_util.h"
+#include "th_regex.h"
+//#include "th_datastruct.h"
+
+
+//#define DBG_RE_COMPILE 1
+//#define DBG_RE_FREE 1
+//#define DBG_RE_MATCH 1
+
+#if defined(DBG_RE_COMPILE)
+#    define DBG_RE_PRINT_COMPILE(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
+#else
+#    define DBG_RE_PRINT_COMPILE(...) do { } while (0)
+#endif
+#if defined(DBG_RE_FREE)
+#    define DBG_RE_PRINT_FREE(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
+#else
+#    define DBG_RE_PRINT_FREE(...) do { } while (0)
+#endif
+#if defined(DBG_RE_MATCH)
+#    define DBG_RE_PRINT_MATCH(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
+#else
+#    define DBG_RE_PRINT_MATCH(...) do { } while (0)
+#endif
+
+
+enum
+{
+    TH_RE_MATCH_ONCE,
+    TH_RE_MATCH_COUNT,
+    TH_RE_MATCH_ANCHOR_START,
+    TH_RE_MATCH_ANCHOR_END,
+};
+
+
+enum
+{
+    TH_RE_TYPE_CHAR,
+    TH_RE_TYPE_ANY_CHAR,
+    TH_RE_TYPE_LIST,
+    TH_RE_TYPE_LIST_REVERSE,
+    TH_RE_TYPE_SUBEXPR,
+};
+
+
+#if defined(DBG_RE_COMPILE) || defined(DBG_RE_FREE) || defined(DBG_RE_MATCH)
+static const char *re_match_modes[] =
+{
+    "ONCE",
+    "COUNT",
+    "ANCHOR_START",
+    "ANCHOR_END",
+};
+
+static const char *re_match_types[] =
+{
+    "CHAR",
+    "ANY",
+    "LIST",
+    "LIST_REVERSE",
+    "SUBEXPR",
+};
+#endif
+
+
+typedef struct
+{
+    const th_regex_char *pattern;
+    size_t offs;
+
+    th_regex_ctx *data;
+
+    size_t nstack, stacksize;
+    th_regex_ctx **stack;
+} th_regex_parse_ctx;
+
+
+static void th_regex_node_init(th_regex_node *node)
+{
+    memset(node, 0, sizeof(th_regex_node));
+    node->mode = TH_RE_MATCH_ONCE;
+}
+
+
+static int th_regex_strndup(th_regex_char **pdst,
+    const th_regex_char *src, const size_t len)
+{
+    if (pdst == NULL)
+        return THERR_NULLPTR;
+
+    if (UINTPTR_MAX / sizeof(th_regex_char) < len + 1)
+        return THERR_BOUNDS;
+
+    if ((*pdst = (th_regex_char *)
+        th_malloc((len + 1) * sizeof(th_regex_char))) == NULL)
+        return THERR_MALLOC;
+
+    memcpy(*pdst, src, len * sizeof(th_regex_char));
+    (*pdst)[len] = 0;
+
+    return THERR_OK;
+}
+
+
+static int th_regex_ctx_get_prev_node(
+    th_regex_parse_ctx *ctx, th_regex_node **pnode)
+{
+    if (ctx->data != NULL && ctx->data->nnodes > 0)
+    {
+        *pnode = &ctx->data->nodes[ctx->data->nnodes - 1];
+        return THERR_OK;
+    }
+    else
+        return THERR_INVALID_DATA;
+}
+
+
+static int th_regex_ctx_push(th_regex_parse_ctx *ctx)
+{
+    if (ctx->stack == NULL || ctx->nstack + 1 >= ctx->stacksize)
+    {
+        ctx->stacksize += 16;
+
+        if ((ctx->stack = th_realloc(ctx->stack,
+            ctx->stacksize * sizeof(th_regex_node *))) == NULL)
+            return THERR_MALLOC;
+    }
+
+    ctx->stack[ctx->nstack] = ctx->data;
+    ctx->nstack++;
+    ctx->data = NULL;
+
+    return THERR_OK;
+}
+
+
+static int th_regex_ctx_pop(th_regex_parse_ctx *ctx, th_regex_ctx **data)
+{
+    if (ctx->nstack > 0)
+    {
+        *data = ctx->data;
+        ctx->nstack--;
+        ctx->data = ctx->stack[ctx->nstack];
+        return THERR_OK;
+    }
+    else
+        return THERR_INVALID_DATA;
+}
+
+
+static int th_regex_ctx_node_commit(th_regex_parse_ctx *ctx, th_regex_node *node)
+{
+    th_regex_ctx *data = ctx->data;
+
+    if (data == NULL)
+    {
+        if ((data = ctx->data = th_malloc0(sizeof(th_regex_ctx))) == NULL)
+            return THERR_MALLOC;
+    }
+
+    if (data->nodes == NULL || data->nnodes + 1 >= data->nodessize)
+    {
+        data->nodessize += 16;
+        if ((data->nodes = th_realloc(data->nodes,
+            data->nodessize * sizeof(th_regex_node))) == NULL)
+            return THERR_MALLOC;
+    }
+
+    memcpy(&data->nodes[data->nnodes], node, sizeof(th_regex_node));
+    data->nnodes++;
+
+    DBG_RE_PRINT_COMPILE(
+        "node [%" PRIu_SIZE_T " / %" PRIu_SIZE_T "]: "
+        "mode=%d, type=%d, rmin=%" PRId_SSIZE_T ", rmax=%" PRId_SSIZE_T "\n",
+        data->nnodes, data->nodessize,
+        node->mode, node->type,
+        node->repeatMin, node->repeatMax);
+
+    return THERR_OK;
+}
+
+
+static BOOL th_regex_find_next(const th_regex_char *str,
+    const size_t start, size_t *offs,
+    const th_regex_char delim)
+{
+    for (*offs = start; str[*offs] != 0; (*offs)++)
+    {
+        if (str[*offs] == delim)
+            return TRUE;
+    }
+    return FALSE;
+}
+
+
+static BOOL th_regex_parse_ssize_t(const th_regex_char *str,
+    ssize_t *value)
+{
+    th_regex_char ch;
+    BOOL neg;
+
+    if (*str == '-')
+    {
+        str++;
+        neg = TRUE;
+    }
+    else
+        neg = FALSE;
+
+    // Is the value negative?
+    while ((ch = *str++))
+    {
+        if (ch >= '0' && ch <= '9')
+        {
+            *value *= 10;
+            *value += ch - '0';
+        }
+        else
+            return FALSE;
+    }
+
+    if (neg)
+        *value = -(*value);
+
+    return TRUE;
+}
+
+
+static int th_regex_compile_do(th_regex_parse_ctx *ctx)
+{
+    int res = THERR_OK;
+    th_regex_node node, *pnode;
+    th_regex_char *tmp = NULL;
+    size_t start;
+
+    for (; ctx->pattern[ctx->offs] != 0; ctx->offs++)
+    {
+        DBG_RE_PRINT_COMPILE("[%" PRIu_SIZE_T "] '%c'\n", ctx->offs, ctx->pattern[ctx->offs]);
+        switch (ctx->pattern[ctx->offs])
+        {
+            case '?':
+                // Previous token is optional (repeat 0-1 times)
+                if ((res = th_regex_ctx_get_prev_node(ctx, &pnode)) != THERR_OK)
+                    goto exit;
+
+                pnode->mode = TH_RE_MATCH_COUNT;
+                pnode->repeatMin = 0;
+                pnode->repeatMax = 1;
+                break;
+
+            case '*':
+                // Previous token can repeat 0 or more times
+                if ((res = th_regex_ctx_get_prev_node(ctx, &pnode)) != THERR_OK)
+                    goto exit;
+
+                pnode->mode = TH_RE_MATCH_COUNT;
+                pnode->repeatMin = 0;
+                pnode->repeatMax = -1;
+                break;
+
+            case '+':
+                // Previous token must repeat 1 or more times
+                if ((res = th_regex_ctx_get_prev_node(ctx, &pnode)) != THERR_OK)
+                    goto exit;
+
+                pnode->mode = TH_RE_MATCH_COUNT;
+                pnode->repeatMin = 1;
+                pnode->repeatMax = 1;
+                break;
+
+            case '{':
+                // {n} | {min,max}
+                start = ctx->offs + 1;
+                if (!th_regex_find_next(ctx->pattern, start, &ctx->offs, '}'))
+                {
+                    // End not found
+                    res = THERR_INVALID_DATA;
+                    goto exit;
+                }
+
+                th_free(tmp);
+
+                if ((res = th_regex_ctx_get_prev_node(ctx, &pnode)) != THERR_OK ||
+                    (res = th_regex_strndup(&tmp, ctx->pattern + start,
+                    ctx->offs - start)) != THERR_OK)
+                    goto exit;
+
+                pnode->mode = TH_RE_MATCH_COUNT;
+
+                if (th_regex_find_next(tmp, 0, &start, ','))
+                {
+                    tmp[start] = 0;
+                    if (!th_regex_parse_ssize_t(tmp, &pnode->repeatMin) ||
+                        !th_regex_parse_ssize_t(tmp + start + 1, &pnode->repeatMax))
+                    {
+                        res = THERR_INVALID_DATA;
+                        goto exit;
+                    }
+                }
+                else
+                {
+                    if (!th_regex_parse_ssize_t(tmp, &pnode->repeatMin))
+                    {
+                        res = THERR_INVALID_DATA;
+                        goto exit;
+                    }
+                    pnode->repeatMax = pnode->repeatMin;
+                }
+
+                if (pnode->repeatMin < 0 || pnode->repeatMax < 1 ||
+                    pnode->repeatMax < pnode->repeatMin)
+                {
+                    // Invalid repeat counts
+                    res = THERR_INVALID_DATA;
+                    goto exit;
+                }
+                break;
+
+            case '(':
+                // Start of subpattern
+                if ((res = th_regex_ctx_push(ctx)) != THERR_OK)
+                    goto exit;
+                break;
+
+            case ')':
+                // End of subpattern
+                th_regex_node_init(&node);
+                node.type = TH_RE_TYPE_SUBEXPR;
+
+                if ((res = th_regex_ctx_pop(ctx, &node.match.expr)) != THERR_OK ||
+                    (res = th_regex_ctx_node_commit(ctx, &node)) != THERR_OK)
+                    goto exit;
+                break;
+
+            case '^':
+                // Start of line anchor
+                th_regex_node_init(&node);
+                node.mode = TH_RE_MATCH_ANCHOR_START;
+
+                if ((res = th_regex_ctx_node_commit(ctx, &node)) != THERR_OK)
+                    goto exit;
+                break;
+
+            case '$':
+                // End of line anchor
+                th_regex_node_init(&node);
+                node.mode = TH_RE_MATCH_ANCHOR_END;
+
+                if ((res = th_regex_ctx_node_commit(ctx, &node)) != THERR_OK)
+                    goto exit;
+                break;
+
+            case '[':
+                // Start of char list
+                start = ctx->offs + 1;
+                if (!th_regex_find_next(ctx->pattern, start, &ctx->offs, ']') ||
+                    ctx->offs == start)
+                {
+                    res = THERR_INVALID_DATA;
+                    goto exit;
+                }
+
+                th_regex_node_init(&node);
+                node.type = (ctx->pattern[start] == '^') ?
+                    TH_RE_TYPE_LIST_REVERSE : TH_RE_TYPE_LIST;
+                node.match.list.nchars = ctx->offs - start;
+                if ((res = th_regex_strndup(&node.match.list.chars,
+                    ctx->pattern + start, node.match.list.nchars)) != THERR_OK)
+                    goto exit;
+
+                if ((res = th_regex_ctx_node_commit(ctx, &node)) != THERR_OK)
+                    goto exit;
+                break;
+
+            case '.':
+                // Any single character matches
+                th_regex_node_init(&node);
+                node.type = TH_RE_TYPE_ANY_CHAR;
+
+                if ((res = th_regex_ctx_node_commit(ctx, &node)) != THERR_OK)
+                    goto exit;
+                break;
+
+            case '\\':
+                // Literal escape
+                ctx->offs++;
+                if (ctx->pattern[ctx->offs] == 0)
+                {
+                    // End of pattern, error
+                    res = THERR_INVALID_DATA;
+                    goto exit;
+                }
+
+                th_regex_node_init(&node);
+                node.type = TH_RE_TYPE_CHAR;
+                node.match.chr = ctx->pattern[ctx->offs];
+
+                if ((res = th_regex_ctx_node_commit(ctx, &node)) != THERR_OK)
+                    goto exit;
+                break;
+
+            default:
+                // Given character must match
+                th_regex_node_init(&node);
+                node.type = TH_RE_TYPE_CHAR;
+                node.match.chr = ctx->pattern[ctx->offs];
+
+                if ((res = th_regex_ctx_node_commit(ctx, &node)) != THERR_OK)
+                    goto exit;
+                break;
+        }
+    }
+
+exit:
+    th_free(tmp);
+    return res;
+}
+
+
+int th_regex_compile(th_regex_ctx **pexpr, const th_regex_char *pattern)
+{
+    th_regex_parse_ctx ctx;
+    int res = THERR_OK;
+
+    if (pexpr == NULL || pattern == NULL)
+        return THERR_NULLPTR;
+
+    memset(&ctx, 0, sizeof(ctx));
+    ctx.pattern = pattern;
+    res = th_regex_compile_do(&ctx);
+    *pexpr = ctx.data;
+
+    return res;
+}
+
+
+void th_regex_free(th_regex_ctx *expr)
+{
+    if (expr != NULL)
+    {
+        for (size_t n = 0; n < expr->nnodes; n++)
+        {
+            th_regex_node *node = &expr->nodes[n];
+
+            DBG_RE_PRINT_FREE(
+                "node [%" PRIu_SIZE_T " / %" PRIu_SIZE_T "]: "
+                "mode=%d, type=%d, rmin=%" PRId_SSIZE_T ", rmax=%" PRId_SSIZE_T "\n",
+                n, expr->nnodes,
+                node->mode, node->type,
+                node->repeatMin, node->repeatMax);
+
+            switch (node->type)
+            {
+                case TH_RE_TYPE_SUBEXPR:
+                    DBG_RE_PRINT_FREE("  SUBEXPR: %p vs %p\n", expr, node->match.expr);
+                    th_regex_free(node->match.expr);
+                    break;
+
+                case TH_RE_TYPE_LIST:
+                case TH_RE_TYPE_LIST_REVERSE:
+                    DBG_RE_PRINT_FREE("  list='%s'\n", node->match.list.chars);
+                    th_free(node->match.list.chars);
+                    break;
+
+#ifdef DBG_RE_FREE
+                case TH_RE_TYPE_ANY_CHAR:
+                    DBG_RE_PRINT_FREE("  any char\n");
+                    break;
+
+                case TH_RE_TYPE_CHAR:
+                    DBG_RE_PRINT_FREE("  char='%c'\n", node->match.chr);
+                    break;
+#endif
+            }
+        }
+
+        th_free(expr->nodes);
+    }
+}
+
+
+static BOOL th_regex_do_match_expr(const th_regex_ctx *expr,
+    const th_regex_char *haystack, size_t *offs, const int flags);
+
+
+static BOOL th_regex_do_match_node(const th_regex_char *haystack,
+    size_t *offs, const th_regex_node *node, const int flags)
+{
+    th_regex_char cch;
+    BOOL ret = FALSE;
+
+    DBG_RE_PRINT_MATCH("node_START [%s]: '%s': ", re_match_types[node->type], haystack + *offs);
+
+    switch (node->type)
+    {
+        case TH_RE_TYPE_SUBEXPR:
+            DBG_RE_PRINT_MATCH("subexpr ..\n");
+            ret = th_regex_do_match_expr(node->match.expr, haystack, offs, flags);
+            break;
+
+        case TH_RE_TYPE_LIST:
+        case TH_RE_TYPE_LIST_REVERSE:
+            DBG_RE_PRINT_MATCH("[%s]\n", node->match.list.chars);
+            ret = FALSE;
+            if ((cch = haystack[*offs]) == 0)
+                goto out;
+
+            // Could be optimized, perhaps .. sort match.chars, binary search etc?
+            // XXX TODO Ranges and escapes are not supported yet
+            for (size_t n = 0; n < node->match.list.nchars; n++)
+            if (node->match.list.chars[n] == cch)
+            {
+                ret = TRUE;
+                break;
+            }
+
+            if (node->type == TH_RE_TYPE_LIST_REVERSE)
+                ret = !ret;
+
+            (*offs)++;
+            break;
+
+        case TH_RE_TYPE_ANY_CHAR:
+            DBG_RE_PRINT_MATCH("\n");
+            if ((cch = haystack[*offs]) == 0)
+            {
+                ret = FALSE;
+                goto out;
+            }
+
+            ret = TRUE;
+            (*offs)++;
+            break;
+
+        case TH_RE_TYPE_CHAR:
+            DBG_RE_PRINT_MATCH("'%c'\n", node->match.chr);
+            if ((cch = haystack[*offs]) == 0)
+            {
+                ret = FALSE;
+                goto out;
+            }
+
+            ret = (cch == node->match.chr);
+            (*offs)++;
+            break;
+    }
+
+out:
+    DBG_RE_PRINT_MATCH("node_DONE  [%s]: match %s\n", re_match_types[node->type], ret ? "YES" : "NO");
+    return ret;
+}
+
+
+static BOOL th_regex_do_match_expr(const th_regex_ctx *expr,
+    const th_regex_char *haystack, size_t *offs, const int flags)
+{
+    for (size_t n = 0; n < expr->nnodes; n++)
+    {
+        const th_regex_node *node = &expr->nodes[n];
+        size_t soffs;
+
+        DBG_RE_PRINT_MATCH("expr %p [%" PRIu_SIZE_T "/%" PRIu_SIZE_T "]: %s ",
+            (void *) expr, n, expr->nnodes, re_match_modes[node->mode]);
+
+        switch (node->mode)
+        {
+            case TH_RE_MATCH_ONCE:
+                {
+                    DBG_RE_PRINT_MATCH("\n");
+                    soffs = *offs;
+                    if (!th_regex_do_match_node(haystack, &soffs, node, flags))
+                        return FALSE;
+                    *offs = soffs;
+                }
+                break;
+
+            case TH_RE_MATCH_COUNT:
+                {
+                    BOOL done = FALSE, match = FALSE;
+                    ssize_t count = 0;
+
+                    DBG_RE_PRINT_MATCH("min=%" PRId_SSIZE_T ", max=%" PRId_SSIZE_T "\n", node->repeatMin, node->repeatMax);
+
+                    do
+                    {
+                        soffs = *offs;
+                        if ((match = th_regex_do_match_node(haystack, &soffs, node, flags)))
+                        {
+                            // Node matched
+                            count++;
+                            done = (node->repeatMax > 0 && count >= node->repeatMax);
+                            *offs = soffs;
+                        }
+                        else
+                        {
+                            // Node did not match, check if we got the minimum if set
+                            done = (node->repeatMin >= 0 && count >= node->repeatMin);
+                            soffs = *offs;
+                            break;
+                        }
+                    } while (!done);
+
+                    DBG_RE_PRINT_MATCH("RESULT: %" PRId_SSIZE_T " = %s\n", count, done ? "YES" : "NO");
+
+                    if (!done)
+                        return FALSE;
+
+                    *offs = soffs;
+                }
+                break;
+
+            case TH_RE_MATCH_ANCHOR_START:
+                DBG_RE_PRINT_MATCH("offs=%" PRIu_SIZE_T "\n", *offs);
+                if (*offs != 0)
+                    return FALSE;
+                break;
+
+            case TH_RE_MATCH_ANCHOR_END:
+                DBG_RE_PRINT_MATCH("is end=%d\n", haystack[*offs]);
+                if (haystack[*offs] != 0)
+                    return FALSE;
+                break;
+        }
+    }
+
+    return TRUE;
+}
+
+
+int th_regex_match(const th_regex_ctx *expr, const th_regex_char *haystack,
+    BOOL *pmatched, th_regex_match_node **pmatches, const ssize_t max,
+    const int flags)
+{
+    th_regex_match_node *matches = NULL;
+    size_t soffs, coffs;
+    BOOL matched;
+
+    // Check given pattern and string
+    if (expr == NULL || haystack == NULL)
+        return THERR_NULLPTR;
+
+    // Start matching
+    soffs = coffs = 0;
+
+#if 0
+    while (haystack[soffs] != 0)
+    {
+        if ((matched = th_regex_do_match_expr(expr, haystack, &coffs, flags)))
+        {
+        }
+        else
+        {
+        }
+    }
+#else
+    matched = th_regex_do_match_expr(expr, haystack, &coffs, flags);
+#endif
+
+    if (pmatched != NULL)
+        *pmatched = matched;
+
+    return THERR_OK;
+}
+
+#endif // TH_EXPERIMENTAL_REGEX
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/th_regex.h	Thu Jan 16 01:09:00 2020 +0200
@@ -0,0 +1,78 @@
+/*
+ * Simple regular expression matching functionality
+ * Programmed and designed by Matti 'ccr' Hamalainen
+ * (C) Copyright 2020 Tecnic Software productions (TNSP)
+ *
+ * Please read file 'COPYING' for information on license and distribution.
+ */
+#ifdef TH_EXPERIMENTAL_REGEX
+
+#ifndef TH_REGEX_H
+#define TH_REGEX_H
+
+#include "th_util.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+//
+// Definitions
+//
+// Character type
+typedef char th_regex_char;
+
+
+struct th_regex_ctx;
+
+typedef struct
+{
+    int mode, type;
+    ssize_t repeatMin, repeatMax;
+
+    union
+    {
+        th_regex_char chr;
+
+        struct
+        {
+            size_t nchars;
+            th_regex_char *chars;
+        } list;
+
+        struct th_regex_ctx *expr;
+    } match;
+} th_regex_node;
+
+
+typedef struct th_regex_ctx
+{
+    size_t nnodes, nodessize;
+    th_regex_node *nodes;
+} th_regex_ctx;
+
+
+typedef struct
+{
+//    th_llist_t node;
+    size_t start, len;
+} th_regex_match_node;
+
+
+//
+// Functions
+//
+int th_regex_compile(th_regex_ctx **pexpr, const th_regex_char *pattern);
+void th_regex_free(th_regex_ctx *expr);
+
+int th_regex_match(const th_regex_ctx *expr, const th_regex_char *haystack,
+    BOOL *pmatched, th_regex_match_node **pmatches, const ssize_t max,
+    const int flags);
+
+
+#ifdef __cplusplus
+}
+#endif
+#endif // TH_REGEX_H
+
+#endif // TH_EXPERIMENTAL_REGEX