changeset 639:8c957ad9d4c3

Some more work on regex stuff.
author Matti Hamalainen <ccr@tnsp.org>
date Thu, 23 Jan 2020 11:38:28 +0200
parents c4bca120bfb0
children 9e1f9e1d1487
files th_regex.c th_regex.h
diffstat 2 files changed, 169 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/th_regex.c	Tue Jan 21 12:23:59 2020 +0200
+++ b/th_regex.c	Thu Jan 23 11:38:28 2020 +0200
@@ -227,6 +227,134 @@
 }
 
 
+static void th_regex_list_item_init(th_regex_list_item *item)
+{
+    memset(item, 0, sizeof *item); // Clear every field; item must not be NULL
+}
+
+
+// Append a copy of *item to list; returns THERR_OK, or THERR_MALLOC leaving list unchanged.
+static int th_regex_list_add_item(th_regex_list *list, th_regex_list_item *item)
+{
+    if (list->items == NULL || list->nitems + 1 >= list->itemssize)
+    {
+        // Commit only on success, so a failed th_realloc() cannot lose the old array
+        const size_t nsize = list->itemssize + 16;
+        th_regex_list_item *tmp = th_realloc(list->items, nsize * sizeof(*tmp));
+        if (tmp == NULL)
+            return THERR_MALLOC;
+        list->items = tmp;
+        list->itemssize = nsize;
+    }
+    list->items[list->nitems++] = *item;
+    return THERR_OK;
+}
+
+
+// Free each item's chars buffer and then the items array. A NULL list is a
+// no-op; the list struct itself is neither freed nor reset.
+static void th_regex_list_free(th_regex_list *list)
+{
+    if (list == NULL)
+        return;
+    for (size_t i = 0; i < list->nitems; i++)
+        th_free(list->items[i].chars);
+
+    th_free(list->items);
+}
+
+
+// Parse the body of a bracket list (e.g. the "A-Z_" of [A-Z_]) into list.
+// Every "x-y" range becomes one range item (type 1); all characters left
+// over are collected into a single character-set item (type 0).
+// Returns THERR_OK on success, otherwise the error code of the failing
+// step. Items appended before a failure are left in list for the caller.
+static int th_regex_parse_list(const th_regex_char *str,
+    const size_t slen, th_regex_list *list)
+{
+    th_regex_char *tmp = NULL;
+    th_regex_list_item item;
+    int res;
+
+    // Work on a private copy, as consumed characters get overwritten with 0
+    if ((res = th_regex_strndup(&tmp, str, slen)) != THERR_OK)
+        goto out;
+
+    // Handle ranges like [A-Z]
+    for (size_t offs = 0; offs < slen; offs++)
+    {
+        th_regex_char
+            *prev = (offs > 0) ? tmp + offs - 1 : NULL,
+            *curr = tmp + offs,
+            *next = (offs + 1 < slen) ? tmp + offs + 1 : NULL;
+
+        if (*curr != '-')
+            continue;
+
+        if (prev != NULL && next != NULL)
+        {
+            th_regex_list_item_init(&item);
+            item.type = 1;
+            item.start = *prev;
+            item.end = *next;
+
+            // A range must not run backwards: [A-Z] is valid, [Z-A] is not
+            if (item.start > item.end)
+            {
+                res = THERR_INVALID_DATA;
+                goto out;
+            }
+
+            // Mark all three characters as consumed
+            *curr = *prev = *next = 0;
+
+            if ((res = th_regex_list_add_item(list, &item)) != THERR_OK)
+                goto out;
+        }
+        else if (next != NULL)
+        {
+            // A leading '-' followed by more characters is rejected
+            res = THERR_INVALID_DATA;
+            goto out;
+        }
+    }
+
+    // Count the characters that were not consumed by a range
+    th_regex_list_item_init(&item); // type 0 = character set, nchars = 0
+    for (size_t offs = 0; offs < slen; offs++)
+    {
+        if (tmp[offs] != 0)
+            item.nchars++;
+    }
+
+    if (item.nchars > 0)
+    {
+        if ((item.chars = th_malloc(sizeof(th_regex_char) * item.nchars)) == NULL)
+        {
+            res = THERR_MALLOC;
+            goto out;
+        }
+
+        for (size_t offs = 0, n = 0; offs < slen; offs++)
+        {
+            if (tmp[offs] != 0)
+                item.chars[n++] = tmp[offs];
+        }
+
+        // On success the list owns item.chars; otherwise release it here
+        if ((res = th_regex_list_add_item(list, &item)) != THERR_OK)
+        {
+            th_free(item.chars);
+            goto out;
+        }
+    }
+
+out:
+    th_free(tmp);
+    return res;
+}
+
+
 int th_regex_compile(th_regex_ctx **pexpr, const th_regex_char *pattern)
 {
     int res = THERR_OK;
@@ -491,6 +619,29 @@
 }
 
 
+// Return TRUE if cch is matched by any item of list: a member of a character
+// set item (type 0) or within the inclusive bounds of a range item.
+static BOOL th_regex_do_match_list(const th_regex_list *list, const th_regex_char cch)
+{
+    // Could be optimized, perhaps .. sort match.chars, binary search etc?
+    for (size_t i = 0; i < list->nitems; i++)
+    {
+        const th_regex_list_item *entry = list->items + i;
+        if (entry->type != 0)
+        {
+            if (entry->start <= cch && cch <= entry->end)
+                return TRUE;
+            continue;
+        }
+
+        for (size_t k = 0; k < entry->nchars; k++)
+            if (entry->chars[k] == cch)
+                return TRUE;
+    }
+    return FALSE;
+}
+
+
 static BOOL th_regex_do_match_expr(const th_regex_ctx *expr,
     const th_regex_char *haystack, size_t *offs, const int flags);
 
--- a/th_regex.h	Tue Jan 21 12:23:59 2020 +0200
+++ b/th_regex.h	Thu Jan 23 11:38:28 2020 +0200
@@ -18,6 +18,7 @@
 extern "C" {
 #endif
 
+
 //
 // Definitions
 //
@@ -32,6 +33,23 @@
 };
 
 
+typedef struct
+{
+    int type;               // 0 = set of characters, otherwise a start..end range
+    th_regex_char start;    // Range: lowest matching character (inclusive)
+    th_regex_char end;      // Range: highest matching character (inclusive)
+    size_t nchars;          // Set: number of entries in chars
+    th_regex_char *chars;   // Set: allocated array of nchars characters
+} th_regex_list_item;
+
+
+typedef struct
+{
+    size_t nitems, itemssize;   // Items in use / allocated capacity of items[]
+    th_regex_list_item *items;  // Dynamically allocated array of list entries
+} th_regex_list;
+
+
 struct th_regex_ctx;
 
 typedef struct