# HG changeset patch
# User Matti Hamalainen
# Date 1579772308 -7200
# Node ID 8c957ad9d4c3668753b07c3127876ee82abd5d61
# Parent  c4bca120bfb0441ab2b1eb2922d54e0c2529a808
Some more work on regex stuff.

diff -r c4bca120bfb0 -r 8c957ad9d4c3 th_regex.c
--- a/th_regex.c	Tue Jan 21 12:23:59 2020 +0200
+++ b/th_regex.c	Thu Jan 23 11:38:28 2020 +0200
@@ -227,6 +227,167 @@
 }
 
 
+// Zero-fill a list item so that all of its fields start out cleared.
+static void th_regex_list_item_init(th_regex_list_item *item)
+{
+    memset(item, 0, sizeof(th_regex_list_item));
+}
+
+
+// Append a copy of *item to the list, growing the item array in steps
+// of 16 entries as needed. Returns THERR_OK, or THERR_MALLOC if the array
+// could not be grown, in which case the list is left unchanged.
+static int th_regex_list_add_item(th_regex_list *list, th_regex_list_item *item)
+{
+    if (list->items == NULL || list->nitems + 1 >= list->itemssize)
+    {
+        // Grow through temporaries, so that a failed th_realloc() neither
+        // loses the old array nor leaves itemssize out of sync with it.
+        // NOTE: assumes th_realloc() keeps the old block valid on failure,
+        // like realloc() does -- confirm against its implementation.
+        const size_t newsize = list->itemssize + 16;
+        th_regex_list_item *newitems = th_realloc(list->items,
+            newsize * sizeof(th_regex_list_item));
+
+        if (newitems == NULL)
+            return THERR_MALLOC;
+
+        list->items = newitems;
+        list->itemssize = newsize;
+    }
+
+    memcpy(list->items + list->nitems, item, sizeof(th_regex_list_item));
+    list->nitems++;
+
+    return THERR_OK;
+}
+
+
+// Free the character arrays of all items and the item array itself.
+// The list structure itself is not freed and its fields are not reset.
+// A NULL list is ignored.
+static void th_regex_list_free(th_regex_list *list)
+{
+    if (list != NULL)
+    {
+        for (size_t n = 0; n < list->nitems; n++)
+        {
+            th_free(list->items[n].chars);
+        }
+        th_free(list->items);
+    }
+}
+
+
+// Parse a character list of slen characters from str into list items:
+// each "X-Y" triplet becomes a range item (type 1), and all remaining
+// characters are collected into a single character set item (type 0).
+// A '-' with no character after it is kept as an ordinary character,
+// while a leading '-' that is followed by a character is rejected.
+// Returns THERR_OK on success, THERR_INVALID_DATA for a malformed list,
+// THERR_MALLOC on allocation failure, or the error from th_regex_strndup().
+// Items added before a failure are left in the list.
+static int th_regex_parse_list(const th_regex_char *str,
+    const size_t slen, th_regex_list *list)
+{
+    th_regex_char *tmp = NULL;
+    th_regex_list_item item;
+    int res;
+
+    if ((res = th_regex_strndup(&tmp, str, slen)) != THERR_OK)
+        goto out;
+
+    // Handle ranges like [A-Z]
+    for (size_t offs = 0; offs < slen; offs++)
+    {
+        th_regex_char
+            *prev = (offs > 0) ? tmp + offs - 1 : NULL,
+            *curr = tmp + offs,
+            *next = (offs + 1 < slen) ? tmp + offs + 1 : NULL;
+
+        if (*curr == '-')
+        {
+            if (prev != NULL && next != NULL)
+            {
+                // A zero start character was already consumed by the
+                // previous range (e.g. the 'c' in "a-c-e"), reject it
+                if (*prev == 0)
+                {
+                    res = THERR_INVALID_DATA;
+                    goto out;
+                }
+
+                // Range
+                th_regex_list_item_init(&item);
+                item.type = 1;
+                item.start = *prev;
+                item.end = *next;
+
+                // A range must not run backwards, e.g. [Z-A]
+                if (item.start > item.end)
+                {
+                    res = THERR_INVALID_DATA;
+                    goto out;
+                }
+
+                // Mark all three characters as consumed
+                *curr = *prev = *next = 0;
+
+                if ((res = th_regex_list_add_item(list, &item)) != THERR_OK)
+                    goto out;
+            }
+            else
+            if (next != NULL)
+            {
+                res = THERR_INVALID_DATA;
+                goto out;
+            }
+        }
+    }
+
+    // Count number of remaining characters
+    th_regex_list_item_init(&item);
+    item.type = 0;
+    item.nchars = 0;
+
+    for (size_t offs = 0; offs < slen; offs++)
+    {
+        th_regex_char curr = tmp[offs];
+        if (curr != 0)
+            item.nchars++;
+    }
+
+    if (item.nchars > 0)
+    {
+        if ((item.chars = th_malloc(sizeof(th_regex_char) * item.nchars)) == NULL)
+        {
+            res = THERR_MALLOC;
+            goto out;
+        }
+
+        for (size_t offs = 0, n = 0; offs < slen; offs++)
+        {
+            th_regex_char curr = tmp[offs];
+            if (curr != 0)
+            {
+                item.chars[n] = curr;
+                n++;
+            }
+        }
+
+        if ((res = th_regex_list_add_item(list, &item)) != THERR_OK)
+        {
+            th_free(item.chars);
+            goto out;
+        }
+    }
+
+out:
+    th_free(tmp);
+    return res;
+}
+
+
 int th_regex_compile(th_regex_ctx **pexpr, const th_regex_char *pattern)
 {
     int res = THERR_OK;
@@ -491,6 +652,32 @@
 }
 
 
+// Check whether character cch matches any item of the list: membership
+// in a character set item (type 0), or falling inside the inclusive
+// start..end range of any other item. Returns TRUE on the first match.
+static BOOL th_regex_do_match_list(const th_regex_list *list, const th_regex_char cch)
+{
+    // Could be optimized, perhaps .. sort match.chars, binary search etc?
+    for (size_t nitem = 0; nitem < list->nitems; nitem++)
+    {
+        const th_regex_list_item *item = &list->items[nitem];
+        if (item->type == 0)
+        {
+            for (size_t n = 0; n < item->nchars; n++)
+                if (item->chars[n] == cch)
+                    return TRUE;
+        }
+        else
+        {
+            if (cch >= item->start && cch <= item->end)
+                return TRUE;
+        }
+    }
+
+    return FALSE;
+}
+
+
 static BOOL th_regex_do_match_expr(const th_regex_ctx *expr,
     const th_regex_char *haystack, size_t *offs, const int flags);
 
diff -r c4bca120bfb0 -r 8c957ad9d4c3 th_regex.h
--- a/th_regex.h	Tue Jan 21 12:23:59 2020 +0200
+++ b/th_regex.h	Thu Jan 23 11:38:28 2020 +0200
@@ -18,6 +18,7 @@
 extern "C" {
 #endif
 
+
 //
 // Definitions
 //
@@ -32,6 +33,25 @@
 };
 
 
+// One element of a character list: either a set of characters or a range
+typedef struct
+{
+    int type;                   // 0 = character set (chars), 1 = range (start..end)
+    th_regex_char start, end;   // Inclusive range bounds, used by range items
+
+    size_t nchars;              // Number of characters in chars
+    th_regex_char *chars;       // Character set, freed by the list
+} th_regex_list_item;
+
+
+// Growable array of list items
+typedef struct
+{
+    size_t nitems, itemssize;   // Items in use, and allocated capacity
+    th_regex_list_item *items;
+} th_regex_list;
+
+
 struct th_regex_ctx;
 
 typedef struct