Mercurial > hg > th-libs
annotate th_regex.c @ 639:8c957ad9d4c3
Some more work on regex stuff.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Thu, 23 Jan 2020 11:38:28 +0200 |
parents | c4bca120bfb0 |
children | 9e1f9e1d1487 |
rev | line source |
---|---|
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
1 /* |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
2 * Simple regular expression matching functionality |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
3 * Programmed and designed by Matti 'ccr' Hamalainen |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
4 * (C) Copyright 2020 Tecnic Software productions (TNSP) |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
5 * |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
6 * Please read file 'COPYING' for information on license and distribution. |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
7 */ |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
8 #include "th_util.h" |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
9 #include "th_regex.h" |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
10 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
11 |
635
d191ded8a790
Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents:
614
diff
changeset
|
12 #ifdef TH_EXPERIMENTAL_REGEX_DEBUG |
d191ded8a790
Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents:
614
diff
changeset
|
13 int th_dbg_re_flags = 0; |
d191ded8a790
Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents:
614
diff
changeset
|
14 # define DBG_RE_COMPILE(...) do { if (th_dbg_re_flags & TH_DBG_RE_COMPILE) fprintf(stderr, __VA_ARGS__); } while (0) |
d191ded8a790
Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents:
614
diff
changeset
|
15 # define DBG_RE_FREE(...) do { if (th_dbg_re_flags & TH_DBG_RE_FREE) fprintf(stderr, __VA_ARGS__); } while (0) |
d191ded8a790
Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents:
614
diff
changeset
|
16 # define DBG_RE_MATCH(...) do { if (th_dbg_re_flags & TH_DBG_RE_MATCH) fprintf(stderr, __VA_ARGS__); } while (0) |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
17 #else |
635
d191ded8a790
Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents:
614
diff
changeset
|
18 # define DBG_RE_COMPILE(...) |
d191ded8a790
Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents:
614
diff
changeset
|
19 # define DBG_RE_FREE(...) |
d191ded8a790
Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents:
614
diff
changeset
|
20 # define DBG_RE_MATCH(...) |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
21 #endif |
635
d191ded8a790
Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents:
614
diff
changeset
|
22 |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
23 |
614
afcaf5e38f56
Disable regex stuff from normal builds.
Matti Hamalainen <ccr@tnsp.org>
parents:
613
diff
changeset
|
24 #ifdef TH_EXPERIMENTAL_REGEX |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
25 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
26 enum |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
27 { |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
28 TH_RE_MATCH_ONCE, |
613 | 29 TH_RE_MATCH_COUNT_GREEDY, |
30 TH_RE_MATCH_COUNT_NONGREEDY, | |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
31 TH_RE_MATCH_ANCHOR_START, |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
32 TH_RE_MATCH_ANCHOR_END, |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
33 }; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
34 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
35 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
36 enum |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
37 { |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
38 TH_RE_TYPE_CHAR, |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
39 TH_RE_TYPE_ANY_CHAR, |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
40 TH_RE_TYPE_LIST, |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
41 TH_RE_TYPE_LIST_REVERSE, |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
42 TH_RE_TYPE_SUBEXPR, |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
43 }; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
44 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
45 |
635
d191ded8a790
Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents:
614
diff
changeset
|
46 #ifdef TH_EXPERIMENTAL_REGEX_DEBUG |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
47 static const char *re_match_modes[] = |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
48 { |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
49 "ONCE", |
613 | 50 "COUNT GREEDY", |
51 "COUNT NONGREEDY", | |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
52 "ANCHOR_START", |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
53 "ANCHOR_END", |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
54 }; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
55 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
56 static const char *re_match_types[] = |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
57 { |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
58 "CHAR", |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
59 "ANY", |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
60 "LIST", |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
61 "LIST_REVERSE", |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
62 "SUBEXPR", |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
63 }; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
64 #endif |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
65 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
66 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
67 typedef struct |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
68 { |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
69 const th_regex_char *pattern; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
70 size_t offs; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
71 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
72 th_regex_ctx *data; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
73 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
74 size_t nstack, stacksize; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
75 th_regex_ctx **stack; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
76 } th_regex_parse_ctx; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
77 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
78 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
79 static void th_regex_node_init(th_regex_node *node) |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
80 { |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
81 memset(node, 0, sizeof(th_regex_node)); |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
82 node->mode = TH_RE_MATCH_ONCE; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
83 } |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
84 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
85 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
86 static int th_regex_strndup(th_regex_char **pdst, |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
87 const th_regex_char *src, const size_t len) |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
88 { |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
89 if (pdst == NULL) |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
90 return THERR_NULLPTR; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
91 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
92 if (UINTPTR_MAX / sizeof(th_regex_char) < len + 1) |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
93 return THERR_BOUNDS; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
94 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
95 if ((*pdst = (th_regex_char *) |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
96 th_malloc((len + 1) * sizeof(th_regex_char))) == NULL) |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
97 return THERR_MALLOC; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
98 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
99 memcpy(*pdst, src, len * sizeof(th_regex_char)); |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
100 (*pdst)[len] = 0; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
101 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
102 return THERR_OK; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
103 } |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
104 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
105 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
106 static int th_regex_ctx_get_prev_node( |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
107 th_regex_parse_ctx *ctx, th_regex_node **pnode) |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
108 { |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
109 if (ctx->data != NULL && ctx->data->nnodes > 0) |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
110 { |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
111 *pnode = &ctx->data->nodes[ctx->data->nnodes - 1]; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
112 return THERR_OK; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
113 } |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
114 else |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
115 return THERR_INVALID_DATA; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
116 } |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
117 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
118 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
119 static int th_regex_ctx_push(th_regex_parse_ctx *ctx) |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
120 { |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
121 if (ctx->stack == NULL || ctx->nstack + 1 >= ctx->stacksize) |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
122 { |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
123 ctx->stacksize += 16; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
124 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
125 if ((ctx->stack = th_realloc(ctx->stack, |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
126 ctx->stacksize * sizeof(th_regex_node *))) == NULL) |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
127 return THERR_MALLOC; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
128 } |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
129 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
130 ctx->stack[ctx->nstack] = ctx->data; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
131 ctx->nstack++; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
132 ctx->data = NULL; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
133 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
134 return THERR_OK; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
135 } |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
136 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
137 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
138 static int th_regex_ctx_pop(th_regex_parse_ctx *ctx, th_regex_ctx **data) |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
139 { |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
140 if (ctx->nstack > 0) |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
141 { |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
142 *data = ctx->data; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
143 ctx->nstack--; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
144 ctx->data = ctx->stack[ctx->nstack]; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
145 return THERR_OK; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
146 } |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
147 else |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
148 return THERR_INVALID_DATA; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
149 } |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
150 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
151 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
152 static int th_regex_ctx_node_commit(th_regex_parse_ctx *ctx, th_regex_node *node) |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
153 { |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
154 th_regex_ctx *data = ctx->data; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
155 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
156 if (data == NULL) |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
157 { |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
158 if ((data = ctx->data = th_malloc0(sizeof(th_regex_ctx))) == NULL) |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
159 return THERR_MALLOC; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
160 } |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
161 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
162 if (data->nodes == NULL || data->nnodes + 1 >= data->nodessize) |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
163 { |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
164 data->nodessize += 16; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
165 if ((data->nodes = th_realloc(data->nodes, |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
166 data->nodessize * sizeof(th_regex_node))) == NULL) |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
167 return THERR_MALLOC; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
168 } |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
169 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
170 memcpy(&data->nodes[data->nnodes], node, sizeof(th_regex_node)); |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
171 data->nnodes++; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
172 |
635
d191ded8a790
Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents:
614
diff
changeset
|
173 DBG_RE_COMPILE( |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
174 "node [%" PRIu_SIZE_T " / %" PRIu_SIZE_T "]: " |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
175 "mode=%d, type=%d, rmin=%" PRId_SSIZE_T ", rmax=%" PRId_SSIZE_T "\n", |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
176 data->nnodes, data->nodessize, |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
177 node->mode, node->type, |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
178 node->repeatMin, node->repeatMax); |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
179 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
180 return THERR_OK; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
181 } |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
182 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
183 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
184 static BOOL th_regex_find_next(const th_regex_char *str, |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
185 const size_t start, size_t *offs, |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
186 const th_regex_char delim) |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
187 { |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
188 for (*offs = start; str[*offs] != 0; (*offs)++) |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
189 { |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
190 if (str[*offs] == delim) |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
191 return TRUE; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
192 } |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
193 return FALSE; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
194 } |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
195 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
196 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
197 static BOOL th_regex_parse_ssize_t(const th_regex_char *str, |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
198 ssize_t *value) |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
199 { |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
200 th_regex_char ch; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
201 BOOL neg; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
202 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
203 if (*str == '-') |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
204 { |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
205 str++; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
206 neg = TRUE; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
207 } |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
208 else |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
209 neg = FALSE; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
210 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
211 // Is the value negative? |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
212 while ((ch = *str++)) |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
213 { |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
214 if (ch >= '0' && ch <= '9') |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
215 { |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
216 *value *= 10; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
217 *value += ch - '0'; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
218 } |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
219 else |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
220 return FALSE; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
221 } |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
222 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
223 if (neg) |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
224 *value = -(*value); |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
225 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
226 return TRUE; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
227 } |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
228 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
229 |
639
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
230 static void th_regex_list_item_init(th_regex_list_item *item) |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
231 { |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
232 memset(item, 0, sizeof(th_regex_list_item)); |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
233 } |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
234 |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
235 |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
236 static int th_regex_list_add_item(th_regex_list *list, th_regex_list_item *item) |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
237 { |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
238 if (list->items == NULL || list->nitems + 1 >= list->itemssize) |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
239 { |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
240 list->itemssize += 16; |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
241 |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
242 if ((list->items = th_realloc(list->items, |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
243 list->itemssize * sizeof(th_regex_list_item))) == NULL) |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
244 return THERR_MALLOC; |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
245 } |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
246 |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
247 memcpy(list->items + list->nitems, item, sizeof(th_regex_list_item)); |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
248 list->nitems++; |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
249 |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
250 return THERR_OK; |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
251 } |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
252 |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
253 |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
254 static void th_regex_list_free(th_regex_list *list) |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
255 { |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
256 if (list != NULL) |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
257 { |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
258 for (size_t n = 0; n < list->nitems; n++) |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
259 { |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
260 th_free(list->items[n].chars); |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
261 } |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
262 th_free(list->items); |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
263 } |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
264 } |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
265 |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
266 |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
267 static int th_regex_parse_list(const th_regex_char *str, |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
268 const size_t slen, th_regex_list *list) |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
269 { |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
270 th_regex_char *tmp = NULL; |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
271 th_regex_list_item item; |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
272 int res; |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
273 |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
274 if ((res = th_regex_strndup(&tmp, str, slen)) != THERR_OK) |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
275 goto out; |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
276 |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
277 // Handle ranges like [A-Z] |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
278 for (size_t offs = 0; offs < slen; offs++) |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
279 { |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
280 th_regex_char |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
281 *prev = (offs > 0) ? tmp + offs - 1 : NULL, |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
282 *curr = tmp + offs, |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
283 *next = (offs + 1 < slen) ? tmp + offs + 1 : NULL; |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
284 |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
285 if (*curr == '-') |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
286 { |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
287 if (prev != NULL && next != NULL) |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
288 { |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
289 // Range |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
290 th_regex_list_item_init(&item); |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
291 item.type = 1; |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
292 item.start = *prev; |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
293 item.end = *next; |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
294 |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
295 if (item.start <= item.end) |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
296 { |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
297 res = THERR_INVALID_DATA; |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
298 goto out; |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
299 } |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
300 |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
301 *curr = *prev = *next = 0; |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
302 |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
303 if ((res = th_regex_list_add_item(list, &item)) != THERR_OK) |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
304 goto out; |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
305 } |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
306 else |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
307 if (next != NULL) |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
308 { |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
309 res = THERR_INVALID_DATA; |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
310 goto out; |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
311 } |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
312 } |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
313 } |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
314 |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
315 // Count number of remaining characters |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
316 th_regex_list_item_init(&item); |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
317 item.type = 0; |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
318 item.nchars = 0; |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
319 |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
320 for (size_t offs = 0; offs < slen; offs++) |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
321 { |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
322 th_regex_char curr = tmp[offs]; |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
323 if (curr != 0) |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
324 item.nchars++; |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
325 } |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
326 |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
327 if (item.nchars > 0) |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
328 { |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
329 if ((item.chars = th_malloc(sizeof(th_regex_char) * item.nchars)) == NULL) |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
330 { |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
331 res = THERR_MALLOC; |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
332 goto out; |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
333 } |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
334 |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
335 for (size_t offs = 0, n = 0; offs < slen; offs++) |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
336 { |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
337 th_regex_char curr = tmp[offs]; |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
338 if (curr != 0) |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
339 { |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
340 item.chars[n] = curr; |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
341 n++; |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
342 } |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
343 } |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
344 |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
345 if ((res = th_regex_list_add_item(list, &item)) != THERR_OK) |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
346 { |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
347 th_free(item.chars); |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
348 goto out; |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
349 } |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
350 } |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
351 |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
352 out: |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
353 th_free(tmp); |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
354 return res; |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
355 } |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
356 |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
357 |
611
d895b0fd6ad6
Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
358 int th_regex_compile(th_regex_ctx **pexpr, const th_regex_char *pattern) |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
359 { |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
360 int res = THERR_OK; |
611
d895b0fd6ad6
Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
361 th_regex_parse_ctx ctx; |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
362 th_regex_node node, *pnode; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
363 th_regex_char *tmp = NULL; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
364 size_t start; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
365 |
611
d895b0fd6ad6
Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
366 if (pexpr == NULL || pattern == NULL) |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
367 { |
611
d895b0fd6ad6
Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
368 res = THERR_NULLPTR; |
d895b0fd6ad6
Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
369 goto exit; |
d895b0fd6ad6
Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
370 } |
d895b0fd6ad6
Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
371 |
d895b0fd6ad6
Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
372 memset(&ctx, 0, sizeof(ctx)); |
d895b0fd6ad6
Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
373 ctx.pattern = pattern; |
d895b0fd6ad6
Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
374 |
d895b0fd6ad6
Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
375 for (; ctx.pattern[ctx.offs] != 0; ctx.offs++) |
d895b0fd6ad6
Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
376 { |
613 | 377 th_regex_char cch = ctx.pattern[ctx.offs]; |
635
d191ded8a790
Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents:
614
diff
changeset
|
378 DBG_RE_COMPILE("[%" PRIu_SIZE_T "] '%c'\n", ctx.offs, cch); |
613 | 379 switch (cch) |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
380 { |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
381 case '?': |
613 | 382 case '*': |
383 case '+': | |
611
d895b0fd6ad6
Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
384 if ((res = th_regex_ctx_get_prev_node(&ctx, &pnode)) != THERR_OK) |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
385 goto exit; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
386 |
613 | 387 if (cch == '?') |
388 { | |
389 // Check if previous was a count | |
390 pnode->mode = (pnode->mode == TH_RE_MATCH_COUNT_GREEDY) ? | |
391 TH_RE_MATCH_COUNT_NONGREEDY : TH_RE_MATCH_COUNT_GREEDY; | |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
392 |
613 | 393 // Previous token is optional (repeat 0-1 times) |
394 pnode->repeatMin = 0; | |
395 pnode->repeatMax = 1; | |
396 } | |
397 else | |
398 { | |
399 // Check if previous was a count | |
400 if (pnode->mode == TH_RE_MATCH_COUNT_GREEDY || | |
401 pnode->mode == TH_RE_MATCH_COUNT_NONGREEDY) | |
402 { | |
403 res = THERR_INVALID_DATA; | |
404 goto exit; | |
405 } | |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
406 |
613 | 407 pnode->mode = TH_RE_MATCH_COUNT_GREEDY; |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
408 |
613 | 409 if (cch == '*') |
410 { | |
411 // Previous token can repeat 0 or more times | |
412 pnode->repeatMin = 0; | |
413 pnode->repeatMax = -1; | |
414 } | |
415 else | |
416 { | |
417 // Previous token must repeat 1 or more times | |
418 pnode->repeatMin = 1; | |
419 pnode->repeatMax = -1; | |
420 } | |
421 } | |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
422 break; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
423 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
424 case '{': |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
425 // {n} | {min,max} |
611
d895b0fd6ad6
Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
426 start = ctx.offs + 1; |
d895b0fd6ad6
Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
427 if (!th_regex_find_next(ctx.pattern, start, &ctx.offs, '}')) |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
428 { |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
429 // End not found |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
430 res = THERR_INVALID_DATA; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
431 goto exit; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
432 } |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
433 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
434 th_free(tmp); |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
435 |
611
d895b0fd6ad6
Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
436 if ((res = th_regex_ctx_get_prev_node(&ctx, &pnode)) != THERR_OK || |
d895b0fd6ad6
Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
437 (res = th_regex_strndup(&tmp, ctx.pattern + start, |
d895b0fd6ad6
Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
438 ctx.offs - start)) != THERR_OK) |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
439 goto exit; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
440 |
613 | 441 pnode->mode = TH_RE_MATCH_COUNT_GREEDY; |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
442 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
443 if (th_regex_find_next(tmp, 0, &start, ',')) |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
444 { |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
445 tmp[start] = 0; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
446 if (!th_regex_parse_ssize_t(tmp, &pnode->repeatMin) || |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
447 !th_regex_parse_ssize_t(tmp + start + 1, &pnode->repeatMax)) |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
448 { |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
449 res = THERR_INVALID_DATA; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
450 goto exit; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
451 } |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
452 } |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
453 else |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
454 { |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
455 if (!th_regex_parse_ssize_t(tmp, &pnode->repeatMin)) |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
456 { |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
457 res = THERR_INVALID_DATA; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
458 goto exit; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
459 } |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
460 pnode->repeatMax = pnode->repeatMin; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
461 } |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
462 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
463 if (pnode->repeatMin < 0 || pnode->repeatMax < 1 || |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
464 pnode->repeatMax < pnode->repeatMin) |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
465 { |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
466 // Invalid repeat counts |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
467 res = THERR_INVALID_DATA; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
468 goto exit; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
469 } |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
470 break; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
471 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
472 case '(': |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
473 // Start of subpattern |
611
d895b0fd6ad6
Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
474 if ((res = th_regex_ctx_push(&ctx)) != THERR_OK) |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
475 goto exit; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
476 break; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
477 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
478 case ')': |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
479 // End of subpattern |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
480 th_regex_node_init(&node); |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
481 node.type = TH_RE_TYPE_SUBEXPR; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
482 |
611
d895b0fd6ad6
Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
483 if ((res = th_regex_ctx_pop(&ctx, &node.match.expr)) != THERR_OK || |
d895b0fd6ad6
Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
484 (res = th_regex_ctx_node_commit(&ctx, &node)) != THERR_OK) |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
485 goto exit; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
486 break; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
487 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
488 case '^': |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
489 // Start of line anchor |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
490 th_regex_node_init(&node); |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
491 node.mode = TH_RE_MATCH_ANCHOR_START; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
492 |
611
d895b0fd6ad6
Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
493 if ((res = th_regex_ctx_node_commit(&ctx, &node)) != THERR_OK) |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
494 goto exit; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
495 break; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
496 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
497 case '$': |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
498 // End of line anchor |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
499 th_regex_node_init(&node); |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
500 node.mode = TH_RE_MATCH_ANCHOR_END; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
501 |
611
d895b0fd6ad6
Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
502 if ((res = th_regex_ctx_node_commit(&ctx, &node)) != THERR_OK) |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
503 goto exit; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
504 break; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
505 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
506 case '[': |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
507 // Start of char list |
611
d895b0fd6ad6
Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
508 start = ctx.offs + 1; |
d895b0fd6ad6
Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
509 if (!th_regex_find_next(ctx.pattern, start, &ctx.offs, ']') || |
d895b0fd6ad6
Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
510 ctx.offs == start) |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
511 { |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
512 res = THERR_INVALID_DATA; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
513 goto exit; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
514 } |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
515 |
638 | 516 // XXX TODO Parse/support ranges [0-9a-zA-Z_-] |
517 | |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
518 th_regex_node_init(&node); |
611
d895b0fd6ad6
Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
519 node.type = (ctx.pattern[start] == '^') ? |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
520 TH_RE_TYPE_LIST_REVERSE : TH_RE_TYPE_LIST; |
611
d895b0fd6ad6
Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
521 node.match.list.nchars = ctx.offs - start; |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
522 if ((res = th_regex_strndup(&node.match.list.chars, |
611
d895b0fd6ad6
Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
523 ctx.pattern + start, node.match.list.nchars)) != THERR_OK) |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
524 goto exit; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
525 |
611
d895b0fd6ad6
Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
526 if ((res = th_regex_ctx_node_commit(&ctx, &node)) != THERR_OK) |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
527 goto exit; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
528 break; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
529 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
530 case '.': |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
531 // Any single character matches |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
532 th_regex_node_init(&node); |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
533 node.type = TH_RE_TYPE_ANY_CHAR; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
534 |
611
d895b0fd6ad6
Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
535 if ((res = th_regex_ctx_node_commit(&ctx, &node)) != THERR_OK) |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
536 goto exit; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
537 break; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
538 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
539 case '\\': |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
540 // Literal escape |
611
d895b0fd6ad6
Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
541 ctx.offs++; |
d895b0fd6ad6
Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
542 if (ctx.pattern[ctx.offs] == 0) |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
543 { |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
544 // End of pattern, error |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
545 res = THERR_INVALID_DATA; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
546 goto exit; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
547 } |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
548 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
549 th_regex_node_init(&node); |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
550 node.type = TH_RE_TYPE_CHAR; |
611
d895b0fd6ad6
Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
551 node.match.chr = ctx.pattern[ctx.offs]; |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
552 |
611
d895b0fd6ad6
Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
553 if ((res = th_regex_ctx_node_commit(&ctx, &node)) != THERR_OK) |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
554 goto exit; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
555 break; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
556 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
557 default: |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
558 // Given character must match |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
559 th_regex_node_init(&node); |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
560 node.type = TH_RE_TYPE_CHAR; |
611
d895b0fd6ad6
Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
561 node.match.chr = ctx.pattern[ctx.offs]; |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
562 |
611
d895b0fd6ad6
Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
563 if ((res = th_regex_ctx_node_commit(&ctx, &node)) != THERR_OK) |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
564 goto exit; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
565 break; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
566 } |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
567 } |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
568 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
569 exit: |
611
d895b0fd6ad6
Combine code from th_regex_compile() to th_regex_compile_do().
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
570 *pexpr = ctx.data; |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
571 th_free(tmp); |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
572 return res; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
573 } |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
574 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
575 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
576 void th_regex_free(th_regex_ctx *expr) |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
577 { |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
578 if (expr != NULL) |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
579 { |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
580 for (size_t n = 0; n < expr->nnodes; n++) |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
581 { |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
582 th_regex_node *node = &expr->nodes[n]; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
583 |
635
d191ded8a790
Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents:
614
diff
changeset
|
584 DBG_RE_FREE( |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
585 "node [%" PRIu_SIZE_T " / %" PRIu_SIZE_T "]: " |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
586 "mode=%d, type=%d, rmin=%" PRId_SSIZE_T ", rmax=%" PRId_SSIZE_T "\n", |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
587 n, expr->nnodes, |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
588 node->mode, node->type, |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
589 node->repeatMin, node->repeatMax); |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
590 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
591 switch (node->type) |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
592 { |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
593 case TH_RE_TYPE_SUBEXPR: |
638 | 594 DBG_RE_FREE(" SUBEXPR: %p vs %p\n", |
595 (void *) expr, (void *) node->match.expr); | |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
596 th_regex_free(node->match.expr); |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
597 break; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
598 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
599 case TH_RE_TYPE_LIST: |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
600 case TH_RE_TYPE_LIST_REVERSE: |
635
d191ded8a790
Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents:
614
diff
changeset
|
601 DBG_RE_FREE(" list='%s'\n", node->match.list.chars); |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
602 th_free(node->match.list.chars); |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
603 break; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
604 |
635
d191ded8a790
Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents:
614
diff
changeset
|
605 #ifdef TH_EXPERIMENTAL_REGEX_DEBUG |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
606 case TH_RE_TYPE_ANY_CHAR: |
635
d191ded8a790
Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents:
614
diff
changeset
|
607 DBG_RE_FREE(" any char\n"); |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
608 break; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
609 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
610 case TH_RE_TYPE_CHAR: |
635
d191ded8a790
Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents:
614
diff
changeset
|
611 DBG_RE_FREE(" char='%c'\n", node->match.chr); |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
612 break; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
613 #endif |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
614 } |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
615 } |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
616 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
617 th_free(expr->nodes); |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
618 } |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
619 } |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
620 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
621 |
639
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
622 static BOOL th_regex_do_match_list(const th_regex_list *list, const th_regex_char cch) |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
623 { |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
624 // Could be optimized, perhaps .. sort match.chars, binary search etc? |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
625 for (size_t nitem = 0; nitem < list->nitems; nitem++) |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
626 { |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
627 const th_regex_list_item *item = &list->items[nitem]; |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
628 if (item->type == 0) |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
629 { |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
630 for (size_t n = 0; n < item->nchars; n++) |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
631 if (item->chars[n] == cch) |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
632 return TRUE; |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
633 } |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
634 else |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
635 { |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
636 if (cch >= item->start && cch <= item->end) |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
637 return TRUE; |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
638 } |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
639 } |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
640 |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
641 return FALSE; |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
642 } |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
643 |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
638
diff
changeset
|
644 |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
645 static BOOL th_regex_do_match_expr(const th_regex_ctx *expr, |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
646 const th_regex_char *haystack, size_t *offs, const int flags); |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
647 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
648 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
649 static BOOL th_regex_do_match_node(const th_regex_char *haystack, |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
650 size_t *offs, const th_regex_node *node, const int flags) |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
651 { |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
652 th_regex_char cch; |
638 | 653 BOOL res = FALSE; |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
654 |
638 | 655 DBG_RE_MATCH(" node_START [%s]: '%s': ", |
656 re_match_types[node->type], haystack + *offs); | |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
657 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
658 switch (node->type) |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
659 { |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
660 case TH_RE_TYPE_SUBEXPR: |
635
d191ded8a790
Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents:
614
diff
changeset
|
661 DBG_RE_MATCH("subexpr ..\n"); |
638 | 662 res = th_regex_do_match_expr(node->match.expr, haystack, offs, flags); |
663 return res; | |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
664 break; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
665 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
666 case TH_RE_TYPE_LIST: |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
667 case TH_RE_TYPE_LIST_REVERSE: |
638 | 668 DBG_RE_MATCH("[%s]", node->match.list.chars); |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
669 if ((cch = haystack[*offs]) == 0) |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
670 goto out; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
671 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
672 // Could be optimized, perhaps .. sort match.chars, binary search etc? |
638 | 673 // XXX TODO Ranges are not supported yet |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
674 for (size_t n = 0; n < node->match.list.nchars; n++) |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
675 if (node->match.list.chars[n] == cch) |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
676 { |
638 | 677 res = TRUE; |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
678 break; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
679 } |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
680 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
681 if (node->type == TH_RE_TYPE_LIST_REVERSE) |
638 | 682 res = !res; |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
683 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
684 (*offs)++; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
685 break; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
686 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
687 case TH_RE_TYPE_ANY_CHAR: |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
688 if ((cch = haystack[*offs]) == 0) |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
689 goto out; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
690 |
638 | 691 res = TRUE; |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
692 (*offs)++; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
693 break; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
694 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
695 case TH_RE_TYPE_CHAR: |
638 | 696 DBG_RE_MATCH("'%c'", node->match.chr); |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
697 if ((cch = haystack[*offs]) == 0) |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
698 goto out; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
699 |
638 | 700 res = (cch == node->match.chr); |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
701 (*offs)++; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
702 break; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
703 } |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
704 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
705 out: |
638 | 706 DBG_RE_MATCH(", match=%s\n", |
707 res ? "YES" : "NO"); | |
708 | |
709 return res; | |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
710 } |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
711 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
712 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
713 static BOOL th_regex_do_match_expr(const th_regex_ctx *expr, |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
714 const th_regex_char *haystack, size_t *offs, const int flags) |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
715 { |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
716 for (size_t n = 0; n < expr->nnodes; n++) |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
717 { |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
718 const th_regex_node *node = &expr->nodes[n]; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
719 size_t soffs; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
720 |
635
d191ded8a790
Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents:
614
diff
changeset
|
721 DBG_RE_MATCH(" expr [%" PRIu_SIZE_T "/%" PRIu_SIZE_T "]: %s ", |
613 | 722 n, expr->nnodes, re_match_modes[node->mode]); |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
723 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
724 switch (node->mode) |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
725 { |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
726 case TH_RE_MATCH_ONCE: |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
727 { |
635
d191ded8a790
Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents:
614
diff
changeset
|
728 DBG_RE_MATCH("\n"); |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
729 soffs = *offs; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
730 if (!th_regex_do_match_node(haystack, &soffs, node, flags)) |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
731 return FALSE; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
732 *offs = soffs; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
733 } |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
734 break; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
735 |
613 | 736 case TH_RE_MATCH_COUNT_GREEDY: |
737 case TH_RE_MATCH_COUNT_NONGREEDY: | |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
738 { |
613 | 739 BOOL done = FALSE; |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
740 ssize_t count = 0; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
741 |
635
d191ded8a790
Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents:
614
diff
changeset
|
742 DBG_RE_MATCH("min=%" PRId_SSIZE_T ", max=%" PRId_SSIZE_T "\n", node->repeatMin, node->repeatMax); |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
743 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
744 do |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
745 { |
613 | 746 BOOL match; |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
747 soffs = *offs; |
613 | 748 |
749 match = th_regex_do_match_node(haystack, &soffs, node, flags); | |
750 for (size_t qn = n + 1; qn < expr->nnodes && haystack[soffs] != 0; qn++) | |
751 { | |
752 const th_regex_node *next = &expr->nodes[qn]; | |
753 do { | |
754 match = th_regex_do_match_node(haystack, &soffs, next, flags); | |
755 } while (haystack[soffs] != 0 && !match); | |
756 } | |
757 | |
758 if (match) | |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
759 { |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
760 // Node matched |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
761 count++; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
762 done = (node->repeatMax > 0 && count >= node->repeatMax); |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
763 *offs = soffs; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
764 } |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
765 else |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
766 { |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
767 // Node did not match, check if we got the minimum if set |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
768 done = (node->repeatMin >= 0 && count >= node->repeatMin); |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
769 soffs = *offs; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
770 break; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
771 } |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
772 } while (!done); |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
773 |
635
d191ded8a790
Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents:
614
diff
changeset
|
774 DBG_RE_MATCH("RESULT: %" PRId_SSIZE_T " = %s\n", count, done ? "YES" : "NO"); |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
775 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
776 if (!done) |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
777 return FALSE; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
778 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
779 *offs = soffs; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
780 } |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
781 break; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
782 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
783 case TH_RE_MATCH_ANCHOR_START: |
635
d191ded8a790
Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents:
614
diff
changeset
|
784 DBG_RE_MATCH("offs=%" PRIu_SIZE_T "\n", *offs); |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
785 if (*offs != 0) |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
786 return FALSE; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
787 break; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
788 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
789 case TH_RE_MATCH_ANCHOR_END: |
635
d191ded8a790
Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents:
614
diff
changeset
|
790 DBG_RE_MATCH("is end=%d\n", haystack[*offs]); |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
791 if (haystack[*offs] != 0) |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
792 return FALSE; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
793 break; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
794 } |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
795 } |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
796 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
797 return TRUE; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
798 } |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
799 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
800 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
801 int th_regex_match(const th_regex_ctx *expr, const th_regex_char *haystack, |
610
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
609
diff
changeset
|
802 size_t *pnmatches, th_regex_match_node **pmatches, const size_t maxmatches, |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
803 const int flags) |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
804 { |
610
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
609
diff
changeset
|
805 size_t nmatches = 0; |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
609
diff
changeset
|
806 |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
609
diff
changeset
|
807 if (pnmatches != NULL) |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
609
diff
changeset
|
808 *pnmatches = 0; |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
809 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
810 // Check given pattern and string |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
811 if (expr == NULL || haystack == NULL) |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
812 return THERR_NULLPTR; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
813 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
814 // Start matching |
610
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
609
diff
changeset
|
815 // XXX NOTE .. lots to think about and to take into account: |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
609
diff
changeset
|
816 // - anchored and unanchored expressions |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
609
diff
changeset
|
817 // - how to check if the expression has consumed all possibilities? |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
609
diff
changeset
|
818 // .. |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
609
diff
changeset
|
819 for (size_t soffs = 0; haystack[soffs] != 0; ) |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
820 { |
610
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
609
diff
changeset
|
821 BOOL matched; |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
609
diff
changeset
|
822 size_t coffs = soffs; |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
609
diff
changeset
|
823 |
635
d191ded8a790
Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents:
614
diff
changeset
|
824 DBG_RE_MATCH("\nTRY_MATCH @ startoffs=%" PRIu_SIZE_T ": '%s'\n", |
612
cc9ec51b4875
Add some comments and debug messages.
Matti Hamalainen <ccr@tnsp.org>
parents:
611
diff
changeset
|
825 soffs, haystack + soffs); |
cc9ec51b4875
Add some comments and debug messages.
Matti Hamalainen <ccr@tnsp.org>
parents:
611
diff
changeset
|
826 |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
827 if ((matched = th_regex_do_match_expr(expr, haystack, &coffs, flags))) |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
828 { |
612
cc9ec51b4875
Add some comments and debug messages.
Matti Hamalainen <ccr@tnsp.org>
parents:
611
diff
changeset
|
829 // A match was found, increase count |
610
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
609
diff
changeset
|
830 nmatches++; |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
609
diff
changeset
|
831 |
612
cc9ec51b4875
Add some comments and debug messages.
Matti Hamalainen <ccr@tnsp.org>
parents:
611
diff
changeset
|
832 // Deliver to caller if required |
610
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
609
diff
changeset
|
833 if (pnmatches != NULL) |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
609
diff
changeset
|
834 *pnmatches = nmatches; |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
609
diff
changeset
|
835 |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
609
diff
changeset
|
836 if (pmatches != NULL) |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
609
diff
changeset
|
837 { |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
609
diff
changeset
|
838 th_regex_match_node *match = th_malloc0(sizeof(th_regex_match_node)); |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
609
diff
changeset
|
839 if (match == NULL) |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
609
diff
changeset
|
840 return THERR_MALLOC; |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
609
diff
changeset
|
841 |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
609
diff
changeset
|
842 match->start = soffs; |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
609
diff
changeset
|
843 match->len = coffs - soffs; |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
609
diff
changeset
|
844 |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
609
diff
changeset
|
845 th_llist_append_node((th_llist_t **) pmatches, (th_llist_t *) match); |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
609
diff
changeset
|
846 } |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
609
diff
changeset
|
847 |
612
cc9ec51b4875
Add some comments and debug messages.
Matti Hamalainen <ccr@tnsp.org>
parents:
611
diff
changeset
|
848 // Check match count limit, if set |
610
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
609
diff
changeset
|
849 if (maxmatches > 0 && nmatches >= maxmatches) |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
609
diff
changeset
|
850 break; |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
609
diff
changeset
|
851 |
612
cc9ec51b4875
Add some comments and debug messages.
Matti Hamalainen <ccr@tnsp.org>
parents:
611
diff
changeset
|
852 // If offset was not advanced, increase by one |
cc9ec51b4875
Add some comments and debug messages.
Matti Hamalainen <ccr@tnsp.org>
parents:
611
diff
changeset
|
853 // otherwise use end of match offset as new start |
610
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
609
diff
changeset
|
854 if (soffs == coffs) |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
609
diff
changeset
|
855 soffs++; |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
609
diff
changeset
|
856 else |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
609
diff
changeset
|
857 soffs = coffs; |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
858 } |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
859 else |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
860 { |
610
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
609
diff
changeset
|
861 soffs++; |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
862 } |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
863 } |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
864 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
865 return THERR_OK; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
866 } |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
867 |
610
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
609
diff
changeset
|
868 |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
609
diff
changeset
|
869 static void th_regex_free_match(th_regex_match_node *node) |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
609
diff
changeset
|
870 { |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
609
diff
changeset
|
871 (void) node; |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
609
diff
changeset
|
872 // Nothing to do here at the moment |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
609
diff
changeset
|
873 } |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
609
diff
changeset
|
874 |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
609
diff
changeset
|
875 |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
609
diff
changeset
|
876 void th_regex_free_matches(th_regex_match_node *matches) |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
609
diff
changeset
|
877 { |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
609
diff
changeset
|
878 th_llist_free_func_node((th_llist_t *) matches, |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
609
diff
changeset
|
879 (void (*)(th_llist_t *)) th_regex_free_match); |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
609
diff
changeset
|
880 } |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
609
diff
changeset
|
881 |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
882 #endif // TH_EXPERIMENTAL_REGEX |