Mercurial > hg > th-libs
annotate th_regex.h @ 639:8c957ad9d4c3
Some more work on regex stuff.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Thu, 23 Jan 2020 11:38:28 +0200 |
parents | d191ded8a790 |
children | 9e1f9e1d1487 |
rev | line source |
---|---|
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
1 /* |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
2 * Simple regular expression matching functionality |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
3 * Programmed and designed by Matti 'ccr' Hamalainen |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
4 * (C) Copyright 2020 Tecnic Software productions (TNSP) |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
5 * |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
6 * Please read file 'COPYING' for information on license and distribution. |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
7 */ |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
8 #ifdef TH_EXPERIMENTAL_REGEX |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
9 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
10 #ifndef TH_REGEX_H |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
11 #define TH_REGEX_H |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
12 |
610
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
605
diff
changeset
|
13 #include "th_types.h" |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
605
diff
changeset
|
14 #include "th_datastruct.h" |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
605
diff
changeset
|
15 |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
16 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
17 #ifdef __cplusplus |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
18 extern "C" { |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
19 #endif |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
20 |
639
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
635
diff
changeset
|
21 |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
22 // |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
23 // Definitions |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
24 // |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
25 // Character type |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
26 typedef char th_regex_char; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
27 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
28 |
610
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
605
diff
changeset
|
29 enum |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
605
diff
changeset
|
30 { |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
605
diff
changeset
|
31 TH_REF_CASEFOLD = 0x0001, |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
605
diff
changeset
|
32 TH_REF_ANCHORED = 0x0002, |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
605
diff
changeset
|
33 }; |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
605
diff
changeset
|
34 |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
605
diff
changeset
|
35 |
639
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
635
diff
changeset
|
36 typedef struct |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
635
diff
changeset
|
37 { |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
635
diff
changeset
|
38 int type; |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
635
diff
changeset
|
39 th_regex_char start, end; |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
635
diff
changeset
|
40 |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
635
diff
changeset
|
41 size_t nchars; |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
635
diff
changeset
|
42 th_regex_char *chars; |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
635
diff
changeset
|
43 } th_regex_list_item; |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
635
diff
changeset
|
44 |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
635
diff
changeset
|
45 |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
635
diff
changeset
|
46 typedef struct |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
635
diff
changeset
|
47 { |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
635
diff
changeset
|
48 size_t nitems, itemssize; |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
635
diff
changeset
|
49 th_regex_list_item *items; |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
635
diff
changeset
|
50 } th_regex_list; |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
635
diff
changeset
|
51 |
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
635
diff
changeset
|
52 |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
53 struct th_regex_ctx; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
54 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
55 typedef struct |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
56 { |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
57 int mode, type; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
58 ssize_t repeatMin, repeatMax; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
59 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
60 union |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
61 { |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
62 th_regex_char chr; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
63 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
64 struct |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
65 { |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
66 size_t nchars; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
67 th_regex_char *chars; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
68 } list; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
69 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
70 struct th_regex_ctx *expr; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
71 } match; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
72 } th_regex_node; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
73 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
74 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
75 typedef struct th_regex_ctx |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
76 { |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
77 size_t nnodes, nodessize; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
78 th_regex_node *nodes; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
79 } th_regex_ctx; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
80 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
81 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
82 typedef struct |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
83 { |
610
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
605
diff
changeset
|
84 th_llist_t node; |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
85 size_t start, len; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
86 } th_regex_match_node; |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
87 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
88 |
635
d191ded8a790
Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
89 #ifdef TH_EXPERIMENTAL_REGEX_DEBUG |
d191ded8a790
Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
90 enum |
d191ded8a790
Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
91 { |
d191ded8a790
Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
92 TH_DBG_RE_COMPILE = 0x0001, |
d191ded8a790
Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
93 TH_DBG_RE_FREE = 0x0002, |
d191ded8a790
Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
94 TH_DBG_RE_MATCH = 0x0004, |
d191ded8a790
Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
95 }; |
d191ded8a790
Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
96 |
d191ded8a790
Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
97 extern int th_dbg_re_flags; |
d191ded8a790
Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
98 #endif |
d191ded8a790
Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
99 |
d191ded8a790
Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
100 |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
101 // |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
102 // Functions |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
103 // |
610
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
605
diff
changeset
|
104 int th_regex_compile(th_regex_ctx **pexpr, const th_regex_char *pattern); |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
605
diff
changeset
|
105 void th_regex_free(th_regex_ctx *expr); |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
106 |
610
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
605
diff
changeset
|
107 int th_regex_match(const th_regex_ctx *expr, const th_regex_char *haystack, |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
605
diff
changeset
|
108 size_t *pnmatches, th_regex_match_node **pmatches, const size_t maxmatches, |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
605
diff
changeset
|
109 const int flags); |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
605
diff
changeset
|
110 void th_regex_free_matches(th_regex_match_node *matches); |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
111 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
112 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
113 #ifdef __cplusplus |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
114 } |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
115 #endif |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
116 #endif // TH_REGEX_H |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
117 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
118 #endif // TH_EXPERIMENTAL_REGEX |