Mercurial > hg > th-libs
annotate th_regex.h @ 715:6f627f4f11db
Fix th_strncasecmp() edge case.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Tue, 28 Apr 2020 18:38:23 +0300 |
parents | 7493d4c9ff77 |
children | 4ca6a3b30fe8 |
rev | line source |
---|---|
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
1 /* |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
2 * Simple regular expression matching functionality |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
3 * Programmed and designed by Matti 'ccr' Hamalainen |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
4 * (C) Copyright 2020 Tecnic Software productions (TNSP) |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
5 * |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
6 * Please read file 'COPYING' for information on license and distribution. |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
7 */ |
655 | 8 /// @file |
9 /// @brief Simple regular expression matching functionality | |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
10 #ifndef TH_REGEX_H |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
11 #define TH_REGEX_H |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
12 |
610
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
605
diff
changeset
|
13 #include "th_datastruct.h" |
651 | 14 #include "th_ioctx.h" |
610
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
605
diff
changeset
|
15 |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
16 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
17 #ifdef __cplusplus |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
18 extern "C" { |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
19 #endif |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
20 |
639
8c957ad9d4c3
Some more work on regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
635
diff
changeset
|
21 |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
22 // |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
23 // Definitions |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
24 // |
640
9e1f9e1d1487
Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents:
639
diff
changeset
|
25 |
655 | 26 /** @struct th_regex_t |
27 * Structure containing the tokenized / compiled regular expression. | |
28 */ | |
640
9e1f9e1d1487
Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents:
639
diff
changeset
|
29 struct th_regex_t; |
9e1f9e1d1487
Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents:
639
diff
changeset
|
30 typedef struct th_regex_t th_regex_t; |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
31 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
32 |
655 | 33 /** @brief |
34 * Linked list structure containing the information for matched | |
35 * sequences returned by th_regex_match(). | |
36 */ | |
640
9e1f9e1d1487
Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents:
639
diff
changeset
|
37 typedef struct |
9e1f9e1d1487
Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents:
639
diff
changeset
|
38 { |
655 | 39 th_llist_t node; ///< Internal linked list data |
40 | |
669
7493d4c9ff77
Add some regex flags, features to be implemented "some day".
Matti Hamalainen <ccr@tnsp.org>
parents:
664
diff
changeset
|
41 int type; ///< Type of this match, TH_RE_MATCH_* |
7493d4c9ff77
Add some regex flags, features to be implemented "some day".
Matti Hamalainen <ccr@tnsp.org>
parents:
664
diff
changeset
|
42 |
664
c5aa9ada1051
s/th_regex_char_t/th_char_t/g
Matti Hamalainen <ccr@tnsp.org>
parents:
655
diff
changeset
|
43 size_t start; ///< Start offset of the match sequence in @p haystack in @c th_char_t units. |
c5aa9ada1051
s/th_regex_char_t/th_char_t/g
Matti Hamalainen <ccr@tnsp.org>
parents:
655
diff
changeset
|
44 size_t len; ///< Length of the match sequence in @p haystack in @c th_char_t units. |
640
9e1f9e1d1487
Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents:
639
diff
changeset
|
45 } th_regex_match_t; |
9e1f9e1d1487
Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents:
639
diff
changeset
|
46 |
9e1f9e1d1487
Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents:
639
diff
changeset
|
47 |
669
7493d4c9ff77
Add some regex flags, features to be implemented "some day".
Matti Hamalainen <ccr@tnsp.org>
parents:
664
diff
changeset
|
48 /** @brief |
7493d4c9ff77
Add some regex flags, features to be implemented "some day".
Matti Hamalainen <ccr@tnsp.org>
parents:
664
diff
changeset
|
49 * Match type values for the @c type field of @c th_regex_match_t. |
7493d4c9ff77
Add some regex flags, features to be implemented "some day".
Matti Hamalainen <ccr@tnsp.org>
parents:
664
diff
changeset
|
50 */ |
610
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
605
diff
changeset
|
51 enum |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
605
diff
changeset
|
52 { |
669
7493d4c9ff77
Add some regex flags, features to be implemented "some day".
Matti Hamalainen <ccr@tnsp.org>
parents:
664
diff
changeset
|
53 TH_RE_MATCH_EXPR, |
7493d4c9ff77
Add some regex flags, features to be implemented "some day".
Matti Hamalainen <ccr@tnsp.org>
parents:
664
diff
changeset
|
54 TH_RE_MATCH_SUBEXPR, |
7493d4c9ff77
Add some regex flags, features to be implemented "some day".
Matti Hamalainen <ccr@tnsp.org>
parents:
664
diff
changeset
|
55 }; |
7493d4c9ff77
Add some regex flags, features to be implemented "some day".
Matti Hamalainen <ccr@tnsp.org>
parents:
664
diff
changeset
|
56 |
7493d4c9ff77
Add some regex flags, features to be implemented "some day".
Matti Hamalainen <ccr@tnsp.org>
parents:
664
diff
changeset
|
57 |
7493d4c9ff77
Add some regex flags, features to be implemented "some day".
Matti Hamalainen <ccr@tnsp.org>
parents:
664
diff
changeset
|
58 /** @brief |
7493d4c9ff77
Add some regex flags, features to be implemented "some day".
Matti Hamalainen <ccr@tnsp.org>
parents:
664
diff
changeset
|
59 * Flags for th_regex_match(). NOTE! Not actually implemented or supported yet! |
7493d4c9ff77
Add some regex flags, features to be implemented "some day".
Matti Hamalainen <ccr@tnsp.org>
parents:
664
diff
changeset
|
60 */ |
7493d4c9ff77
Add some regex flags, features to be implemented "some day".
Matti Hamalainen <ccr@tnsp.org>
parents:
664
diff
changeset
|
61 enum |
7493d4c9ff77
Add some regex flags, features to be implemented "some day".
Matti Hamalainen <ccr@tnsp.org>
parents:
664
diff
changeset
|
62 { |
7493d4c9ff77
Add some regex flags, features to be implemented "some day".
Matti Hamalainen <ccr@tnsp.org>
parents:
664
diff
changeset
|
63 TH_REF_CASEFOLD = 0x0001, ///< Use case-folding |
7493d4c9ff77
Add some regex flags, features to be implemented "some day".
Matti Hamalainen <ccr@tnsp.org>
parents:
664
diff
changeset
|
64 TH_REF_ANCHORED = 0x0002, ///< Implicitly consider expression "anchored" even without explicit ^$ |
7493d4c9ff77
Add some regex flags, features to be implemented "some day".
Matti Hamalainen <ccr@tnsp.org>
parents:
664
diff
changeset
|
65 TH_REF_NEWLINE = 0x0004, ///< If set, anchors refer to line start/newline instead of string start/end |
7493d4c9ff77
Add some regex flags, features to be implemented "some day".
Matti Hamalainen <ccr@tnsp.org>
parents:
664
diff
changeset
|
66 TH_REF_SUBMATCH = 0x0008, ///< Include sub-expression matches in results |
610
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
605
diff
changeset
|
67 }; |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
605
diff
changeset
|
68 |
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
605
diff
changeset
|
69 |
635
d191ded8a790
Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
70 #ifdef TH_EXPERIMENTAL_REGEX_DEBUG |
651 | 71 extern th_ioctx *th_dbg_fh; |
635
d191ded8a790
Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
72 #endif |
d191ded8a790
Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
73 |
d191ded8a790
Improve the experimental regex matching debugging macros.
Matti Hamalainen <ccr@tnsp.org>
parents:
610
diff
changeset
|
74 |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
75 // |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
76 // Functions |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
77 // |
664
c5aa9ada1051
s/th_regex_char_t/th_char_t/g
Matti Hamalainen <ccr@tnsp.org>
parents:
655
diff
changeset
|
78 int th_regex_compile(th_regex_t **pexpr, const th_char_t *pattern); |
640
9e1f9e1d1487
Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents:
639
diff
changeset
|
79 void th_regex_free(th_regex_t *expr); |
651 | 80 void th_regex_dump(th_ioctx *fh, const int level, const th_regex_t *expr); |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
81 |
664
c5aa9ada1051
s/th_regex_char_t/th_char_t/g
Matti Hamalainen <ccr@tnsp.org>
parents:
655
diff
changeset
|
82 int th_regex_match(const th_regex_t *expr, const th_char_t *haystack, |
640
9e1f9e1d1487
Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents:
639
diff
changeset
|
83 size_t *pnmatches, th_regex_match_t **pmatches, const size_t maxmatches, |
610
a0e8d9c6300b
A bit more work on the regex stuff.
Matti Hamalainen <ccr@tnsp.org>
parents:
605
diff
changeset
|
84 const int flags); |
640
9e1f9e1d1487
Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents:
639
diff
changeset
|
85 |
9e1f9e1d1487
Aaand some more work. Still just a broken concept.
Matti Hamalainen <ccr@tnsp.org>
parents:
639
diff
changeset
|
86 void th_regex_free_matches(th_regex_match_t *matches); |
605
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
87 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
88 |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
89 #ifdef __cplusplus |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
90 } |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
91 #endif |
566e6ef41f9d
Initial commit of the highly experimental and unfinished regular expression
Matti Hamalainen <ccr@tnsp.org>
parents:
diff
changeset
|
92 #endif // TH_REGEX_H |