Mercurial > hg > th-libs
comparison th_regex.c @ 639:8c957ad9d4c3
Some more work on regex stuff.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Thu, 23 Jan 2020 11:38:28 +0200 |
parents | c4bca120bfb0 |
children | 9e1f9e1d1487 |
comparison
equal
deleted
inserted
replaced
638:c4bca120bfb0 | 639:8c957ad9d4c3 |
---|---|
222 | 222 |
223 if (neg) | 223 if (neg) |
224 *value = -(*value); | 224 *value = -(*value); |
225 | 225 |
226 return TRUE; | 226 return TRUE; |
227 } | |
228 | |
229 | |
230 static void th_regex_list_item_init(th_regex_list_item *item) | |
231 { | |
232 memset(item, 0, sizeof(th_regex_list_item)); | |
233 } | |
234 | |
235 | |
236 static int th_regex_list_add_item(th_regex_list *list, th_regex_list_item *item) | |
237 { | |
238 if (list->items == NULL || list->nitems + 1 >= list->itemssize) | |
239 { | |
240 list->itemssize += 16; | |
241 | |
242 if ((list->items = th_realloc(list->items, | |
243 list->itemssize * sizeof(th_regex_list_item))) == NULL) | |
244 return THERR_MALLOC; | |
245 } | |
246 | |
247 memcpy(list->items + list->nitems, item, sizeof(th_regex_list_item)); | |
248 list->nitems++; | |
249 | |
250 return THERR_OK; | |
251 } | |
252 | |
253 | |
254 static void th_regex_list_free(th_regex_list *list) | |
255 { | |
256 if (list != NULL) | |
257 { | |
258 for (size_t n = 0; n < list->nitems; n++) | |
259 { | |
260 th_free(list->items[n].chars); | |
261 } | |
262 th_free(list->items); | |
263 } | |
264 } | |
265 | |
266 | |
267 static int th_regex_parse_list(const th_regex_char *str, | |
268 const size_t slen, th_regex_list *list) | |
269 { | |
270 th_regex_char *tmp = NULL; | |
271 th_regex_list_item item; | |
272 int res; | |
273 | |
274 if ((res = th_regex_strndup(&tmp, str, slen)) != THERR_OK) | |
275 goto out; | |
276 | |
277 // Handle ranges like [A-Z] | |
278 for (size_t offs = 0; offs < slen; offs++) | |
279 { | |
280 th_regex_char | |
281 *prev = (offs > 0) ? tmp + offs - 1 : NULL, | |
282 *curr = tmp + offs, | |
283 *next = (offs + 1 < slen) ? tmp + offs + 1 : NULL; | |
284 | |
285 if (*curr == '-') | |
286 { | |
287 if (prev != NULL && next != NULL) | |
288 { | |
289 // Range | |
290 th_regex_list_item_init(&item); | |
291 item.type = 1; | |
292 item.start = *prev; | |
293 item.end = *next; | |
294 | |
295 if (item.start <= item.end) | |
296 { | |
297 res = THERR_INVALID_DATA; | |
298 goto out; | |
299 } | |
300 | |
301 *curr = *prev = *next = 0; | |
302 | |
303 if ((res = th_regex_list_add_item(list, &item)) != THERR_OK) | |
304 goto out; | |
305 } | |
306 else | |
307 if (next != NULL) | |
308 { | |
309 res = THERR_INVALID_DATA; | |
310 goto out; | |
311 } | |
312 } | |
313 } | |
314 | |
315 // Count number of remaining characters | |
316 th_regex_list_item_init(&item); | |
317 item.type = 0; | |
318 item.nchars = 0; | |
319 | |
320 for (size_t offs = 0; offs < slen; offs++) | |
321 { | |
322 th_regex_char curr = tmp[offs]; | |
323 if (curr != 0) | |
324 item.nchars++; | |
325 } | |
326 | |
327 if (item.nchars > 0) | |
328 { | |
329 if ((item.chars = th_malloc(sizeof(th_regex_char) * item.nchars)) == NULL) | |
330 { | |
331 res = THERR_MALLOC; | |
332 goto out; | |
333 } | |
334 | |
335 for (size_t offs = 0, n = 0; offs < slen; offs++) | |
336 { | |
337 th_regex_char curr = tmp[offs]; | |
338 if (curr != 0) | |
339 { | |
340 item.chars[n] = curr; | |
341 n++; | |
342 } | |
343 } | |
344 | |
345 if ((res = th_regex_list_add_item(list, &item)) != THERR_OK) | |
346 { | |
347 th_free(item.chars); | |
348 goto out; | |
349 } | |
350 } | |
351 | |
352 out: | |
353 th_free(tmp); | |
354 return res; | |
227 } | 355 } |
228 | 356 |
229 | 357 |
230 int th_regex_compile(th_regex_ctx **pexpr, const th_regex_char *pattern) | 358 int th_regex_compile(th_regex_ctx **pexpr, const th_regex_char *pattern) |
231 { | 359 { |
486 } | 614 } |
487 } | 615 } |
488 | 616 |
489 th_free(expr->nodes); | 617 th_free(expr->nodes); |
490 } | 618 } |
619 } | |
620 | |
621 | |
622 static BOOL th_regex_do_match_list(const th_regex_list *list, const th_regex_char cch) | |
623 { | |
624 // Could be optimized, perhaps .. sort match.chars, binary search etc? | |
625 for (size_t nitem = 0; nitem < list->nitems; nitem++) | |
626 { | |
627 const th_regex_list_item *item = &list->items[nitem]; | |
628 if (item->type == 0) | |
629 { | |
630 for (size_t n = 0; n < item->nchars; n++) | |
631 if (item->chars[n] == cch) | |
632 return TRUE; | |
633 } | |
634 else | |
635 { | |
636 if (cch >= item->start && cch <= item->end) | |
637 return TRUE; | |
638 } | |
639 } | |
640 | |
641 return FALSE; | |
491 } | 642 } |
492 | 643 |
493 | 644 |
494 static BOOL th_regex_do_match_expr(const th_regex_ctx *expr, | 645 static BOOL th_regex_do_match_expr(const th_regex_ctx *expr, |
495 const th_regex_char *haystack, size_t *offs, const int flags); | 646 const th_regex_char *haystack, size_t *offs, const int flags); |