comparison th_regex.c @ 639:8c957ad9d4c3

Some more work on regex stuff.
author Matti Hamalainen <ccr@tnsp.org>
date Thu, 23 Jan 2020 11:38:28 +0200
parents c4bca120bfb0
children 9e1f9e1d1487
comparison
equal deleted inserted replaced
638:c4bca120bfb0 639:8c957ad9d4c3
222 222
223 if (neg) 223 if (neg)
224 *value = -(*value); 224 *value = -(*value);
225 225
226 return TRUE; 226 return TRUE;
227 }
228
229
230 static void th_regex_list_item_init(th_regex_list_item *item)
231 {
232 memset(item, 0, sizeof(th_regex_list_item));
233 }
234
235
236 static int th_regex_list_add_item(th_regex_list *list, th_regex_list_item *item)
237 {
238 if (list->items == NULL || list->nitems + 1 >= list->itemssize)
239 {
240 list->itemssize += 16;
241
242 if ((list->items = th_realloc(list->items,
243 list->itemssize * sizeof(th_regex_list_item))) == NULL)
244 return THERR_MALLOC;
245 }
246
247 memcpy(list->items + list->nitems, item, sizeof(th_regex_list_item));
248 list->nitems++;
249
250 return THERR_OK;
251 }
252
253
254 static void th_regex_list_free(th_regex_list *list)
255 {
256 if (list != NULL)
257 {
258 for (size_t n = 0; n < list->nitems; n++)
259 {
260 th_free(list->items[n].chars);
261 }
262 th_free(list->items);
263 }
264 }
265
266
267 static int th_regex_parse_list(const th_regex_char *str,
268 const size_t slen, th_regex_list *list)
269 {
270 th_regex_char *tmp = NULL;
271 th_regex_list_item item;
272 int res;
273
274 if ((res = th_regex_strndup(&tmp, str, slen)) != THERR_OK)
275 goto out;
276
277 // Handle ranges like [A-Z]
278 for (size_t offs = 0; offs < slen; offs++)
279 {
280 th_regex_char
281 *prev = (offs > 0) ? tmp + offs - 1 : NULL,
282 *curr = tmp + offs,
283 *next = (offs + 1 < slen) ? tmp + offs + 1 : NULL;
284
285 if (*curr == '-')
286 {
287 if (prev != NULL && next != NULL)
288 {
289 // Range
290 th_regex_list_item_init(&item);
291 item.type = 1;
292 item.start = *prev;
293 item.end = *next;
294
295 if (item.start <= item.end)
296 {
297 res = THERR_INVALID_DATA;
298 goto out;
299 }
300
301 *curr = *prev = *next = 0;
302
303 if ((res = th_regex_list_add_item(list, &item)) != THERR_OK)
304 goto out;
305 }
306 else
307 if (next != NULL)
308 {
309 res = THERR_INVALID_DATA;
310 goto out;
311 }
312 }
313 }
314
315 // Count number of remaining characters
316 th_regex_list_item_init(&item);
317 item.type = 0;
318 item.nchars = 0;
319
320 for (size_t offs = 0; offs < slen; offs++)
321 {
322 th_regex_char curr = tmp[offs];
323 if (curr != 0)
324 item.nchars++;
325 }
326
327 if (item.nchars > 0)
328 {
329 if ((item.chars = th_malloc(sizeof(th_regex_char) * item.nchars)) == NULL)
330 {
331 res = THERR_MALLOC;
332 goto out;
333 }
334
335 for (size_t offs = 0, n = 0; offs < slen; offs++)
336 {
337 th_regex_char curr = tmp[offs];
338 if (curr != 0)
339 {
340 item.chars[n] = curr;
341 n++;
342 }
343 }
344
345 if ((res = th_regex_list_add_item(list, &item)) != THERR_OK)
346 {
347 th_free(item.chars);
348 goto out;
349 }
350 }
351
352 out:
353 th_free(tmp);
354 return res;
227 } 355 }
228 356
229 357
230 int th_regex_compile(th_regex_ctx **pexpr, const th_regex_char *pattern) 358 int th_regex_compile(th_regex_ctx **pexpr, const th_regex_char *pattern)
231 { 359 {
486 } 614 }
487 } 615 }
488 616
489 th_free(expr->nodes); 617 th_free(expr->nodes);
490 } 618 }
619 }
620
621
622 static BOOL th_regex_do_match_list(const th_regex_list *list, const th_regex_char cch)
623 {
624 // Could be optimized, perhaps .. sort match.chars, binary search etc?
625 for (size_t nitem = 0; nitem < list->nitems; nitem++)
626 {
627 const th_regex_list_item *item = &list->items[nitem];
628 if (item->type == 0)
629 {
630 for (size_t n = 0; n < item->nchars; n++)
631 if (item->chars[n] == cch)
632 return TRUE;
633 }
634 else
635 {
636 if (cch >= item->start && cch <= item->end)
637 return TRUE;
638 }
639 }
640
641 return FALSE;
491 } 642 }
492 643
493 644
494 static BOOL th_regex_do_match_expr(const th_regex_ctx *expr, 645 static BOOL th_regex_do_match_expr(const th_regex_ctx *expr,
495 const th_regex_char *haystack, size_t *offs, const int flags); 646 const th_regex_char *haystack, size_t *offs, const int flags);