Mercurial > hg > th-libs
comparison th_regex.c @ 643:a2bf1ea05b05
Cleanups.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Fri, 24 Jan 2020 12:06:43 +0200 |
parents | 3a35db5c1873 |
children | b897995101b7 |
comparison (legend: equal, deleted, inserted, replaced)
642:3a35db5c1873 | 643:a2bf1ea05b05 |
---|---|
17 int th_dbg_re_flags = 0; | 17 int th_dbg_re_flags = 0; |
18 | 18 |
19 static const char *re_match_modes[] = | 19 static const char *re_match_modes[] = |
20 { | 20 { |
21 "ONCE", | 21 "ONCE", |
22 "COUNT GREEDY", | 22 "COUNT", |
23 "COUNT NONGREEDY", | |
24 "ANCHOR START", | 23 "ANCHOR START", |
25 "ANCHOR END", | 24 "ANCHOR END", |
26 }; | 25 }; |
27 | 26 |
28 static const char *re_match_types[] = | 27 static const char *re_match_types[] = |
81 | 80 |
82 | 81 |
83 enum | 82 enum |
84 { | 83 { |
85 TH_RE_MATCH_ONCE, | 84 TH_RE_MATCH_ONCE, |
86 TH_RE_MATCH_COUNT_GREEDY, | 85 TH_RE_MATCH_COUNT, |
87 TH_RE_MATCH_COUNT_NONGREEDY, | |
88 | 86 |
89 TH_RE_MATCH_ANCHOR_START, | 87 TH_RE_MATCH_ANCHOR_START, |
90 TH_RE_MATCH_ANCHOR_END, | 88 TH_RE_MATCH_ANCHOR_END, |
91 }; | 89 }; |
92 | 90 |
421 if ((res = th_regex_parse_ctx_get_prev_node(&ctx, &pnode)) != THERR_OK) | 419 if ((res = th_regex_parse_ctx_get_prev_node(&ctx, &pnode)) != THERR_OK) |
422 goto exit; | 420 goto exit; |
423 | 421 |
424 if (cch == '?') | 422 if (cch == '?') |
425 { | 423 { |
426 // Check if previous was a count | 424 // Previous token is optional (repeat 0-1 times) (non-greedy matching) |
427 if (pnode->mode == TH_RE_MATCH_COUNT_NONGREEDY) | 425 pnode->mode = TH_RE_MATCH_COUNT; |
426 pnode->repeatMin = 0; | |
427 pnode->repeatMax = 1; | |
428 } | |
429 else | |
430 { | |
431 // Check if previous was a count ("**", "*+", etc.) | |
432 if (pnode->mode == TH_RE_MATCH_COUNT) | |
428 { | 433 { |
429 res = THERR_INVALID_DATA; | 434 res = THERR_INVALID_DATA; |
430 goto exit; | 435 goto exit; |
431 } | 436 } |
432 else | 437 |
433 if (pnode->mode != TH_RE_MATCH_COUNT_GREEDY) | 438 pnode->mode = TH_RE_MATCH_COUNT; |
434 { | |
435 // Previous token is optional (repeat 0-1 times) | |
436 pnode->repeatMin = 0; | |
437 pnode->repeatMax = 1; | |
438 } | |
439 | |
440 pnode->mode = TH_RE_MATCH_COUNT_NONGREEDY; | |
441 } | |
442 else | |
443 { | |
444 // Check if previous was a count ("**", "*+", etc.) | |
445 if (pnode->mode == TH_RE_MATCH_COUNT_GREEDY || | |
446 pnode->mode == TH_RE_MATCH_COUNT_NONGREEDY) | |
447 { | |
448 res = THERR_INVALID_DATA; | |
449 goto exit; | |
450 } | |
451 | |
452 pnode->mode = TH_RE_MATCH_COUNT_GREEDY; | |
453 | 439 |
454 if (cch == '*') | 440 if (cch == '*') |
455 { | 441 { |
456 // Previous token can repeat 0 or more times | 442 // Previous token can repeat 0 or more times |
457 pnode->repeatMin = 0; | 443 pnode->repeatMin = 0; |
481 if ((res = th_regex_parse_ctx_get_prev_node(&ctx, &pnode)) != THERR_OK || | 467 if ((res = th_regex_parse_ctx_get_prev_node(&ctx, &pnode)) != THERR_OK || |
482 (res = th_regex_strndup(&tmp, ctx.pattern + start, | 468 (res = th_regex_strndup(&tmp, ctx.pattern + start, |
483 ctx.offs - start)) != THERR_OK) | 469 ctx.offs - start)) != THERR_OK) |
484 goto exit; | 470 goto exit; |
485 | 471 |
486 pnode->mode = TH_RE_MATCH_COUNT_GREEDY; | 472 pnode->mode = TH_RE_MATCH_COUNT; |
487 | 473 |
488 if (th_regex_find_next(tmp, 0, &start, ',')) | 474 if (th_regex_find_next(tmp, 0, &start, ',')) |
489 { | 475 { |
490 tmp[start] = 0; | 476 tmp[start] = 0; |
491 if (!th_regex_parse_ssize_t(tmp, &pnode->repeatMin) || | 477 if (!th_regex_parse_ssize_t(tmp, &pnode->repeatMin) || |
742 for (size_t nnode = startnode; res && nnode < expr->nnodes; nnode++) | 728 for (size_t nnode = startnode; res && nnode < expr->nnodes; nnode++) |
743 { | 729 { |
744 const th_regex_node_t *node = &expr->nodes[nnode]; | 730 const th_regex_node_t *node = &expr->nodes[nnode]; |
745 | 731 |
746 DBG_RE_MATCH(" expr [%" PRIu_SIZE_T "/%" PRIu_SIZE_T "]: %s %s '%s'\n", | 732 DBG_RE_MATCH(" expr [%" PRIu_SIZE_T "/%" PRIu_SIZE_T "]: %s %s '%s'\n", |
747 nnode, expr->nnodes, | 733 nnode + 1, expr->nnodes, |
748 re_match_modes[node->mode], re_match_types[node->type], | 734 re_match_modes[node->mode], re_match_types[node->type], |
749 haystack + *poffs); | 735 haystack + soffs); |
750 | 736 |
751 switch (node->mode) | 737 switch (node->mode) |
752 { | 738 { |
753 case TH_RE_MATCH_ONCE: | 739 case TH_RE_MATCH_ONCE: |
754 res = th_regex_match_one(haystack, poffs, node, flags); | 740 res = th_regex_match_one(haystack, &soffs, node, flags); |
755 break; | 741 break; |
756 | 742 |
757 case TH_RE_MATCH_COUNT_GREEDY: | 743 case TH_RE_MATCH_COUNT: |
758 case TH_RE_MATCH_COUNT_NONGREEDY: | |
759 { | 744 { |
760 ssize_t count = 0; | 745 ssize_t count = 0; |
761 | 746 |
762 DBG_RE_MATCH(" COUNT min=%" PRId_SSIZE_T ", max=%" PRId_SSIZE_T "\n", | 747 DBG_RE_MATCH(" COUNT min=%" PRId_SSIZE_T ", max=%" PRId_SSIZE_T "\n", |
763 node->repeatMin, node->repeatMax); | 748 node->repeatMin, node->repeatMax); |
764 | 749 |
765 do | 750 do |
766 { | 751 { |
767 BOOL match; | 752 BOOL match; |
768 size_t toffs = *poffs, tnode = nnode; | 753 size_t toffs = soffs, tnode; |
769 | 754 |
755 DBG_RE_MATCH(" START '%s'\n", haystack + toffs); | |
770 do { | 756 do { |
771 match = th_regex_match_one(haystack, &toffs, node, flags); | 757 match = TRUE; |
772 if (match && haystack[toffs] != 0) | 758 for (tnode = nnode; match && tnode < expr->nnodes && haystack[toffs] != 0; ) |
773 { | 759 { |
774 for (tnode = nnode + 1; match && tnode < expr->nnodes && haystack[toffs] != 0; tnode++) | 760 match = th_regex_match_one(haystack, &toffs, &expr->nodes[tnode], flags); |
775 { | 761 if (match) |
776 size_t noffs = toffs; | 762 tnode++; |
777 match = th_regex_match_one(haystack, &toffs, &expr->nodes[tnode], flags); | |
778 DBG_RE_MATCH(" '%s': %d\n", haystack + noffs, match); | |
779 } | |
780 } | 763 } |
764 DBG_RE_MATCH(" '%s': %d (tnode=%" PRIu_SIZE_T ")\n", haystack + toffs, match, tnode); | |
765 if (node->repeatMin >= 0 && match) | |
766 break; | |
781 } while (!match && haystack[toffs] != 0); | 767 } while (!match && haystack[toffs] != 0); |
768 | |
769 DBG_RE_MATCH(" END '%s': %d (tnode=%" PRIu_SIZE_T ")\n", haystack + toffs, match, tnode); | |
782 | 770 |
783 if (match) | 771 if (match) |
784 { | 772 { |
785 // Node matched | 773 // Node matched |
786 count++; | 774 count++; |
787 *poffs = soffs; | 775 soffs = toffs; |
788 nnode = tnode; | 776 nnode = tnode; |
789 res = (node->repeatMax > 0 && count >= node->repeatMax); | 777 res = (node->repeatMax > 0 && count >= node->repeatMax); |
790 } | 778 } |
791 else | 779 else |
792 { | 780 { |
799 DBG_RE_MATCH(" RESULT: count=%" PRId_SSIZE_T ", done=%s\n", | 787 DBG_RE_MATCH(" RESULT: count=%" PRId_SSIZE_T ", done=%s\n", |
800 count, res ? "YES" : "NO"); | 788 count, res ? "YES" : "NO"); |
801 } | 789 } |
802 break; | 790 break; |
803 | 791 |
804 | |
805 case TH_RE_MATCH_ANCHOR_START: | 792 case TH_RE_MATCH_ANCHOR_START: |
806 res = (*poffs == 0); | 793 res = (soffs == 0); |
807 break; | 794 break; |
808 | 795 |
809 case TH_RE_MATCH_ANCHOR_END: | 796 case TH_RE_MATCH_ANCHOR_END: |
810 res = (haystack[*poffs] == 0); | 797 res = (haystack[soffs] == 0); |
811 break; | 798 break; |
812 } | 799 } |
813 } | 800 } |
814 | 801 |
815 if (!res) | 802 if (res) |
816 *poffs = soffs; | 803 *poffs = soffs; |
817 | 804 |
818 return res; | 805 return res; |
819 } | 806 } |
820 | 807 |