comparison th_regex.c @ 643:a2bf1ea05b05

Cleanups.
author Matti Hamalainen <ccr@tnsp.org>
date Fri, 24 Jan 2020 12:06:43 +0200
parents 3a35db5c1873
children b897995101b7
comparison
equal deleted inserted replaced
642:3a35db5c1873 643:a2bf1ea05b05
17 int th_dbg_re_flags = 0; 17 int th_dbg_re_flags = 0;
18 18
19 static const char *re_match_modes[] = 19 static const char *re_match_modes[] =
20 { 20 {
21 "ONCE", 21 "ONCE",
22 "COUNT GREEDY", 22 "COUNT",
23 "COUNT NONGREEDY",
24 "ANCHOR START", 23 "ANCHOR START",
25 "ANCHOR END", 24 "ANCHOR END",
26 }; 25 };
27 26
28 static const char *re_match_types[] = 27 static const char *re_match_types[] =
81 80
82 81
83 enum 82 enum
84 { 83 {
85 TH_RE_MATCH_ONCE, 84 TH_RE_MATCH_ONCE,
86 TH_RE_MATCH_COUNT_GREEDY, 85 TH_RE_MATCH_COUNT,
87 TH_RE_MATCH_COUNT_NONGREEDY,
88 86
89 TH_RE_MATCH_ANCHOR_START, 87 TH_RE_MATCH_ANCHOR_START,
90 TH_RE_MATCH_ANCHOR_END, 88 TH_RE_MATCH_ANCHOR_END,
91 }; 89 };
92 90
421 if ((res = th_regex_parse_ctx_get_prev_node(&ctx, &pnode)) != THERR_OK) 419 if ((res = th_regex_parse_ctx_get_prev_node(&ctx, &pnode)) != THERR_OK)
422 goto exit; 420 goto exit;
423 421
424 if (cch == '?') 422 if (cch == '?')
425 { 423 {
426 // Check if previous was a count 424 // Previous token is optional (repeat 0-1 times) (non-greedy matching)
427 if (pnode->mode == TH_RE_MATCH_COUNT_NONGREEDY) 425 pnode->mode = TH_RE_MATCH_COUNT;
426 pnode->repeatMin = 0;
427 pnode->repeatMax = 1;
428 }
429 else
430 {
431 // Check if previous was a count ("**", "*+", etc.)
432 if (pnode->mode == TH_RE_MATCH_COUNT)
428 { 433 {
429 res = THERR_INVALID_DATA; 434 res = THERR_INVALID_DATA;
430 goto exit; 435 goto exit;
431 } 436 }
432 else 437
433 if (pnode->mode != TH_RE_MATCH_COUNT_GREEDY) 438 pnode->mode = TH_RE_MATCH_COUNT;
434 {
435 // Previous token is optional (repeat 0-1 times)
436 pnode->repeatMin = 0;
437 pnode->repeatMax = 1;
438 }
439
440 pnode->mode = TH_RE_MATCH_COUNT_NONGREEDY;
441 }
442 else
443 {
444 // Check if previous was a count ("**", "*+", etc.)
445 if (pnode->mode == TH_RE_MATCH_COUNT_GREEDY ||
446 pnode->mode == TH_RE_MATCH_COUNT_NONGREEDY)
447 {
448 res = THERR_INVALID_DATA;
449 goto exit;
450 }
451
452 pnode->mode = TH_RE_MATCH_COUNT_GREEDY;
453 439
454 if (cch == '*') 440 if (cch == '*')
455 { 441 {
456 // Previous token can repeat 0 or more times 442 // Previous token can repeat 0 or more times
457 pnode->repeatMin = 0; 443 pnode->repeatMin = 0;
481 if ((res = th_regex_parse_ctx_get_prev_node(&ctx, &pnode)) != THERR_OK || 467 if ((res = th_regex_parse_ctx_get_prev_node(&ctx, &pnode)) != THERR_OK ||
482 (res = th_regex_strndup(&tmp, ctx.pattern + start, 468 (res = th_regex_strndup(&tmp, ctx.pattern + start,
483 ctx.offs - start)) != THERR_OK) 469 ctx.offs - start)) != THERR_OK)
484 goto exit; 470 goto exit;
485 471
486 pnode->mode = TH_RE_MATCH_COUNT_GREEDY; 472 pnode->mode = TH_RE_MATCH_COUNT;
487 473
488 if (th_regex_find_next(tmp, 0, &start, ',')) 474 if (th_regex_find_next(tmp, 0, &start, ','))
489 { 475 {
490 tmp[start] = 0; 476 tmp[start] = 0;
491 if (!th_regex_parse_ssize_t(tmp, &pnode->repeatMin) || 477 if (!th_regex_parse_ssize_t(tmp, &pnode->repeatMin) ||
742 for (size_t nnode = startnode; res && nnode < expr->nnodes; nnode++) 728 for (size_t nnode = startnode; res && nnode < expr->nnodes; nnode++)
743 { 729 {
744 const th_regex_node_t *node = &expr->nodes[nnode]; 730 const th_regex_node_t *node = &expr->nodes[nnode];
745 731
746 DBG_RE_MATCH(" expr [%" PRIu_SIZE_T "/%" PRIu_SIZE_T "]: %s %s '%s'\n", 732 DBG_RE_MATCH(" expr [%" PRIu_SIZE_T "/%" PRIu_SIZE_T "]: %s %s '%s'\n",
747 nnode, expr->nnodes, 733 nnode + 1, expr->nnodes,
748 re_match_modes[node->mode], re_match_types[node->type], 734 re_match_modes[node->mode], re_match_types[node->type],
749 haystack + *poffs); 735 haystack + soffs);
750 736
751 switch (node->mode) 737 switch (node->mode)
752 { 738 {
753 case TH_RE_MATCH_ONCE: 739 case TH_RE_MATCH_ONCE:
754 res = th_regex_match_one(haystack, poffs, node, flags); 740 res = th_regex_match_one(haystack, &soffs, node, flags);
755 break; 741 break;
756 742
757 case TH_RE_MATCH_COUNT_GREEDY: 743 case TH_RE_MATCH_COUNT:
758 case TH_RE_MATCH_COUNT_NONGREEDY:
759 { 744 {
760 ssize_t count = 0; 745 ssize_t count = 0;
761 746
762 DBG_RE_MATCH(" COUNT min=%" PRId_SSIZE_T ", max=%" PRId_SSIZE_T "\n", 747 DBG_RE_MATCH(" COUNT min=%" PRId_SSIZE_T ", max=%" PRId_SSIZE_T "\n",
763 node->repeatMin, node->repeatMax); 748 node->repeatMin, node->repeatMax);
764 749
765 do 750 do
766 { 751 {
767 BOOL match; 752 BOOL match;
768 size_t toffs = *poffs, tnode = nnode; 753 size_t toffs = soffs, tnode;
769 754
755 DBG_RE_MATCH(" START '%s'\n", haystack + toffs);
770 do { 756 do {
771 match = th_regex_match_one(haystack, &toffs, node, flags); 757 match = TRUE;
772 if (match && haystack[toffs] != 0) 758 for (tnode = nnode; match && tnode < expr->nnodes && haystack[toffs] != 0; )
773 { 759 {
774 for (tnode = nnode + 1; match && tnode < expr->nnodes && haystack[toffs] != 0; tnode++) 760 match = th_regex_match_one(haystack, &toffs, &expr->nodes[tnode], flags);
775 { 761 if (match)
776 size_t noffs = toffs; 762 tnode++;
777 match = th_regex_match_one(haystack, &toffs, &expr->nodes[tnode], flags);
778 DBG_RE_MATCH(" '%s': %d\n", haystack + noffs, match);
779 }
780 } 763 }
764 DBG_RE_MATCH(" '%s': %d (tnode=%" PRIu_SIZE_T ")\n", haystack + toffs, match, tnode);
765 if (node->repeatMin >= 0 && match)
766 break;
781 } while (!match && haystack[toffs] != 0); 767 } while (!match && haystack[toffs] != 0);
768
769 DBG_RE_MATCH(" END '%s': %d (tnode=%" PRIu_SIZE_T ")\n", haystack + toffs, match, tnode);
782 770
783 if (match) 771 if (match)
784 { 772 {
785 // Node matched 773 // Node matched
786 count++; 774 count++;
787 *poffs = soffs; 775 soffs = toffs;
788 nnode = tnode; 776 nnode = tnode;
789 res = (node->repeatMax > 0 && count >= node->repeatMax); 777 res = (node->repeatMax > 0 && count >= node->repeatMax);
790 } 778 }
791 else 779 else
792 { 780 {
799 DBG_RE_MATCH(" RESULT: count=%" PRId_SSIZE_T ", done=%s\n", 787 DBG_RE_MATCH(" RESULT: count=%" PRId_SSIZE_T ", done=%s\n",
800 count, res ? "YES" : "NO"); 788 count, res ? "YES" : "NO");
801 } 789 }
802 break; 790 break;
803 791
804
805 case TH_RE_MATCH_ANCHOR_START: 792 case TH_RE_MATCH_ANCHOR_START:
806 res = (*poffs == 0); 793 res = (soffs == 0);
807 break; 794 break;
808 795
809 case TH_RE_MATCH_ANCHOR_END: 796 case TH_RE_MATCH_ANCHOR_END:
810 res = (haystack[*poffs] == 0); 797 res = (haystack[soffs] == 0);
811 break; 798 break;
812 } 799 }
813 } 800 }
814 801
815 if (!res) 802 if (res)
816 *poffs = soffs; 803 *poffs = soffs;
817 804
818 return res; 805 return res;
819 } 806 }
820 807