comparison th_regex.c @ 735:31bc1ed07cf5

Renaming BOOL->bool and TRUE/FALSE to true/false, and using stdbool.h if available.
author Matti Hamalainen <ccr@tnsp.org>
date Wed, 07 Dec 2022 12:14:39 +0200
parents 29e44a58bc73
children c17eadc60c3d
comparison
equal deleted inserted replaced
734:2ae1045f6c18 735:31bc1ed07cf5
217 217
218 return THERR_OK; 218 return THERR_OK;
219 } 219 }
220 220
221 221
222 static BOOL th_regex_find_next(const th_char_t *str, 222 static bool th_regex_find_next(const th_char_t *str,
223 const size_t start, size_t *offs, 223 const size_t start, size_t *offs,
224 const th_char_t delim) 224 const th_char_t delim)
225 { 225 {
226 for (*offs = start; str[*offs] != 0; (*offs)++) 226 for (*offs = start; str[*offs] != 0; (*offs)++)
227 { 227 {
228 if (str[*offs] == delim) 228 if (str[*offs] == delim)
229 return TRUE; 229 return true;
230 } 230 }
231 return FALSE; 231 return false;
232 } 232 }
233 233
234 234
235 static BOOL th_regex_parse_ssize_t(const th_char_t *str, 235 static bool th_regex_parse_ssize_t(const th_char_t *str,
236 ssize_t *value) 236 ssize_t *value)
237 { 237 {
238 th_char_t ch; 238 th_char_t ch;
239 BOOL neg; 239 bool neg;
240 240
241 if (*str == '-') 241 if (*str == '-')
242 { 242 {
243 str++; 243 str++;
244 neg = TRUE; 244 neg = true;
245 } 245 }
246 else 246 else
247 neg = FALSE; 247 neg = false;
248 248
249 // Is the value negative? 249 // Is the value negative?
250 while ((ch = *str++)) 250 while ((ch = *str++))
251 { 251 {
252 if (ch >= '0' && ch <= '9') 252 if (ch >= '0' && ch <= '9')
253 { 253 {
254 *value *= 10; 254 *value *= 10;
255 *value += ch - '0'; 255 *value += ch - '0';
256 } 256 }
257 else 257 else
258 return FALSE; 258 return false;
259 } 259 }
260 260
261 if (neg) 261 if (neg)
262 *value = -(*value); 262 *value = -(*value);
263 263
264 return TRUE; 264 return true;
265 } 265 }
266 266
267 267
268 static void th_regex_list_item_init(th_regex_list_item_t *item) 268 static void th_regex_list_item_init(th_regex_list_item_t *item)
269 { 269 {
414 414
415 return th_regex_parse_ctx_node_commit(ctx, &node); 415 return th_regex_parse_ctx_node_commit(ctx, &node);
416 } 416 }
417 417
418 418
419 static int th_regex_parse_ctx_node_commit_strchr(th_regex_parse_ctx_t *ctx, const BOOL split) 419 static int th_regex_parse_ctx_node_commit_strchr(th_regex_parse_ctx_t *ctx, const bool split)
420 { 420 {
421 int res = THERR_OK;; 421 int res = THERR_OK;;
422 422
423 if (ctx->bufPos > 0) 423 if (ctx->bufPos > 0)
424 { 424 {
484 switch (cch) 484 switch (cch)
485 { 485 {
486 case '?': 486 case '?':
487 case '*': 487 case '*':
488 case '+': 488 case '+':
489 if ((res = th_regex_parse_ctx_node_commit_strchr(&ctx, TRUE)) != THERR_OK) 489 if ((res = th_regex_parse_ctx_node_commit_strchr(&ctx, true)) != THERR_OK)
490 goto out; 490 goto out;
491 491
492 if ((res = th_regex_parse_ctx_get_prev_node(&ctx, &pnode)) != THERR_OK) 492 if ((res = th_regex_parse_ctx_get_prev_node(&ctx, &pnode)) != THERR_OK)
493 goto out; 493 goto out;
494 494
524 } 524 }
525 } 525 }
526 break; 526 break;
527 527
528 case '{': 528 case '{':
529 if ((res = th_regex_parse_ctx_node_commit_strchr(&ctx, TRUE)) != THERR_OK) 529 if ((res = th_regex_parse_ctx_node_commit_strchr(&ctx, true)) != THERR_OK)
530 goto out; 530 goto out;
531 531
532 // {n} | {min,max} 532 // {n} | {min,max}
533 start = ctx.offs + 1; 533 start = ctx.offs + 1;
534 if (!th_regex_find_next(ctx.pattern, start, &ctx.offs, '}')) 534 if (!th_regex_find_next(ctx.pattern, start, &ctx.offs, '}'))
576 } 576 }
577 break; 577 break;
578 578
579 /* 579 /*
580 case '|': 580 case '|':
581 if ((res = th_regex_parse_ctx_node_commit_strchr(&ctx, FALSE)) != THERR_OK) 581 if ((res = th_regex_parse_ctx_node_commit_strchr(&ctx, false)) != THERR_OK)
582 goto out; 582 goto out;
583 583
584 // Alt pattern .. how to handle these? 584 // Alt pattern .. how to handle these?
585 break; 585 break;
586 */ 586 */
587 587
588 case '(': 588 case '(':
589 if ((res = th_regex_parse_ctx_node_commit_strchr(&ctx, FALSE)) != THERR_OK) 589 if ((res = th_regex_parse_ctx_node_commit_strchr(&ctx, false)) != THERR_OK)
590 goto out; 590 goto out;
591 591
592 // Start of subpattern 592 // Start of subpattern
593 if ((res = th_regex_parse_ctx_push(&ctx)) != THERR_OK) 593 if ((res = th_regex_parse_ctx_push(&ctx)) != THERR_OK)
594 goto out; 594 goto out;
595 break; 595 break;
596 596
597 case ')': 597 case ')':
598 if ((res = th_regex_parse_ctx_node_commit_strchr(&ctx, FALSE)) != THERR_OK) 598 if ((res = th_regex_parse_ctx_node_commit_strchr(&ctx, false)) != THERR_OK)
599 goto out; 599 goto out;
600 600
601 // End of subpattern 601 // End of subpattern
602 th_regex_node_init(&node); 602 th_regex_node_init(&node);
603 node.type = TH_RE_TYPE_SUBEXPR; 603 node.type = TH_RE_TYPE_SUBEXPR;
606 (res = th_regex_parse_ctx_node_commit(&ctx, &node)) != THERR_OK) 606 (res = th_regex_parse_ctx_node_commit(&ctx, &node)) != THERR_OK)
607 goto out; 607 goto out;
608 break; 608 break;
609 609
610 case '^': 610 case '^':
611 if ((res = th_regex_parse_ctx_node_commit_strchr(&ctx, FALSE)) != THERR_OK) 611 if ((res = th_regex_parse_ctx_node_commit_strchr(&ctx, false)) != THERR_OK)
612 goto out; 612 goto out;
613 613
614 // Start of line anchor 614 // Start of line anchor
615 th_regex_node_init(&node); 615 th_regex_node_init(&node);
616 node.mode = TH_RE_MATCH_ANCHOR_START; 616 node.mode = TH_RE_MATCH_ANCHOR_START;
618 if ((res = th_regex_parse_ctx_node_commit(&ctx, &node)) != THERR_OK) 618 if ((res = th_regex_parse_ctx_node_commit(&ctx, &node)) != THERR_OK)
619 goto out; 619 goto out;
620 break; 620 break;
621 621
622 case '$': 622 case '$':
623 if ((res = th_regex_parse_ctx_node_commit_strchr(&ctx, FALSE)) != THERR_OK) 623 if ((res = th_regex_parse_ctx_node_commit_strchr(&ctx, false)) != THERR_OK)
624 goto out; 624 goto out;
625 625
626 // End of line anchor 626 // End of line anchor
627 th_regex_node_init(&node); 627 th_regex_node_init(&node);
628 node.mode = TH_RE_MATCH_ANCHOR_END; 628 node.mode = TH_RE_MATCH_ANCHOR_END;
630 if ((res = th_regex_parse_ctx_node_commit(&ctx, &node)) != THERR_OK) 630 if ((res = th_regex_parse_ctx_node_commit(&ctx, &node)) != THERR_OK)
631 goto out; 631 goto out;
632 break; 632 break;
633 633
634 case '[': 634 case '[':
635 if ((res = th_regex_parse_ctx_node_commit_strchr(&ctx, FALSE)) != THERR_OK) 635 if ((res = th_regex_parse_ctx_node_commit_strchr(&ctx, false)) != THERR_OK)
636 goto out; 636 goto out;
637 637
638 // Start of char list 638 // Start of char list
639 start = ctx.offs + 1; 639 start = ctx.offs + 1;
640 if (!th_regex_find_next(ctx.pattern, start, &ctx.offs, ']') || 640 if (!th_regex_find_next(ctx.pattern, start, &ctx.offs, ']') ||
658 (res = th_regex_parse_ctx_node_commit(&ctx, &node)) != THERR_OK) 658 (res = th_regex_parse_ctx_node_commit(&ctx, &node)) != THERR_OK)
659 goto out; 659 goto out;
660 break; 660 break;
661 661
662 case '.': 662 case '.':
663 if ((res = th_regex_parse_ctx_node_commit_strchr(&ctx, FALSE)) != THERR_OK) 663 if ((res = th_regex_parse_ctx_node_commit_strchr(&ctx, false)) != THERR_OK)
664 goto out; 664 goto out;
665 665
666 // Any single character matches 666 // Any single character matches
667 th_regex_node_init(&node); 667 th_regex_node_init(&node);
668 node.type = TH_RE_TYPE_ANY_CHAR; 668 node.type = TH_RE_TYPE_ANY_CHAR;
685 default: 685 default:
686 // Given character must match 686 // Given character must match
687 if (ctx.bufPos < ctx.bufSize) 687 if (ctx.bufPos < ctx.bufSize)
688 ctx.buf[ctx.bufPos++] = ctx.pattern[ctx.offs]; 688 ctx.buf[ctx.bufPos++] = ctx.pattern[ctx.offs];
689 else 689 else
690 if ((res = th_regex_parse_ctx_node_commit_strchr(&ctx, FALSE)) != THERR_OK) 690 if ((res = th_regex_parse_ctx_node_commit_strchr(&ctx, false)) != THERR_OK)
691 goto out; 691 goto out;
692 break; 692 break;
693 } 693 }
694 } 694 }
695 695
696 // Commit last string/char if any 696 // Commit last string/char if any
697 if ((res = th_regex_parse_ctx_node_commit_strchr(&ctx, FALSE)) != THERR_OK) 697 if ((res = th_regex_parse_ctx_node_commit_strchr(&ctx, false)) != THERR_OK)
698 goto out; 698 goto out;
699 699
700 // Create root node 700 // Create root node
701 th_regex_node_init(&node); 701 th_regex_node_init(&node);
702 node.type = TH_RE_TYPE_SUBEXPR; 702 node.type = TH_RE_TYPE_SUBEXPR;
839 } 839 }
840 } 840 }
841 } 841 }
842 842
843 843
844 static BOOL th_regex_match_list(const th_regex_list_t *list, const th_char_t cch) 844 static bool th_regex_match_list(const th_regex_list_t *list, const th_char_t cch)
845 { 845 {
846 // Could be optimized, perhaps .. sort match.chars, binary search etc? 846 // Could be optimized, perhaps .. sort match.chars, binary search etc?
847 for (size_t nitem = 0; nitem < list->nitems; nitem++) 847 for (size_t nitem = 0; nitem < list->nitems; nitem++)
848 { 848 {
849 const th_regex_list_item_t *item = &list->items[nitem]; 849 const th_regex_list_item_t *item = &list->items[nitem];
851 if (item->type == 0) 851 if (item->type == 0)
852 { 852 {
853 for (size_t n = 0; n < item->nchars; n++) 853 for (size_t n = 0; n < item->nchars; n++)
854 { 854 {
855 if (item->chars[n] == cch) 855 if (item->chars[n] == cch)
856 return TRUE; 856 return true;
857 } 857 }
858 } 858 }
859 else 859 else
860 { 860 {
861 if (cch >= item->start && cch <= item->end) 861 if (cch >= item->start && cch <= item->end)
862 return TRUE; 862 return true;
863 } 863 }
864 } 864 }
865 865
866 return FALSE; 866 return false;
867 } 867 }
868 868
869 869
870 static BOOL th_regex_match_expr( 870 static bool th_regex_match_expr(
871 const th_char_t *haystack, 871 const th_char_t *haystack,
872 size_t *offs, 872 size_t *offs,
873 const th_regex_t *expr, 873 const th_regex_t *expr,
874 const size_t startnode, 874 const size_t startnode,
875 const int flags, 875 const int flags,
876 const int level 876 const int level
877 ); 877 );
878 878
879 879
880 static BOOL th_regex_match_one( 880 static bool th_regex_match_one(
881 const th_char_t *haystack, 881 const th_char_t *haystack,
882 size_t *offs, 882 size_t *offs,
883 const th_regex_node_t *node, 883 const th_regex_node_t *node,
884 const int flags, 884 const int flags,
885 const int level 885 const int level
886 ) 886 )
887 { 887 {
888 th_char_t cch; 888 th_char_t cch;
889 BOOL res = FALSE; 889 bool res = false;
890 890
891 switch (node->type) 891 switch (node->type)
892 { 892 {
893 case TH_RE_TYPE_SUBEXPR: 893 case TH_RE_TYPE_SUBEXPR:
894 res = th_regex_match_expr(haystack, offs, node->match.expr, 0, flags, level + 1); 894 res = th_regex_match_expr(haystack, offs, node->match.expr, 0, flags, level + 1);
895 break; 895 break;
896 896
897 case TH_RE_TYPE_LIST: 897 case TH_RE_TYPE_LIST:
898 case TH_RE_TYPE_LIST_REVERSE: 898 case TH_RE_TYPE_LIST_REVERSE:
899 if ((cch = haystack[*offs]) == 0) 899 if ((cch = haystack[*offs]) == 0)
900 res = FALSE; 900 res = false;
901 else 901 else
902 { 902 {
903 res = th_regex_match_list(&node->match.list, cch); 903 res = th_regex_match_list(&node->match.list, cch);
904 904
905 if (node->type == TH_RE_TYPE_LIST_REVERSE) 905 if (node->type == TH_RE_TYPE_LIST_REVERSE)
909 } 909 }
910 break; 910 break;
911 911
912 case TH_RE_TYPE_ANY_CHAR: 912 case TH_RE_TYPE_ANY_CHAR:
913 if ((cch = haystack[*offs]) == 0) 913 if ((cch = haystack[*offs]) == 0)
914 res = FALSE; 914 res = false;
915 else 915 else
916 { 916 {
917 res = TRUE; 917 res = true;
918 (*offs)++; 918 (*offs)++;
919 } 919 }
920 break; 920 break;
921 921
922 case TH_RE_TYPE_CHAR: 922 case TH_RE_TYPE_CHAR:
923 if ((cch = haystack[*offs]) == 0) 923 if ((cch = haystack[*offs]) == 0)
924 res = FALSE; 924 res = false;
925 else 925 else
926 { 926 {
927 res = (cch == node->match.chr); 927 res = (cch == node->match.chr);
928 (*offs)++; 928 (*offs)++;
929 } 929 }
930 break; 930 break;
931 931
932 case TH_RE_TYPE_STR: 932 case TH_RE_TYPE_STR:
933 res = TRUE; 933 res = true;
934 for (th_char_t *str = node->match.str; 934 for (th_char_t *str = node->match.str;
935 res && *str != 0; 935 res && *str != 0;
936 str++, (*offs)++) 936 str++, (*offs)++)
937 { 937 {
938 if (haystack[*offs] != *str) 938 if (haystack[*offs] != *str)
939 res = FALSE; 939 res = false;
940 } 940 }
941 break; 941 break;
942 } 942 }
943 943
944 return res; 944 return res;
945 } 945 }
946 946
947 947
948 static BOOL th_regex_match_count( 948 static bool th_regex_match_count(
949 const th_char_t *haystack, 949 const th_char_t *haystack,
950 size_t *offs, 950 size_t *offs,
951 const th_regex_t *expr, 951 const th_regex_t *expr,
952 const th_regex_node_t *node, 952 const th_regex_node_t *node,
953 size_t *nnode, 953 size_t *nnode,
960 960
961 do 961 do
962 { 962 {
963 // Attempt to match the repeated node once 963 // Attempt to match the repeated node once
964 size_t poffs = toffs; 964 size_t poffs = toffs;
965
965 if (th_regex_match_one(haystack, &poffs, node, flags, level)) 966 if (th_regex_match_one(haystack, &poffs, node, flags, level))
966 { 967 {
967 // Matched, increase count of repeats 968 // Matched, increase count of repeats
968 count++; 969 count++;
969 //DBG_RE_PRINT("#%" PRId_SSIZE_T "\n", count); 970 //DBG_RE_PRINT("#%" PRId_SSIZE_T "\n", count);
1022 1023
1023 1024
1024 } while (haystack[toffs] != 0); 1025 } while (haystack[toffs] != 0);
1025 1026
1026 // Check results 1027 // Check results
1027 BOOL res = count >= node->repeatMin || 1028 bool res = count >= node->repeatMin ||
1028 (node->repeatMax > 0 && count >= node->repeatMax); 1029 (node->repeatMax > 0 && count >= node->repeatMax);
1029 1030
1030 if (res) 1031 if (res)
1031 { 1032 {
1032 *offs = last_offs; 1033 *offs = last_offs;
1039 1040
1040 return res; 1041 return res;
1041 } 1042 }
1042 1043
1043 1044
1044 static BOOL th_regex_match_expr( 1045 static bool th_regex_match_expr(
1045 const th_char_t *haystack, 1046 const th_char_t *haystack,
1046 size_t *offs, 1047 size_t *offs,
1047 const th_regex_t *expr, 1048 const th_regex_t *expr,
1048 const size_t startnode, 1049 const size_t startnode,
1049 const int flags, 1050 const int flags,
1050 const int level 1051 const int level
1051 ) 1052 )
1052 { 1053 {
1053 BOOL res = TRUE; 1054 bool res = true;
1054 size_t soffs = *offs; 1055 size_t soffs = *offs;
1055 1056
1056 for (size_t nnode = startnode; res && nnode < expr->nnodes; nnode++) 1057 for (size_t nnode = startnode; res && nnode < expr->nnodes; nnode++)
1057 { 1058 {
1058 const th_regex_node_t *node = &expr->nodes[nnode]; 1059 const th_regex_node_t *node = &expr->nodes[nnode];