Mercurial > hg > th-libs
comparison th_regex.c @ 735:31bc1ed07cf5
Renaming BOOL->bool and TRUE/FALSE to true/false, and using stdbool.h if available.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Wed, 07 Dec 2022 12:14:39 +0200 |
parents | 29e44a58bc73 |
children | c17eadc60c3d |
comparison
equal
deleted
inserted
replaced
734:2ae1045f6c18 | 735:31bc1ed07cf5 |
---|---|
217 | 217 |
218 return THERR_OK; | 218 return THERR_OK; |
219 } | 219 } |
220 | 220 |
221 | 221 |
222 static BOOL th_regex_find_next(const th_char_t *str, | 222 static bool th_regex_find_next(const th_char_t *str, |
223 const size_t start, size_t *offs, | 223 const size_t start, size_t *offs, |
224 const th_char_t delim) | 224 const th_char_t delim) |
225 { | 225 { |
226 for (*offs = start; str[*offs] != 0; (*offs)++) | 226 for (*offs = start; str[*offs] != 0; (*offs)++) |
227 { | 227 { |
228 if (str[*offs] == delim) | 228 if (str[*offs] == delim) |
229 return TRUE; | 229 return true; |
230 } | 230 } |
231 return FALSE; | 231 return false; |
232 } | 232 } |
233 | 233 |
234 | 234 |
235 static BOOL th_regex_parse_ssize_t(const th_char_t *str, | 235 static bool th_regex_parse_ssize_t(const th_char_t *str, |
236 ssize_t *value) | 236 ssize_t *value) |
237 { | 237 { |
238 th_char_t ch; | 238 th_char_t ch; |
239 BOOL neg; | 239 bool neg; |
240 | 240 |
241 if (*str == '-') | 241 if (*str == '-') |
242 { | 242 { |
243 str++; | 243 str++; |
244 neg = TRUE; | 244 neg = true; |
245 } | 245 } |
246 else | 246 else |
247 neg = FALSE; | 247 neg = false; |
248 | 248 |
249 // Is the value negative? | 249 // Is the value negative? |
250 while ((ch = *str++)) | 250 while ((ch = *str++)) |
251 { | 251 { |
252 if (ch >= '0' && ch <= '9') | 252 if (ch >= '0' && ch <= '9') |
253 { | 253 { |
254 *value *= 10; | 254 *value *= 10; |
255 *value += ch - '0'; | 255 *value += ch - '0'; |
256 } | 256 } |
257 else | 257 else |
258 return FALSE; | 258 return false; |
259 } | 259 } |
260 | 260 |
261 if (neg) | 261 if (neg) |
262 *value = -(*value); | 262 *value = -(*value); |
263 | 263 |
264 return TRUE; | 264 return true; |
265 } | 265 } |
266 | 266 |
267 | 267 |
268 static void th_regex_list_item_init(th_regex_list_item_t *item) | 268 static void th_regex_list_item_init(th_regex_list_item_t *item) |
269 { | 269 { |
414 | 414 |
415 return th_regex_parse_ctx_node_commit(ctx, &node); | 415 return th_regex_parse_ctx_node_commit(ctx, &node); |
416 } | 416 } |
417 | 417 |
418 | 418 |
419 static int th_regex_parse_ctx_node_commit_strchr(th_regex_parse_ctx_t *ctx, const BOOL split) | 419 static int th_regex_parse_ctx_node_commit_strchr(th_regex_parse_ctx_t *ctx, const bool split) |
420 { | 420 { |
421 int res = THERR_OK;; | 421 int res = THERR_OK;; |
422 | 422 |
423 if (ctx->bufPos > 0) | 423 if (ctx->bufPos > 0) |
424 { | 424 { |
484 switch (cch) | 484 switch (cch) |
485 { | 485 { |
486 case '?': | 486 case '?': |
487 case '*': | 487 case '*': |
488 case '+': | 488 case '+': |
489 if ((res = th_regex_parse_ctx_node_commit_strchr(&ctx, TRUE)) != THERR_OK) | 489 if ((res = th_regex_parse_ctx_node_commit_strchr(&ctx, true)) != THERR_OK) |
490 goto out; | 490 goto out; |
491 | 491 |
492 if ((res = th_regex_parse_ctx_get_prev_node(&ctx, &pnode)) != THERR_OK) | 492 if ((res = th_regex_parse_ctx_get_prev_node(&ctx, &pnode)) != THERR_OK) |
493 goto out; | 493 goto out; |
494 | 494 |
524 } | 524 } |
525 } | 525 } |
526 break; | 526 break; |
527 | 527 |
528 case '{': | 528 case '{': |
529 if ((res = th_regex_parse_ctx_node_commit_strchr(&ctx, TRUE)) != THERR_OK) | 529 if ((res = th_regex_parse_ctx_node_commit_strchr(&ctx, true)) != THERR_OK) |
530 goto out; | 530 goto out; |
531 | 531 |
532 // {n} | {min,max} | 532 // {n} | {min,max} |
533 start = ctx.offs + 1; | 533 start = ctx.offs + 1; |
534 if (!th_regex_find_next(ctx.pattern, start, &ctx.offs, '}')) | 534 if (!th_regex_find_next(ctx.pattern, start, &ctx.offs, '}')) |
576 } | 576 } |
577 break; | 577 break; |
578 | 578 |
579 /* | 579 /* |
580 case '|': | 580 case '|': |
581 if ((res = th_regex_parse_ctx_node_commit_strchr(&ctx, FALSE)) != THERR_OK) | 581 if ((res = th_regex_parse_ctx_node_commit_strchr(&ctx, false)) != THERR_OK) |
582 goto out; | 582 goto out; |
583 | 583 |
584 // Alt pattern .. how to handle these? | 584 // Alt pattern .. how to handle these? |
585 break; | 585 break; |
586 */ | 586 */ |
587 | 587 |
588 case '(': | 588 case '(': |
589 if ((res = th_regex_parse_ctx_node_commit_strchr(&ctx, FALSE)) != THERR_OK) | 589 if ((res = th_regex_parse_ctx_node_commit_strchr(&ctx, false)) != THERR_OK) |
590 goto out; | 590 goto out; |
591 | 591 |
592 // Start of subpattern | 592 // Start of subpattern |
593 if ((res = th_regex_parse_ctx_push(&ctx)) != THERR_OK) | 593 if ((res = th_regex_parse_ctx_push(&ctx)) != THERR_OK) |
594 goto out; | 594 goto out; |
595 break; | 595 break; |
596 | 596 |
597 case ')': | 597 case ')': |
598 if ((res = th_regex_parse_ctx_node_commit_strchr(&ctx, FALSE)) != THERR_OK) | 598 if ((res = th_regex_parse_ctx_node_commit_strchr(&ctx, false)) != THERR_OK) |
599 goto out; | 599 goto out; |
600 | 600 |
601 // End of subpattern | 601 // End of subpattern |
602 th_regex_node_init(&node); | 602 th_regex_node_init(&node); |
603 node.type = TH_RE_TYPE_SUBEXPR; | 603 node.type = TH_RE_TYPE_SUBEXPR; |
606 (res = th_regex_parse_ctx_node_commit(&ctx, &node)) != THERR_OK) | 606 (res = th_regex_parse_ctx_node_commit(&ctx, &node)) != THERR_OK) |
607 goto out; | 607 goto out; |
608 break; | 608 break; |
609 | 609 |
610 case '^': | 610 case '^': |
611 if ((res = th_regex_parse_ctx_node_commit_strchr(&ctx, FALSE)) != THERR_OK) | 611 if ((res = th_regex_parse_ctx_node_commit_strchr(&ctx, false)) != THERR_OK) |
612 goto out; | 612 goto out; |
613 | 613 |
614 // Start of line anchor | 614 // Start of line anchor |
615 th_regex_node_init(&node); | 615 th_regex_node_init(&node); |
616 node.mode = TH_RE_MATCH_ANCHOR_START; | 616 node.mode = TH_RE_MATCH_ANCHOR_START; |
618 if ((res = th_regex_parse_ctx_node_commit(&ctx, &node)) != THERR_OK) | 618 if ((res = th_regex_parse_ctx_node_commit(&ctx, &node)) != THERR_OK) |
619 goto out; | 619 goto out; |
620 break; | 620 break; |
621 | 621 |
622 case '$': | 622 case '$': |
623 if ((res = th_regex_parse_ctx_node_commit_strchr(&ctx, FALSE)) != THERR_OK) | 623 if ((res = th_regex_parse_ctx_node_commit_strchr(&ctx, false)) != THERR_OK) |
624 goto out; | 624 goto out; |
625 | 625 |
626 // End of line anchor | 626 // End of line anchor |
627 th_regex_node_init(&node); | 627 th_regex_node_init(&node); |
628 node.mode = TH_RE_MATCH_ANCHOR_END; | 628 node.mode = TH_RE_MATCH_ANCHOR_END; |
630 if ((res = th_regex_parse_ctx_node_commit(&ctx, &node)) != THERR_OK) | 630 if ((res = th_regex_parse_ctx_node_commit(&ctx, &node)) != THERR_OK) |
631 goto out; | 631 goto out; |
632 break; | 632 break; |
633 | 633 |
634 case '[': | 634 case '[': |
635 if ((res = th_regex_parse_ctx_node_commit_strchr(&ctx, FALSE)) != THERR_OK) | 635 if ((res = th_regex_parse_ctx_node_commit_strchr(&ctx, false)) != THERR_OK) |
636 goto out; | 636 goto out; |
637 | 637 |
638 // Start of char list | 638 // Start of char list |
639 start = ctx.offs + 1; | 639 start = ctx.offs + 1; |
640 if (!th_regex_find_next(ctx.pattern, start, &ctx.offs, ']') || | 640 if (!th_regex_find_next(ctx.pattern, start, &ctx.offs, ']') || |
658 (res = th_regex_parse_ctx_node_commit(&ctx, &node)) != THERR_OK) | 658 (res = th_regex_parse_ctx_node_commit(&ctx, &node)) != THERR_OK) |
659 goto out; | 659 goto out; |
660 break; | 660 break; |
661 | 661 |
662 case '.': | 662 case '.': |
663 if ((res = th_regex_parse_ctx_node_commit_strchr(&ctx, FALSE)) != THERR_OK) | 663 if ((res = th_regex_parse_ctx_node_commit_strchr(&ctx, false)) != THERR_OK) |
664 goto out; | 664 goto out; |
665 | 665 |
666 // Any single character matches | 666 // Any single character matches |
667 th_regex_node_init(&node); | 667 th_regex_node_init(&node); |
668 node.type = TH_RE_TYPE_ANY_CHAR; | 668 node.type = TH_RE_TYPE_ANY_CHAR; |
685 default: | 685 default: |
686 // Given character must match | 686 // Given character must match |
687 if (ctx.bufPos < ctx.bufSize) | 687 if (ctx.bufPos < ctx.bufSize) |
688 ctx.buf[ctx.bufPos++] = ctx.pattern[ctx.offs]; | 688 ctx.buf[ctx.bufPos++] = ctx.pattern[ctx.offs]; |
689 else | 689 else |
690 if ((res = th_regex_parse_ctx_node_commit_strchr(&ctx, FALSE)) != THERR_OK) | 690 if ((res = th_regex_parse_ctx_node_commit_strchr(&ctx, false)) != THERR_OK) |
691 goto out; | 691 goto out; |
692 break; | 692 break; |
693 } | 693 } |
694 } | 694 } |
695 | 695 |
696 // Commit last string/char if any | 696 // Commit last string/char if any |
697 if ((res = th_regex_parse_ctx_node_commit_strchr(&ctx, FALSE)) != THERR_OK) | 697 if ((res = th_regex_parse_ctx_node_commit_strchr(&ctx, false)) != THERR_OK) |
698 goto out; | 698 goto out; |
699 | 699 |
700 // Create root node | 700 // Create root node |
701 th_regex_node_init(&node); | 701 th_regex_node_init(&node); |
702 node.type = TH_RE_TYPE_SUBEXPR; | 702 node.type = TH_RE_TYPE_SUBEXPR; |
839 } | 839 } |
840 } | 840 } |
841 } | 841 } |
842 | 842 |
843 | 843 |
844 static BOOL th_regex_match_list(const th_regex_list_t *list, const th_char_t cch) | 844 static bool th_regex_match_list(const th_regex_list_t *list, const th_char_t cch) |
845 { | 845 { |
846 // Could be optimized, perhaps .. sort match.chars, binary search etc? | 846 // Could be optimized, perhaps .. sort match.chars, binary search etc? |
847 for (size_t nitem = 0; nitem < list->nitems; nitem++) | 847 for (size_t nitem = 0; nitem < list->nitems; nitem++) |
848 { | 848 { |
849 const th_regex_list_item_t *item = &list->items[nitem]; | 849 const th_regex_list_item_t *item = &list->items[nitem]; |
851 if (item->type == 0) | 851 if (item->type == 0) |
852 { | 852 { |
853 for (size_t n = 0; n < item->nchars; n++) | 853 for (size_t n = 0; n < item->nchars; n++) |
854 { | 854 { |
855 if (item->chars[n] == cch) | 855 if (item->chars[n] == cch) |
856 return TRUE; | 856 return true; |
857 } | 857 } |
858 } | 858 } |
859 else | 859 else |
860 { | 860 { |
861 if (cch >= item->start && cch <= item->end) | 861 if (cch >= item->start && cch <= item->end) |
862 return TRUE; | 862 return true; |
863 } | 863 } |
864 } | 864 } |
865 | 865 |
866 return FALSE; | 866 return false; |
867 } | 867 } |
868 | 868 |
869 | 869 |
870 static BOOL th_regex_match_expr( | 870 static bool th_regex_match_expr( |
871 const th_char_t *haystack, | 871 const th_char_t *haystack, |
872 size_t *offs, | 872 size_t *offs, |
873 const th_regex_t *expr, | 873 const th_regex_t *expr, |
874 const size_t startnode, | 874 const size_t startnode, |
875 const int flags, | 875 const int flags, |
876 const int level | 876 const int level |
877 ); | 877 ); |
878 | 878 |
879 | 879 |
880 static BOOL th_regex_match_one( | 880 static bool th_regex_match_one( |
881 const th_char_t *haystack, | 881 const th_char_t *haystack, |
882 size_t *offs, | 882 size_t *offs, |
883 const th_regex_node_t *node, | 883 const th_regex_node_t *node, |
884 const int flags, | 884 const int flags, |
885 const int level | 885 const int level |
886 ) | 886 ) |
887 { | 887 { |
888 th_char_t cch; | 888 th_char_t cch; |
889 BOOL res = FALSE; | 889 bool res = false; |
890 | 890 |
891 switch (node->type) | 891 switch (node->type) |
892 { | 892 { |
893 case TH_RE_TYPE_SUBEXPR: | 893 case TH_RE_TYPE_SUBEXPR: |
894 res = th_regex_match_expr(haystack, offs, node->match.expr, 0, flags, level + 1); | 894 res = th_regex_match_expr(haystack, offs, node->match.expr, 0, flags, level + 1); |
895 break; | 895 break; |
896 | 896 |
897 case TH_RE_TYPE_LIST: | 897 case TH_RE_TYPE_LIST: |
898 case TH_RE_TYPE_LIST_REVERSE: | 898 case TH_RE_TYPE_LIST_REVERSE: |
899 if ((cch = haystack[*offs]) == 0) | 899 if ((cch = haystack[*offs]) == 0) |
900 res = FALSE; | 900 res = false; |
901 else | 901 else |
902 { | 902 { |
903 res = th_regex_match_list(&node->match.list, cch); | 903 res = th_regex_match_list(&node->match.list, cch); |
904 | 904 |
905 if (node->type == TH_RE_TYPE_LIST_REVERSE) | 905 if (node->type == TH_RE_TYPE_LIST_REVERSE) |
909 } | 909 } |
910 break; | 910 break; |
911 | 911 |
912 case TH_RE_TYPE_ANY_CHAR: | 912 case TH_RE_TYPE_ANY_CHAR: |
913 if ((cch = haystack[*offs]) == 0) | 913 if ((cch = haystack[*offs]) == 0) |
914 res = FALSE; | 914 res = false; |
915 else | 915 else |
916 { | 916 { |
917 res = TRUE; | 917 res = true; |
918 (*offs)++; | 918 (*offs)++; |
919 } | 919 } |
920 break; | 920 break; |
921 | 921 |
922 case TH_RE_TYPE_CHAR: | 922 case TH_RE_TYPE_CHAR: |
923 if ((cch = haystack[*offs]) == 0) | 923 if ((cch = haystack[*offs]) == 0) |
924 res = FALSE; | 924 res = false; |
925 else | 925 else |
926 { | 926 { |
927 res = (cch == node->match.chr); | 927 res = (cch == node->match.chr); |
928 (*offs)++; | 928 (*offs)++; |
929 } | 929 } |
930 break; | 930 break; |
931 | 931 |
932 case TH_RE_TYPE_STR: | 932 case TH_RE_TYPE_STR: |
933 res = TRUE; | 933 res = true; |
934 for (th_char_t *str = node->match.str; | 934 for (th_char_t *str = node->match.str; |
935 res && *str != 0; | 935 res && *str != 0; |
936 str++, (*offs)++) | 936 str++, (*offs)++) |
937 { | 937 { |
938 if (haystack[*offs] != *str) | 938 if (haystack[*offs] != *str) |
939 res = FALSE; | 939 res = false; |
940 } | 940 } |
941 break; | 941 break; |
942 } | 942 } |
943 | 943 |
944 return res; | 944 return res; |
945 } | 945 } |
946 | 946 |
947 | 947 |
948 static BOOL th_regex_match_count( | 948 static bool th_regex_match_count( |
949 const th_char_t *haystack, | 949 const th_char_t *haystack, |
950 size_t *offs, | 950 size_t *offs, |
951 const th_regex_t *expr, | 951 const th_regex_t *expr, |
952 const th_regex_node_t *node, | 952 const th_regex_node_t *node, |
953 size_t *nnode, | 953 size_t *nnode, |
960 | 960 |
961 do | 961 do |
962 { | 962 { |
963 // Attempt to match the repeated node once | 963 // Attempt to match the repeated node once |
964 size_t poffs = toffs; | 964 size_t poffs = toffs; |
965 | |
965 if (th_regex_match_one(haystack, &poffs, node, flags, level)) | 966 if (th_regex_match_one(haystack, &poffs, node, flags, level)) |
966 { | 967 { |
967 // Matched, increase count of repeats | 968 // Matched, increase count of repeats |
968 count++; | 969 count++; |
969 //DBG_RE_PRINT("#%" PRId_SSIZE_T "\n", count); | 970 //DBG_RE_PRINT("#%" PRId_SSIZE_T "\n", count); |
1022 | 1023 |
1023 | 1024 |
1024 } while (haystack[toffs] != 0); | 1025 } while (haystack[toffs] != 0); |
1025 | 1026 |
1026 // Check results | 1027 // Check results |
1027 BOOL res = count >= node->repeatMin || | 1028 bool res = count >= node->repeatMin || |
1028 (node->repeatMax > 0 && count >= node->repeatMax); | 1029 (node->repeatMax > 0 && count >= node->repeatMax); |
1029 | 1030 |
1030 if (res) | 1031 if (res) |
1031 { | 1032 { |
1032 *offs = last_offs; | 1033 *offs = last_offs; |
1039 | 1040 |
1040 return res; | 1041 return res; |
1041 } | 1042 } |
1042 | 1043 |
1043 | 1044 |
1044 static BOOL th_regex_match_expr( | 1045 static bool th_regex_match_expr( |
1045 const th_char_t *haystack, | 1046 const th_char_t *haystack, |
1046 size_t *offs, | 1047 size_t *offs, |
1047 const th_regex_t *expr, | 1048 const th_regex_t *expr, |
1048 const size_t startnode, | 1049 const size_t startnode, |
1049 const int flags, | 1050 const int flags, |
1050 const int level | 1051 const int level |
1051 ) | 1052 ) |
1052 { | 1053 { |
1053 BOOL res = TRUE; | 1054 bool res = true; |
1054 size_t soffs = *offs; | 1055 size_t soffs = *offs; |
1055 | 1056 |
1056 for (size_t nnode = startnode; res && nnode < expr->nnodes; nnode++) | 1057 for (size_t nnode = startnode; res && nnode < expr->nnodes; nnode++) |
1057 { | 1058 { |
1058 const th_regex_node_t *node = &expr->nodes[nnode]; | 1059 const th_regex_node_t *node = &expr->nodes[nnode]; |