Mercurial > hg > th-libs
comparison th_regex.c @ 667:039aa00cbfbf
Work on regex matcher.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Mon, 27 Jan 2020 17:07:06 +0200 |
parents | e1d27caf0dbd |
children | 7493d4c9ff77 |
comparison
equal
deleted
inserted
replaced
666:e1d27caf0dbd | 667:039aa00cbfbf |
---|---|
940 size_t *nnode, | 940 size_t *nnode, |
941 const int flags, | 941 const int flags, |
942 const int level | 942 const int level |
943 ) | 943 ) |
944 { | 944 { |
945 size_t toffs = *offs; | 945 size_t toffs = *offs, last_offs = *offs; |
946 BOOL res = FALSE, rest = FALSE; | |
947 ssize_t count = 0; | 946 ssize_t count = 0; |
948 | 947 |
949 do | 948 do |
950 { | 949 { |
951 // Attempt to match the repeated node once | 950 // Attempt to match the repeated node once |
952 size_t poffs; | 951 size_t poffs = toffs; |
953 BOOL matched; | 952 if (th_regex_match_one(haystack, &poffs, node, flags, level)) |
954 | 953 { |
955 poffs = toffs; | 954 // Matched, increase count of repeats |
956 if ((matched = th_regex_match_one(haystack, &poffs, node, flags, level))) | |
957 { | |
958 // Matched, increase count | |
959 count++; | 955 count++; |
960 // DBG_RE_PRINT("#%" PRId_SSIZE_T "\n", count); | 956 //DBG_RE_PRINT("#%" PRId_SSIZE_T "\n", count); |
961 | 957 |
958 // poffs should now be at position + 1 from match | |
962 } | 959 } |
963 else | 960 else |
964 { | 961 { |
965 // No match, backtrack | 962 // Did not match, get out if repeatMin > 0 |
966 poffs = toffs; | 963 if (node->repeatMin > 0) |
967 // DBG_RE_PRINT("nope\n"); | 964 break; |
968 if (rest) | 965 } |
969 break; | 966 |
970 } | 967 // Attempt to match rest of the expression |
971 | 968 size_t qoffs1 = poffs, qoffs2 = toffs; |
972 // Attempt to match rest of the expression if matched | 969 DBG_RE_PRINT("try rest '%s' :: '%s'\n", haystack + qoffs1, haystack + qoffs2); |
973 // or if required repeats are 0 | 970 if (th_regex_match_expr(haystack, &qoffs1, expr, *nnode + 1, flags, level + 1)) |
974 if (matched || node->repeatMin == 0) | 971 { |
975 { | 972 // Matched |
976 size_t qoffs = poffs; | 973 toffs = last_offs = qoffs1; |
977 DBG_RE_PRINT("try rest '%s'\n", haystack + qoffs); | 974 |
978 if (th_regex_match_expr(haystack, &qoffs, expr, *nnode + 1, flags, level + 1)) | 975 DBG_RE_PRINT(" yes1: count=%" PRId_SSIZE_T " [%" PRId_SSIZE_T " .. %" PRId_SSIZE_T "]\n", count, node->repeatMin, node->repeatMax); |
979 { | 976 |
980 // Matched | 977 // Check min repeats and if we are "not greedy". |
981 | 978 if (count >= node->repeatMin && node->repeatMax == 1) |
982 // Check min repeats and if we are "not greedy". | 979 break; |
983 if (count >= node->repeatMin && node->repeatMax == 1) | 980 |
984 res = TRUE; | 981 // Check max repeats |
985 | 982 if (node->repeatMax > 0 && count >= node->repeatMax) |
986 // Check max repeats | 983 break; |
987 if (node->repeatMax > 0 && count >= node->repeatMax) | |
988 res = TRUE; | |
989 | |
990 DBG_RE_PRINT("yes: res=%s count=%" PRId_SSIZE_T " [%" PRId_SSIZE_T " .. %" PRId_SSIZE_T "]\n", res ? "yes" : "no", count, node->repeatMin, node->repeatMax); | |
991 toffs = qoffs; | |
992 } | |
993 else | |
994 { | |
995 // Rest of expression did not match | |
996 DBG_RE_PRINT("no\n"); | |
997 toffs = poffs; | |
998 } | |
999 | |
1000 rest = TRUE; | |
1001 } | 984 } |
1002 else | 985 else |
1003 { | 986 if (node->repeatMin == 0 && |
1004 DBG_RE_PRINT("no match and repeatmin>0\n"); | 987 th_regex_match_expr(haystack, &qoffs2, expr, *nnode + 1, flags, level + 1)) |
1005 break; | 988 { |
1006 } | 989 // Matched |
1007 | 990 toffs = last_offs = qoffs2; |
1008 // DBG_RE_PRINT("res=%d [%" PRIu_SIZE_T "='%c']\n", res, toffs, haystack[toffs]); | 991 |
1009 | 992 DBG_RE_PRINT(" yes2: count=%" PRId_SSIZE_T " [%" PRId_SSIZE_T " .. %" PRId_SSIZE_T "]\n", count, node->repeatMin, node->repeatMax); |
1010 } while (!res && haystack[toffs] != 0); | 993 |
1011 | 994 // Check min repeats and if we are "not greedy". |
1012 // Check min repeats and if we are "not greedy". | 995 if (count >= node->repeatMin && node->repeatMax == 1) |
1013 if (count >= node->repeatMin || | 996 break; |
1014 (node->repeatMax > 0 && count >= node->repeatMax)) | 997 |
1015 res = TRUE; | 998 // Check max repeats |
999 if (node->repeatMax > 0 && count >= node->repeatMax) | |
1000 break; | |
1001 | |
1002 } | |
1003 else | |
1004 { | |
1005 // Rest of expression did not match, try again | |
1006 DBG_RE_PRINT(" no\n"); | |
1007 toffs = poffs; | |
1008 } | |
1009 | |
1010 | |
1011 } while (haystack[toffs] != 0); | |
1012 | |
1013 // Check results | |
1014 BOOL res = count >= node->repeatMin || | |
1015 (node->repeatMax > 0 && count >= node->repeatMax); | |
1016 | 1016 |
1017 if (res) | 1017 if (res) |
1018 { | 1018 { |
1019 *offs = toffs; | 1019 *offs = last_offs; |
1020 *nnode = expr->nnodes; | 1020 *nnode = expr->nnodes; |
1021 } | 1021 } |
1022 | 1022 |
1023 DBG_RE_PRINT("RESULT: %s : offs=%" PRIu_SIZE_T "='%s'\n", | 1023 DBG_RE_PRINT("RESULT: %s : offs=%" PRIu_SIZE_T "='%s'\n", |
1024 res ? "YES" : "NO", | 1024 res ? "YES" : "NO", |