Mercurial > hg > th-libs
comparison th_regex.c @ 666:e1d27caf0dbd
More work on regex stuff.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Mon, 27 Jan 2020 12:43:39 +0200 |
parents | c5aa9ada1051 |
children | 039aa00cbfbf |
comparison legend:
- equal
- deleted
- inserted
- replaced
665:4932188c9101 | 666:e1d27caf0dbd |
---|---|
940 size_t *nnode, | 940 size_t *nnode, |
941 const int flags, | 941 const int flags, |
942 const int level | 942 const int level |
943 ) | 943 ) |
944 { | 944 { |
945 size_t toffs = *offs, noffs; | 945 size_t toffs = *offs; |
946 BOOL res, match = FALSE; | 946 BOOL res = FALSE, rest = FALSE; |
947 ssize_t count = 0; | 947 ssize_t count = 0; |
948 | 948 |
949 if (node->repeatMin > 0) | |
950 do | 949 do |
951 { | 950 { |
952 noffs = toffs; | 951 // Attempt to match the repeated node once |
953 match = th_regex_match_one(haystack, &toffs, node, flags, level); | 952 size_t poffs; |
954 if (match) | 953 BOOL matched; |
955 { | 954 |
955 poffs = toffs; | |
956 if ((matched = th_regex_match_one(haystack, &poffs, node, flags, level))) | |
957 { | |
958 // Matched, increase count | |
956 count++; | 959 count++; |
960 // DBG_RE_PRINT("#%" PRId_SSIZE_T "\n", count); | |
961 | |
957 } | 962 } |
958 else | 963 else |
959 toffs = noffs; | 964 { |
960 | 965 // No match, backtrack |
961 if (node->repeatMin >= 0 && | 966 poffs = toffs; |
962 count >= node->repeatMin && | 967 // DBG_RE_PRINT("nope\n"); |
963 node->repeatMax > 0 && | 968 if (rest) |
964 count >= node->repeatMax) | 969 break; |
970 } | |
971 | |
972 // Attempt to match rest of the expression if matched | |
973 // or if required repeats are 0 | |
974 if (matched || node->repeatMin == 0) | |
975 { | |
976 size_t qoffs = poffs; | |
977 DBG_RE_PRINT("try rest '%s'\n", haystack + qoffs); | |
978 if (th_regex_match_expr(haystack, &qoffs, expr, *nnode + 1, flags, level + 1)) | |
979 { | |
980 // Matched | |
981 | |
982 // Check min repeats and if we are "not greedy". | |
983 if (count >= node->repeatMin && node->repeatMax == 1) | |
984 res = TRUE; | |
985 | |
986 // Check max repeats | |
987 if (node->repeatMax > 0 && count >= node->repeatMax) | |
988 res = TRUE; | |
989 | |
990 DBG_RE_PRINT("yes: res=%s count=%" PRId_SSIZE_T " [%" PRId_SSIZE_T " .. %" PRId_SSIZE_T "]\n", res ? "yes" : "no", count, node->repeatMin, node->repeatMax); | |
991 toffs = qoffs; | |
992 } | |
993 else | |
994 { | |
995 // Rest of expression did not match | |
996 DBG_RE_PRINT("no\n"); | |
997 toffs = poffs; | |
998 } | |
999 | |
1000 rest = TRUE; | |
1001 } | |
1002 else | |
1003 { | |
1004 DBG_RE_PRINT("no match and repeatmin>0\n"); | |
965 break; | 1005 break; |
966 | 1006 } |
967 } while (match && toffs > noffs); | 1007 |
968 | 1008 // DBG_RE_PRINT("res=%d [%" PRIu_SIZE_T "='%c']\n", res, toffs, haystack[toffs]); |
969 if (count > 0 || node->repeatMin == 0) | 1009 |
970 { | 1010 } while (!res && haystack[toffs] != 0); |
971 DBG_RE_PRINT("count=%" PRId_SSIZE_T " \"%s\"\n", | 1011 |
972 count, haystack + toffs); | 1012 // Check min repeats and if we are "not greedy". |
973 | 1013 if (count >= node->repeatMin || |
974 match = th_regex_match_expr(haystack, &toffs, expr, *nnode + 1, flags, level + 1); | 1014 (node->repeatMax > 0 && count >= node->repeatMax)) |
975 | 1015 res = TRUE; |
976 DBG_RE_PRINT("rest expr match=%s \"%s\"\n", | 1016 |
977 match ? "YES" : "NO", haystack + toffs); | 1017 if (res) |
978 } | |
979 | |
980 if (match) | |
981 { | 1018 { |
982 *offs = toffs; | 1019 *offs = toffs; |
983 *nnode = expr->nnodes; | 1020 *nnode = expr->nnodes; |
984 } | 1021 } |
985 | 1022 |
986 res = match && | 1023 DBG_RE_PRINT("RESULT: %s : offs=%" PRIu_SIZE_T "='%s'\n", |
987 ( | 1024 res ? "YES" : "NO", |
988 (node->repeatMax > 0 && count >= node->repeatMax) || | 1025 *offs, haystack + *offs); |
989 (node->repeatMin >= 0 && count >= node->repeatMin) | |
990 ); | |
991 | |
992 DBG_RE_PRINT("RESULT: match=%s, res=%s\n", | |
993 match ? "YES" : "NO", res ? "YES" : "NO"); | |
994 | 1026 |
995 return res; | 1027 return res; |
996 } | 1028 } |
997 | 1029 |
998 | 1030 |