comparison th_regex.c @ 666:e1d27caf0dbd

More work on regex stuff.
author Matti Hamalainen <ccr@tnsp.org>
date Mon, 27 Jan 2020 12:43:39 +0200
parents c5aa9ada1051
children 039aa00cbfbf
comparison
Legend: equal | deleted | inserted | replaced
665:4932188c9101 666:e1d27caf0dbd
940 size_t *nnode, 940 size_t *nnode,
941 const int flags, 941 const int flags,
942 const int level 942 const int level
943 ) 943 )
944 { 944 {
945 size_t toffs = *offs, noffs; 945 size_t toffs = *offs;
946 BOOL res, match = FALSE; 946 BOOL res = FALSE, rest = FALSE;
947 ssize_t count = 0; 947 ssize_t count = 0;
948 948
949 if (node->repeatMin > 0)
950 do 949 do
951 { 950 {
952 noffs = toffs; 951 // Attempt to match the repeated node once
953 match = th_regex_match_one(haystack, &toffs, node, flags, level); 952 size_t poffs;
954 if (match) 953 BOOL matched;
955 { 954
955 poffs = toffs;
956 if ((matched = th_regex_match_one(haystack, &poffs, node, flags, level)))
957 {
958 // Matched, increase count
956 count++; 959 count++;
960 // DBG_RE_PRINT("#%" PRId_SSIZE_T "\n", count);
961
957 } 962 }
958 else 963 else
959 toffs = noffs; 964 {
960 965 // No match, backtrack
961 if (node->repeatMin >= 0 && 966 poffs = toffs;
962 count >= node->repeatMin && 967 // DBG_RE_PRINT("nope\n");
963 node->repeatMax > 0 && 968 if (rest)
964 count >= node->repeatMax) 969 break;
970 }
971
972 // Attempt to match rest of the expression if matched
973 // or if required repeats are 0
974 if (matched || node->repeatMin == 0)
975 {
976 size_t qoffs = poffs;
977 DBG_RE_PRINT("try rest '%s'\n", haystack + qoffs);
978 if (th_regex_match_expr(haystack, &qoffs, expr, *nnode + 1, flags, level + 1))
979 {
980 // Matched
981
982 // Check min repeats and if we are "not greedy".
983 if (count >= node->repeatMin && node->repeatMax == 1)
984 res = TRUE;
985
986 // Check max repeats
987 if (node->repeatMax > 0 && count >= node->repeatMax)
988 res = TRUE;
989
990 DBG_RE_PRINT("yes: res=%s count=%" PRId_SSIZE_T " [%" PRId_SSIZE_T " .. %" PRId_SSIZE_T "]\n", res ? "yes" : "no", count, node->repeatMin, node->repeatMax);
991 toffs = qoffs;
992 }
993 else
994 {
995 // Rest of expression did not match
996 DBG_RE_PRINT("no\n");
997 toffs = poffs;
998 }
999
1000 rest = TRUE;
1001 }
1002 else
1003 {
1004 DBG_RE_PRINT("no match and repeatmin>0\n");
965 break; 1005 break;
966 1006 }
967 } while (match && toffs > noffs); 1007
968 1008 // DBG_RE_PRINT("res=%d [%" PRIu_SIZE_T "='%c']\n", res, toffs, haystack[toffs]);
969 if (count > 0 || node->repeatMin == 0) 1009
970 { 1010 } while (!res && haystack[toffs] != 0);
971 DBG_RE_PRINT("count=%" PRId_SSIZE_T " \"%s\"\n", 1011
972 count, haystack + toffs); 1012 // Check min repeats and if we are "not greedy".
973 1013 if (count >= node->repeatMin ||
974 match = th_regex_match_expr(haystack, &toffs, expr, *nnode + 1, flags, level + 1); 1014 (node->repeatMax > 0 && count >= node->repeatMax))
975 1015 res = TRUE;
976 DBG_RE_PRINT("rest expr match=%s \"%s\"\n", 1016
977 match ? "YES" : "NO", haystack + toffs); 1017 if (res)
978 }
979
980 if (match)
981 { 1018 {
982 *offs = toffs; 1019 *offs = toffs;
983 *nnode = expr->nnodes; 1020 *nnode = expr->nnodes;
984 } 1021 }
985 1022
986 res = match && 1023 DBG_RE_PRINT("RESULT: %s : offs=%" PRIu_SIZE_T "='%s'\n",
987 ( 1024 res ? "YES" : "NO",
988 (node->repeatMax > 0 && count >= node->repeatMax) || 1025 *offs, haystack + *offs);
989 (node->repeatMin >= 0 && count >= node->repeatMin)
990 );
991
992 DBG_RE_PRINT("RESULT: match=%s, res=%s\n",
993 match ? "YES" : "NO", res ? "YES" : "NO");
994 1026
995 return res; 1027 return res;
996 } 1028 }
997 1029
998 1030