comparison th_regex.c @ 667:039aa00cbfbf

Work on regex matcher.
author Matti Hamalainen <ccr@tnsp.org>
date Mon, 27 Jan 2020 17:07:06 +0200
parents e1d27caf0dbd
children 7493d4c9ff77
comparison
equal deleted inserted replaced
666:e1d27caf0dbd 667:039aa00cbfbf
940 size_t *nnode, 940 size_t *nnode,
941 const int flags, 941 const int flags,
942 const int level 942 const int level
943 ) 943 )
944 { 944 {
945 size_t toffs = *offs; 945 size_t toffs = *offs, last_offs = *offs;
946 BOOL res = FALSE, rest = FALSE;
947 ssize_t count = 0; 946 ssize_t count = 0;
948 947
949 do 948 do
950 { 949 {
951 // Attempt to match the repeated node once 950 // Attempt to match the repeated node once
952 size_t poffs; 951 size_t poffs = toffs;
953 BOOL matched; 952 if (th_regex_match_one(haystack, &poffs, node, flags, level))
954 953 {
955 poffs = toffs; 954 // Matched, increase count of repeats
956 if ((matched = th_regex_match_one(haystack, &poffs, node, flags, level)))
957 {
958 // Matched, increase count
959 count++; 955 count++;
960 // DBG_RE_PRINT("#%" PRId_SSIZE_T "\n", count); 956 //DBG_RE_PRINT("#%" PRId_SSIZE_T "\n", count);
961 957
958 // poffs should now be at position + 1 from match
962 } 959 }
963 else 960 else
964 { 961 {
965 // No match, backtrack 962 // Did not match, get out if repeatMin > 0
966 poffs = toffs; 963 if (node->repeatMin > 0)
967 // DBG_RE_PRINT("nope\n"); 964 break;
968 if (rest) 965 }
969 break; 966
970 } 967 // Attempt to match rest of the expression
971 968 size_t qoffs1 = poffs, qoffs2 = toffs;
972 // Attempt to match rest of the expression if matched 969 DBG_RE_PRINT("try rest '%s' :: '%s'\n", haystack + qoffs1, haystack + qoffs2);
973 // or if required repeats are 0 970 if (th_regex_match_expr(haystack, &qoffs1, expr, *nnode + 1, flags, level + 1))
974 if (matched || node->repeatMin == 0) 971 {
975 { 972 // Matched
976 size_t qoffs = poffs; 973 toffs = last_offs = qoffs1;
977 DBG_RE_PRINT("try rest '%s'\n", haystack + qoffs); 974
978 if (th_regex_match_expr(haystack, &qoffs, expr, *nnode + 1, flags, level + 1)) 975 DBG_RE_PRINT(" yes1: count=%" PRId_SSIZE_T " [%" PRId_SSIZE_T " .. %" PRId_SSIZE_T "]\n", count, node->repeatMin, node->repeatMax);
979 { 976
980 // Matched 977 // Check min repeats and if we are "not greedy".
981 978 if (count >= node->repeatMin && node->repeatMax == 1)
982 // Check min repeats and if we are "not greedy". 979 break;
983 if (count >= node->repeatMin && node->repeatMax == 1) 980
984 res = TRUE; 981 // Check max repeats
985 982 if (node->repeatMax > 0 && count >= node->repeatMax)
986 // Check max repeats 983 break;
987 if (node->repeatMax > 0 && count >= node->repeatMax)
988 res = TRUE;
989
990 DBG_RE_PRINT("yes: res=%s count=%" PRId_SSIZE_T " [%" PRId_SSIZE_T " .. %" PRId_SSIZE_T "]\n", res ? "yes" : "no", count, node->repeatMin, node->repeatMax);
991 toffs = qoffs;
992 }
993 else
994 {
995 // Rest of expression did not match
996 DBG_RE_PRINT("no\n");
997 toffs = poffs;
998 }
999
1000 rest = TRUE;
1001 } 984 }
1002 else 985 else
1003 { 986 if (node->repeatMin == 0 &&
1004 DBG_RE_PRINT("no match and repeatmin>0\n"); 987 th_regex_match_expr(haystack, &qoffs2, expr, *nnode + 1, flags, level + 1))
1005 break; 988 {
1006 } 989 // Matched
1007 990 toffs = last_offs = qoffs2;
1008 // DBG_RE_PRINT("res=%d [%" PRIu_SIZE_T "='%c']\n", res, toffs, haystack[toffs]); 991
1009 992 DBG_RE_PRINT(" yes2: count=%" PRId_SSIZE_T " [%" PRId_SSIZE_T " .. %" PRId_SSIZE_T "]\n", count, node->repeatMin, node->repeatMax);
1010 } while (!res && haystack[toffs] != 0); 993
1011 994 // Check min repeats and if we are "not greedy".
1012 // Check min repeats and if we are "not greedy". 995 if (count >= node->repeatMin && node->repeatMax == 1)
1013 if (count >= node->repeatMin || 996 break;
1014 (node->repeatMax > 0 && count >= node->repeatMax)) 997
1015 res = TRUE; 998 // Check max repeats
999 if (node->repeatMax > 0 && count >= node->repeatMax)
1000 break;
1001
1002 }
1003 else
1004 {
1005 // Rest of expression did not match, try again
1006 DBG_RE_PRINT(" no\n");
1007 toffs = poffs;
1008 }
1009
1010
1011 } while (haystack[toffs] != 0);
1012
1013 // Check results
1014 BOOL res = count >= node->repeatMin ||
1015 (node->repeatMax > 0 && count >= node->repeatMax);
1016 1016
1017 if (res) 1017 if (res)
1018 { 1018 {
1019 *offs = toffs; 1019 *offs = last_offs;
1020 *nnode = expr->nnodes; 1020 *nnode = expr->nnodes;
1021 } 1021 }
1022 1022
1023 DBG_RE_PRINT("RESULT: %s : offs=%" PRIu_SIZE_T "='%s'\n", 1023 DBG_RE_PRINT("RESULT: %s : offs=%" PRIu_SIZE_T "='%s'\n",
1024 res ? "YES" : "NO", 1024 res ? "YES" : "NO",