changeset 2270:0f0218dcccf8

More fixes to the matching sequence search functionality.
author Matti Hamalainen <ccr@tnsp.org>
date Mon, 17 Jun 2019 11:28:44 +0300
parents f350c7514785
children dcf9abeec930
files tools/fanalyze.c
diffstat 1 files changed, 51 insertions(+), 32 deletions(-) [+]
line wrap: on
line diff
--- a/tools/fanalyze.c	Mon Jun 17 10:27:05 2019 +0300
+++ b/tools/fanalyze.c	Mon Jun 17 11:28:44 2019 +0300
@@ -573,7 +573,7 @@
 }
 
 
-void dmAddMatchSequence(Uint8 *data, const size_t len, DMSourceFile *file, size_t offs)
+BOOL dmAddMatchSequence(Uint8 *data, const size_t len, DMSourceFile *file, size_t offs)
 {
     DMMatchSeq *seq = NULL;
 
@@ -581,9 +581,8 @@
     for (int n = 0; n < ndmSequences; n++)
     {
         DMMatchSeq *node = &dmSequences[n];
-        if (node->len == len &&
-            (node->data == data ||
-            memcmp(node->data, data, node->len) == 0))
+        if (node->len >= len &&
+            memcmp(node->data + node->len - len, data, len) == 0)
         {
             seq = node;
             break;
@@ -596,34 +595,46 @@
         if (ndmSequences + 1 >= SET_MAX_SEQUENCES)
         {
             dmErrorMsg("Too many matching sequences found.\n");
-            return;
+            return FALSE;
         }
 
         seq = &dmSequences[ndmSequences++];
-        seq->data = data;
-        seq->len = len;
     }
     else
     {
-        // Check for existing
+        // Check for existing place
         for (int n = 0; n < seq->nplaces; n++)
         {
             DMMatchPlace *place = &seq->places[n];
-            if (place->offs == offs && place->file == file)
-                return;
+            if (place->file == file &&
+                place->offs + seq->len == offs + len)
+                return TRUE;
         }
     }
 
+    seq->data = data;
+    seq->len = len;
+
     // Add another file + offset
     if (seq->nplaces < SET_MAX_PLACES)
     {
         DMMatchPlace *place = &seq->places[seq->nplaces++];
         place->file = file;
         place->offs = offs;
+
+        return TRUE;
     }
     else
-        return;
+        return FALSE;
+}
+
 
+int dmCompareMatchPlaces(const void *pa, const void *pb)
+{
+    // qsort() comparator: orders DMMatchPlace entries by ascending offs, returning -1, 0 or 1.
+    // Compares explicitly, as a size_t difference narrowed to int can end up with the wrong sign.
+    const DMMatchPlace *va = pa, *vb = pb;
+    return (va->offs > vb->offs) - (va->offs < vb->offs);
+}
 
 
@@ -917,7 +928,7 @@
         //
         // Attempt to find matching sequences of N+
         //
-        dmPrint(2, "Attempting to find matching sequences of %" DM_PRIu_SIZE_T" bytes or more\n",
+        dmPrint(0, "Attempting to find matching sequences of %" DM_PRIu_SIZE_T" bytes or more\n",
             optMinMatchLen);
 
         for (int nfile1 = 0; nfile1 < nsrcFiles; nfile1++)
@@ -930,35 +941,42 @@
                 DMSourceFile *file2 = &srcFiles[nfile2];
 
                 // Find longest possible matching sequence in file2, if any
-                size_t moffs1 = 0, moffs2 = 0;
-                while (moffs1 + optMinMatchLen < file1->size &&
-                       moffs2 + optMinMatchLen < file2->size)
+                for (size_t moffs1 = 0; moffs1 + optMinMatchLen < file1->size;)
                 {
-                    size_t cnt;
-                    for (cnt = 0; moffs1 + cnt < file1->size && moffs2 + cnt < file2->size; cnt++)
+                    size_t cnt = 0;
+                    for (size_t moffs2 = 0; moffs2 + optMinMatchLen < file2->size; moffs2++)
                     {
-                        if (file1->data[moffs1 + cnt] != file2->data[moffs2 + cnt])
-                            break;
+                        for (cnt = 0; moffs1 + cnt + optMinMatchLen < file1->size &&
+                            moffs2 + cnt + optMinMatchLen < file2->size; cnt++)
+                        {
+                            if (file1->data[moffs1 + cnt] != file2->data[moffs2 + cnt])
+                                break;
+                        }
+
+                        if (cnt >= optMinMatchLen)
+                        {
+                            // Match found
+                            dmAddMatchSequence(file1->data + moffs1, cnt, file1, moffs1);
+                            dmAddMatchSequence(file2->data + moffs2, cnt, file2, moffs2);
+                            moffs1 += cnt;
+                        }
                     }
 
-                    if (cnt >= optMinMatchLen)
-                    {
-                        // Match found
-                        dmAddMatchSequence(file1->data + moffs1, cnt, file1, moffs1);
-                        dmAddMatchSequence(file2->data + moffs2, cnt, file2, moffs2);
-
-                        moffs1 += cnt;
-                    }
-                    else
-                    {
+                    if (cnt < optMinMatchLen)
                         moffs1++;
-                        moffs2++;
-                    }
                 }
             }
             file1->analyzed = TRUE;
         }
 
+        for (int nmatch = 0; nmatch < ndmSequences; nmatch++)
+        {
+            DMMatchSeq *seq = &dmSequences[nmatch];
+
+            qsort(&seq->places, seq->nplaces, sizeof(DMMatchPlace),
+                dmCompareMatchPlaces);
+        }
+
         //
         // Display results
         //
@@ -997,8 +1015,9 @@
             for (int nplace = 0; nplace < seq->nplaces; nplace++)
             {
                 DMMatchPlace *place = &seq->places[nplace];
-                printf("    %08" DM_PRIx_SIZE_T ": %s\n",
+                printf("    %08" DM_PRIx_SIZE_T "-%08" DM_PRIx_SIZE_T ": %s\n",
                     place->offs,
+                    place->offs + seq->len,
                     place->file->filename);
 
             }