# HG changeset patch
# User Matti Hamalainen
# Date 1560760124 -10800
# Node ID 0f0218dcccf81cc1fede3106b0fcc57dd60029da
# Parent  f350c7514785c3fa55c45b629c58859672d209db
More fixes to the matching sequence search functionality.

Review amendment: dmCompareMatchPlaces() orders places with an explicit
three-way comparison of the size_t offsets. Returning their difference
as an int relies on an out-of-range conversion and can mis-order entries.

diff -r f350c7514785 -r 0f0218dcccf8 tools/fanalyze.c
--- a/tools/fanalyze.c	Mon Jun 17 10:27:05 2019 +0300
+++ b/tools/fanalyze.c	Mon Jun 17 11:28:44 2019 +0300
@@ -573,7 +573,7 @@
 }
 
 
-void dmAddMatchSequence(Uint8 *data, const size_t len, DMSourceFile *file, size_t offs)
+BOOL dmAddMatchSequence(Uint8 *data, const size_t len, DMSourceFile *file, size_t offs)
 {
     DMMatchSeq *seq = NULL;
 
@@ -581,9 +581,8 @@
     for (int n = 0; n < ndmSequences; n++)
     {
         DMMatchSeq *node = &dmSequences[n];
-        if (node->len == len &&
-            (node->data == data ||
-            memcmp(node->data, data, node->len) == 0))
+        if (node->len >= len &&
+            memcmp(node->data + node->len - len, data, len) == 0)
         {
             seq = node;
             break;
@@ -596,34 +595,46 @@
         if (ndmSequences + 1 >= SET_MAX_SEQUENCES)
         {
             dmErrorMsg("Too many matching sequences found.\n");
-            return;
+            return FALSE;
         }
 
         seq = &dmSequences[ndmSequences++];
-        seq->data = data;
-        seq->len = len;
     }
     else
     {
-        // Check for existing
+        // Check for existing place
         for (int n = 0; n < seq->nplaces; n++)
         {
             DMMatchPlace *place = &seq->places[n];
-            if (place->offs == offs && place->file == file)
-                return;
+            if (place->file == file &&
+                place->offs + seq->len == offs + len)
+                return TRUE;
         }
     }
 
+    seq->data = data;
+    seq->len = len;
+
     // Add another file + offset
     if (seq->nplaces < SET_MAX_PLACES)
     {
         DMMatchPlace *place = &seq->places[seq->nplaces++];
 
         place->file = file;
         place->offs = offs;
+
+        return TRUE;
     }
     else
-        return;
+        return FALSE;
+}
+
+int dmCompareMatchPlaces(const void *pa, const void *pb)
+{
+    const DMMatchPlace *va = (const DMMatchPlace *) pa,
+        *vb = (const DMMatchPlace *) pb;
+    // Explicit three-way compare: a size_t difference does not fit in int
+    return (va->offs > vb->offs) - (va->offs < vb->offs);
 }
 
 
@@ -917,7 +928,7 @@
     //
     // Attempt to find matching sequences of N+
     //
-    dmPrint(2, "Attempting to find matching sequences of %" DM_PRIu_SIZE_T" bytes or more\n",
+    dmPrint(0, "Attempting to find matching sequences of %" DM_PRIu_SIZE_T" bytes or more\n",
         optMinMatchLen);
 
     for (int nfile1 = 0; nfile1 < nsrcFiles; nfile1++)
@@ -930,35 +941,42 @@
             DMSourceFile *file2 = &srcFiles[nfile2];
 
             // Find longest possible matching sequence in file2, if any
-            size_t moffs1 = 0, moffs2 = 0;
-            while (moffs1 + optMinMatchLen < file1->size &&
-                   moffs2 + optMinMatchLen < file2->size)
+            for (size_t moffs1 = 0; moffs1 + optMinMatchLen < file1->size;)
             {
-                size_t cnt;
-                for (cnt = 0; moffs1 + cnt < file1->size && moffs2 + cnt < file2->size; cnt++)
+                size_t cnt = 0;
+                for (size_t moffs2 = 0; moffs2 + optMinMatchLen < file2->size; moffs2++)
                 {
-                    if (file1->data[moffs1 + cnt] != file2->data[moffs2 + cnt])
-                        break;
+                    for (cnt = 0; moffs1 + cnt + optMinMatchLen < file1->size &&
+                        moffs2 + cnt + optMinMatchLen < file2->size; cnt++)
+                    {
+                        if (file1->data[moffs1 + cnt] != file2->data[moffs2 + cnt])
+                            break;
+                    }
+
+                    if (cnt >= optMinMatchLen)
+                    {
+                        // Match found
+                        dmAddMatchSequence(file1->data + moffs1, cnt, file1, moffs1);
+                        dmAddMatchSequence(file2->data + moffs2, cnt, file2, moffs2);
+                        moffs1 += cnt;
+                    }
                 }
 
-                if (cnt >= optMinMatchLen)
-                {
-                    // Match found
-                    dmAddMatchSequence(file1->data + moffs1, cnt, file1, moffs1);
-                    dmAddMatchSequence(file2->data + moffs2, cnt, file2, moffs2);
-
-                    moffs1 += cnt;
-                }
-                else
-                {
+                if (cnt < optMinMatchLen)
                     moffs1++;
-                    moffs2++;
-                }
             }
         }
         file1->analyzed = TRUE;
     }
 
+    for (int nmatch = 0; nmatch < ndmSequences; nmatch++)
+    {
+        DMMatchSeq *seq = &dmSequences[nmatch];
+
+        qsort(&seq->places, seq->nplaces, sizeof(DMMatchPlace),
+            dmCompareMatchPlaces);
+    }
+
     //
     // Display results
     //
@@ -997,8 +1015,9 @@
         for (int nplace = 0; nplace < seq->nplaces; nplace++)
         {
             DMMatchPlace *place = &seq->places[nplace];
-            printf(" %08" DM_PRIx_SIZE_T ": %s\n",
+            printf(" %08" DM_PRIx_SIZE_T "-%08" DM_PRIx_SIZE_T ": %s\n",
                 place->offs,
+                place->offs + seq->len,
                 place->file->filename);
         }
 