view tools/fanalyze.c @ 2576:812b16ee49db

I had been living under the apparently false impression that "realfft.c", from which the FFT implementation in DMLIB was basically copied, had been released into the public domain at some point, but it could very well be that it never was. The correct license is (or seems to be) the GNU GPL. Thus I am removing the code from DMLIB, and profusely apologize to the author, Philip Van Baren. It was never my intention to distribute code based on his original work under a more liberal license than originally intended.
author Matti Hamalainen <ccr@tnsp.org>
date Fri, 11 Mar 2022 16:32:50 +0200
parents b205c60aa657
children 9f26a93d7e70
line wrap: on
line source

/*
 * Fanalyze - Commandline tool for analyzing similarities between multiple files
 * Programmed and designed by Matti 'ccr' Hamalainen
 * (C) Copyright 2018-2022 Tecnic Software productions (TNSP)
 *
 * Please read file 'COPYING' for information on license and distribution.
 */
#include "dmtool.h"
#include "dmlib.h"
#include "dmargs.h"
#include "dmfile.h"

// Capacity of the srcFiles[] table, i.e. maximum number of input files
#define SET_MAX_FILES          64
// Size of the per-byte-value tables (indexed by an Uint8 value)
#define SET_MAX_ELEMS          256
// Number of multi-byte interpretations tried per offset in analysis mode
#define SET_MAX_VARIANTS       4

// Capacity of setGrepValues[], one entry per -g / -o option given
#define SET_MAX_GREP_VALUES    128
// Maximum number of comma-separated values in a single -g / -o list
#define SET_MAX_GREP_LIST      128

// Capacity of the dmSequences[] table used by match mode
#define SET_MAX_SEQUENCES      1024
// Maximum number of places recorded for a single matching sequence
#define SET_MAX_PLACES         1024


/* Typedefs
 */
// Per-offset comparison data collected over all input files (analysis mode)
typedef struct
{
    // stats[v] = number of files that have byte value v at this offset
    Uint8 stats[SET_MAX_ELEMS];
    // variants = number of distinct byte values seen at this offset,
    // data = the highest byte value seen (the common value if variants == 1)
    Uint8 variants, data;
    // Accumulated score per variant for values that are close to the file size
    int interest[SET_MAX_VARIANTS];
    // Number of times a score was added to interest[] for the variant
    int interestF[SET_MAX_VARIANTS];
} DMCompElem;


// Occurrence counter for one byte value. The value is stored alongside
// the count so that the pairing survives sorting by count.
typedef struct
{
    int count;
    Uint8 value;
} DMStatValue;


// Byte value histogram: one counter per possible byte value
typedef struct
{
    DMStatValue cv[SET_MAX_ELEMS];
} DMStats;


// One input file given on the command line
typedef struct
{
    // File name as given in the arguments (points to argv, not owned)
    char *filename;
    // File contents and their size in bytes, filled by dmReadDataFile()
    Uint8 *data;
    size_t size;
    // Byte value histogram of this file (analysis mode)
    DMStats stats;
    // Set in match mode once this file has been compared against the others
    BOOL analyzed;
    // Index of this entry in srcFiles[]
    size_t index;
} DMSourceFile;


// One location (file + offset) where a matching sequence occurs
typedef struct
{
    DMSourceFile *file;  // pointer to file struct where match was found
    size_t offs;         // offset to match in file data
} DMMatchPlace;


// A byte sequence found in more than one place, with the list of places
typedef struct
{
    size_t len;     // length of the matching sequence
    Uint8 *data;    // "const" pointer to data in one file, don't free()

    int nfiles;     // number of separate files match was found
    int nplaces;    // number of places where match was found
    DMMatchPlace places[SET_MAX_PLACES];
} DMMatchSeq;


// Value types for grep / offset lists. The values are used as indices
// to the dmGrepTypes[] table, DMGV_last is the number of types.
enum
{
    DMGV_UINT8 = 0,
    DMGV_UINT16_LE,
    DMGV_UINT16_BE,
    DMGV_UINT32_LE,
    DMGV_UINT32_BE,

    DMGV_last
};


// Display formats for values. The values are used as indices to the
// dmGrepDisp[] table, DMGS_last is the number of formats.
enum
{
    DMGS_HEX = 0,
    DMGS_DEC,
    DMGS_last
};


// One parsed -g / -o argument: a list of values (or offsets) and how
// they are to be interpreted and displayed.
typedef struct
{
    // Value type, one of DMGV_*
    int type;
    // Display format, one of DMGS_*
    int disp;
    // Number of used entries in values[] and vwildcards[]
    int nvalues;
    // Values to search for in grep mode, offsets to show in offset mode
    Uint32 values[SET_MAX_GREP_LIST];
    // TRUE if the corresponding list entry is a wildcard ('?' or '#')
    BOOL vwildcards[SET_MAX_GREP_LIST];
} DMGrepValue;


// Description of one value type (entry of dmGrepTypes[])
typedef struct
{
    // Short name, printed as the type suffix in offset-list output
    char *name;
    // Human readable description used in messages
    char *desc;
    // Largest value representable in this type
    Uint32 nmax;
    // Size of the type in bytes
    unsigned int bsize;
} DMGrepType;


static const DMGrepType dmGrepTypes[DMGV_last] =
{
    { "8"    , "8bit (byte)"      , (1ULL <<  8) - 1, 1 },
    { "le16" , "16bit (word) LE"  , (1ULL << 16) - 1, 2 },
    { "be16" , "16bit (word) BE"  , (1ULL << 16) - 1, 2 },
    { "le32" , "32bit (word) LE"  , (1ULL << 32) - 1, 4 },
    { "be32" , "32bit (word) BE"  , (1ULL << 32) - 1, 4 },
};


// Description of one display format (entry of dmGrepDisp[])
typedef struct
{
    // Name of the format, shown in offset mode output
    char *name;
    // Flags placed between '%' and the field width when building
    // a printf() format string (e.g. "0" for zero padding)
    char *fmtPrefix;
    // printf() conversion character(s) for the value
    char *fmt;
} DMGrepDisp;


static const DMGrepDisp dmGrepDisp[DMGS_last] =
{
    { "hex", "0", "x" },
    { "dec", "" , "d" },
};


// Operating modes of the program (stored in setMode)
enum
{
    FA_ANALYZE,     // automatic analysis, the default
    FA_GREP,        // binary grep (-g)
    FA_OFFSET,      // show data at given offsets (-o)
    FA_MATCHES,     // search for matching sequences (-m)
};


/* Global variables
 */
// Selected operating mode, one of FA_*
int            setMode = FA_ANALYZE;
int            nsrcFiles = 0;              // Number of source files
DMSourceFile   srcFiles[SET_MAX_FILES];    // Source files (name, data, statistics)
// Byte value histogram over all the input files
DMStats        totalStats;
// Number of used entries in setGrepValues[]
int            nsetGrepValues = 0;
// Parsed -g / -o arguments
DMGrepValue    setGrepValues[SET_MAX_GREP_VALUES];
// Minimum length of a matching sequence in match mode (-n)
size_t         optMinMatchLen = 8;
// If TRUE, grep mode prints its results as an -o offset list (-O)
BOOL           optOffsetMode = FALSE;

// Matching sequences found in match mode and the number of them
DMMatchSeq dmSequences[SET_MAX_SEQUENCES];
int ndmSequences = 0;


/* Arguments
 */
// Commandline options. The first field of each entry is the option
// identifier that argHandleOpt() receives and switches on.
static const DMOptArg optList[] =
{
    {  0, '?', "help"            , "Show this help", OPT_NONE },
    {  1,   0, "license"         , "Print out this program's license agreement", OPT_NONE },
    {  2, 'v', "verbose"         , "Be more verbose", OPT_NONE },

    { 10, 'g', "grep"            , "Binary grep <val>[,<val2>...][:<le|be>[8|16|32]]", OPT_ARGREQ },
    { 12, 'o', "offset"          , "Show data in offset <offs>[,<offs2>...][:<le|be>[8|16|32][d|x]]", OPT_ARGREQ },
    { 14, 'm', "match"           , "Find matching sequences minimum of <n> bytes long", OPT_NONE },
    { 16, 'n', "minmatch"        , "Minimum match sequence length", OPT_ARGREQ },

    { 18, 'O', "offset-mode"     , "Output -o offset list when in grep mode (-g)", OPT_NONE },
};

// Number of entries in optList[]
static const int optListN = sizeof(optList) / sizeof(optList[0]);


/* Print the program banner, the list of options and a short
 * description of the operating modes to stdout.
 */
void argShowHelp()
{
    // Free-form part of the help, printed after the option list
    static const char *description =
        "\n"
        "Fanalyze is a simplistic commandline tool to assist analysis of similarities\n"
        "between multiple files of same format (but different content). It provides\n"
        "automatic analysis (default operating mode), binary grep functionality (-g)\n"
        "and offset data display (-o)\n"
        "\n"
        "Value lists for grep function can contain wildcard '?' (or '#') which\n"
        "matches any value of the specified (or inferred) type. For example:\n"
        "-g 0x0f,7,5,?,5,?,? will match sequence of bytes 0f 07 05 ?? 05 ?? ??\n"
        "and -g 0xe,0x1001,?,2023:le16 will match le16 value 000e 1001 ???? 07e7\n"
        "\n"
        "NOTICE! Matching sequences search (-m) is considered unfinished and\n"
        "under development.\n";

    dmPrintBanner(stdout, dmProgName, "[options] <input file #1> <input file #2> [...]");
    dmArgsPrintHelp(stdout, optList, optListN, 0, 80 - 2);

    fputs(description, stdout);
}


/* Read one value of given type from the file data.
 *
 * @param type  value type, one of DMGV_*
 * @param file  file to read from
 * @param offs  offset of the first byte of the value in the file data
 * @param mval  where to store the result, set to 0 on failure
 *
 * @return TRUE if the value was read, FALSE if the type is not valid
 * or if the value does not fit completely inside the file data.
 */
BOOL dmGetData(const int type, const DMSourceFile *file, const size_t offs, Uint32 *mval)
{
    *mval = 0;

    // Validate the type before it is used to index the type table
    if (type < 0 || type >= DMGV_last)
        return FALSE;

    // The value must end at or before the end of data. A value that ends
    // exactly at the last byte is valid. The check is written so that
    // it can't overflow with large offsets.
    const size_t bsize = dmGrepTypes[type].bsize;
    if (bsize > file->size || offs > file->size - bsize)
        return FALSE;

    // Assemble the value one byte at a time: the offset can be anything,
    // so reading through an Uint16 / Uint32 pointer could be unaligned.
    const Uint8 *data = file->data + offs;
    switch (type)
    {
        case DMGV_UINT8:
            *mval = data[0];
            break;

        case DMGV_UINT16_LE:
            *mval = (Uint32) data[0] |
                    ((Uint32) data[1] << 8);
            break;

        case DMGV_UINT16_BE:
            *mval = ((Uint32) data[0] << 8) |
                    (Uint32) data[1];
            break;

        case DMGV_UINT32_LE:
            *mval = (Uint32) data[0] |
                    ((Uint32) data[1] << 8) |
                    ((Uint32) data[2] << 16) |
                    ((Uint32) data[3] << 24);
            break;

        case DMGV_UINT32_BE:
            *mval = ((Uint32) data[0] << 24) |
                    ((Uint32) data[1] << 16) |
                    ((Uint32) data[2] << 8) |
                    (Uint32) data[3];
            break;

        default:
            return FALSE;
    }

    return TRUE;
}


/* Print the values of a grep value list separated by spaces.
 *
 * @param fh     stream to print to
 * @param node   the value list
 * @param match  if TRUE, print the actual data read from the file at the
 *               match position, otherwise print the list itself, with
 *               wildcards shown as '?'
 * @param file   file to read the data from, only used if match is TRUE
 * @param offs   offset of the match in file, only used if match is TRUE
 */
void dmPrintGrepValueList(FILE *fh, const DMGrepValue *node, const BOOL match, DMSourceFile *file, const size_t offs)
{
    const DMGrepDisp *disp = &dmGrepDisp[node->disp];
    const unsigned int width = dmGrepTypes[node->type].bsize;
    char valueFmt[16];

    // Build format for "<value><separator>", the value is printed
    // with two characters for each byte of the type
    snprintf(valueFmt, sizeof(valueFmt), "%%%s%d%s%%s",
        disp->fmtPrefix, width * 2, disp->fmt);

    for (int idx = 0; idx < node->nvalues; idx++)
    {
        // No separator after the last value
        const char *tail = (idx + 1 < node->nvalues) ? " " : "";
        Uint32 value;

        if (!match && node->vwildcards[idx])
        {
            fprintf(fh, "?%s", tail);
            continue;
        }

        if (match)
            dmGetData(node->type, file, offs + idx * width, &value);
        else
            value = node->values[idx];

        fprintf(fh, valueFmt, value, tail);
    }
}


int argParseGrepValue(const char *arg, const int mode)
{
    const char *specsep = strchr(arg, ':');
    char *vspec, *vstr, *vsep;
    DMGrepValue val;
    int ret = DMERR_OK;
    BOOL more;

    memset(&val, 0, sizeof(val));

    if (setMode != FA_ANALYZE && setMode != mode)
    {
        dmErrorMsg("Options specifying multiple operating modes can't be used.\n");
        return DMERR_INVALID_ARGS;
    }
    setMode = mode;

    // Do we have spec?
    if (specsep != NULL)
    {
        vspec = dm_strdup_trim(specsep + 1, DM_TRIM_BOTH);
        vstr = dm_strndup_trim(arg, specsep - arg, DM_TRIM_BOTH);
    }
    else
    {
        vspec = NULL;
        vstr = dm_strdup(arg);
    }

    // Parse spec if any
    if (vspec != NULL)
    {
        BOOL vendianess = TRUE;
        char *vtmp = vspec;

        // Get endianess specifier, if any
        if (strncasecmp(vtmp, "le", 2) == 0)
        {
            vendianess = TRUE;
            vtmp += 2;
        }
        else
        if (strncasecmp(vtmp, "be", 2) == 0)
        {
            vendianess = FALSE;
            vtmp += 2;
        }

        // Get value bit size
        if (strncmp(vtmp, "8", 1) == 0)
        {
            val.type = DMGV_UINT8;
            vtmp += 1;
        }
        else
        if (strncmp(vtmp, "16", 2) == 0)
        {
            val.type = vendianess ? DMGV_UINT16_LE : DMGV_UINT16_BE;
            vtmp += 2;
        }
        else
        if (strncmp(vtmp, "32", 2) == 0)
        {
            val.type = vendianess ? DMGV_UINT32_LE : DMGV_UINT32_BE;
            vtmp += 2;
        }
        else
        {
            ret = dmError(DMERR_INVALID_ARGS,
                "Invalid grep type '%s'.\n",
                vspec);
            goto out;
        }

        switch (tolower(*vtmp))
        {
            case 'd':
                val.disp = DMGS_DEC;
                break;

            case 'x': case 'h':
                val.disp = DMGS_HEX;
                break;

            case 0:
                break;

            default:
                ret = dmError(DMERR_INVALID_ARGS,
                    "Invalid grep view type '%s'.\n",
                    vspec);
                goto out;
        }
    }

    // Get list of values
    char *vtmp = vstr;
    do
    {
        if (val.nvalues >= SET_MAX_GREP_LIST)
        {
            ret = dmError(DMERR_BOUNDS,
                "Too many greplist values specified '%s'.\n",
                vstr);
            goto out;
        }

        if ((vsep = strchr(vtmp, ',')) != NULL)
        {
            *vsep = 0;
            more = TRUE;
        }
        else
            more = FALSE;

        if (vtmp[0] == '#' || vtmp[0] == '?')
        {
            val.vwildcards[val.nvalues] = TRUE;
            if (mode == FA_OFFSET)
            {
                ret = dmError(DMERR_INVALID_ARGS,
                    "Offset mode does not allow wildcard values.\n");
                goto out;
            }
        }
        else
        if (!dmGetIntVal(vtmp, &val.values[val.nvalues], NULL))
        {
            ret = dmError(DMERR_INVALID_ARGS,
                "Not a valid integer value '%s'.\n",
                vtmp);
            goto out;
        }

        val.nvalues++;

        if (more)
            vtmp = vsep + 1;
    } while (more);

    if (val.vwildcards[0])
    {
        ret = dmError(DMERR_INVALID_ARGS,
            "First grep value can not be a wildcard.\n");
        goto out;
    }

    if (mode == FA_GREP)
    {
        // Check if we need to guess size
        if (val.type < 0)
        {
            for (int n = DMGV_last; n >= 0; n--)
            {
                const DMGrepType *def = &dmGrepTypes[n];
                if (val.values[0] <= def->nmax)
                    val.type = n;
            }
        }

        if (val.type < 0)
        {
            ret = dmError(DMERR_INVALID_ARGS,
                "Could not guess value type for '%s'.\n",
                arg);
            goto out;
        }

        // Check range
        for (int n = 0; n < val.nvalues; n++)
        if (!val.vwildcards[n] && val.values[n] > dmGrepTypes[val.type].nmax)
        {
            ret = dmError(DMERR_INVALID_ARGS,
                "Integer value %d <= %d <= %d out of range for type %s.\n",
                val.values[n], 0, dmGrepTypes[val.type].nmax,
                dmGrepTypes[val.type].desc);

            goto out;
        }
    }
    else
    if (mode == FA_OFFSET)
    {
        if (val.type < 0)
            val.type = DMGV_UINT8;
    }

    if (nsetGrepValues < SET_MAX_GREP_VALUES)
    {
        DMGrepValue *node = &setGrepValues[nsetGrepValues++];
        memcpy(node, &val, sizeof(val));

        if (mode == FA_GREP)
        {
            printf("Grep %s: ",
                dmGrepTypes[val.type].desc);

            dmPrintGrepValueList(stdout, node, FALSE, NULL, 0);
            printf("\n");
        }
    }
    else
    {
        ret = dmError(DMERR_BOUNDS,
            "Too many values specified (max %d).",
            SET_MAX_GREP_VALUES);
    }

out:
    dmFree(vspec);
    dmFree(vstr);
    return ret;
}


BOOL argHandleOpt(const int optN, char *optArg, char *currArg)
{
    switch (optN)
    {
        case 0:
            argShowHelp();
            exit(0);
            break;

        case 1:
            dmPrintLicense(stdout);
            exit(0);
            break;

        case 2:
            dmVerbosity++;
            break;

        case 10:
            return argParseGrepValue(optArg, FA_GREP) == DMERR_OK;

        case 12:
            return argParseGrepValue(optArg, FA_OFFSET) == DMERR_OK;

        case 14:
            setMode = FA_MATCHES;
            break;

        case 16:
            optMinMatchLen = atoi(optArg);
            if (optMinMatchLen < 2 || optMinMatchLen > 16*1024)
            {
                dmErrorMsg("Invalid minimum match length '%s'.\n",
                    optArg);
                return FALSE;
            }
            return TRUE;

        case 18:
            optOffsetMode = TRUE;
            break;

        default:
            dmErrorMsg("Unknown argument '%s'.\n", currArg);
            return FALSE;
    }

    return TRUE;
}


BOOL argHandleNonOpt(char *currArg)
{
    if (nsrcFiles < SET_MAX_FILES)
    {
        DMSourceFile *file = &srcFiles[nsrcFiles];
        file->filename = currArg;
        file->index = nsrcFiles;
        nsrcFiles++;
        return TRUE;
    }
    else
    {
        dmErrorMsg("Maximum number of input files exceeded (%d).\n",
            SET_MAX_FILES);
        return TRUE;
    }
}


/* Reset a byte value histogram: every counter is set to zero and
 * tagged with the byte value it counts.
 */
void dmInitStats(DMStats *stats)
{
    DMStatValue *slot = stats->cv;

    for (size_t byte = 0; byte < SET_MAX_ELEMS; byte++, slot++)
    {
        slot->value = byte;
        slot->count = 0;
    }
}


/* qsort() comparison function for DMStatValue entries, orders the
 * entries by count, largest count first.
 */
int dmCompareStatFunc(const void *va, const void *vb)
{
    const DMStatValue *first = (const DMStatValue *) va;
    const DMStatValue *second = (const DMStatValue *) vb;
    const int delta = second->count - first->count;

    return delta;
}


/* Print the most common byte values of a histogram to stdout.
 * NOTICE! The histogram is sorted in place by count, so after this
 * it can no longer be indexed by byte value.
 *
 * @param stats  the histogram
 * @param nmax   how many of the most common values to print
 * @param size   total number of bytes counted, for the percentages
 */
void dmPrintStats(DMStats *stats, const int nmax, const size_t size)
{
    // Never go past the end of the table
    const int nshow = nmax < SET_MAX_ELEMS ? nmax : SET_MAX_ELEMS;

    qsort(stats->cv, SET_MAX_ELEMS, sizeof(stats->cv[0]), dmCompareStatFunc);

    for (int n = 0; n < nshow; n++)
    {
        const DMStatValue *cv = &stats->cv[n];

        // Avoid division by zero with empty data
        const float percent = (size > 0) ?
            ((float) cv->count * 100.0f) / (float) size : 0.0f;

        printf("$%02x (%d = %1.2f%%), ",
            cv->value, cv->count, percent);
    }
    printf("\n\n");
}


/* Record a place where a matching sequence was found. The place is
 * added to an existing sequence if there is one which ends with the
 * same bytes, otherwise a new sequence is created.
 *
 * @param data  pointer to the sequence in the file data (not copied)
 * @param len   length of the sequence in bytes
 * @param file  file where the sequence was found
 * @param offs  offset of the sequence in the file
 *
 * @return TRUE if the place was recorded or was already known, FALSE
 * if the sequence or place table is full (a message is printed).
 */
BOOL dmAddMatchSequence(Uint8 *data, const size_t len, DMSourceFile *file, size_t offs)
{
    DMMatchSeq *seq = NULL;

    // Check for existing match sequence: one which is at least
    // as long and has the same last 'len' bytes
    for (int n = 0; n < ndmSequences; n++)
    {
        DMMatchSeq *node = &dmSequences[n];
        if (node->len >= len &&
            memcmp(node->data + node->len - len, data, len) == 0)
        {
            seq = node;
            break;
        }
    }

    if (seq == NULL)
    {
        // No sequence found, add a new one if there are free slots
        if (ndmSequences >= SET_MAX_SEQUENCES)
        {
            dmErrorMsg("Too many matching sequences found.\n");
            return FALSE;
        }

        seq = &dmSequences[ndmSequences++];
    }
    else
    {
        // Check for existing place: same file and the
        // sequence ends at the same position
        for (int n = 0; n < seq->nplaces; n++)
        {
            DMMatchPlace *place = &seq->places[n];
            if (place->file == file &&
                place->offs + seq->len == offs + len)
                return TRUE;
        }
    }

    // NOTICE! This also replaces the data and length
    // of an existing sequence with the new ones
    seq->data = data;
    seq->len = len;

    // Add another file + offset
    if (seq->nplaces >= SET_MAX_PLACES)
    {
        dmErrorMsg("Too many places for a matching sequence found.\n");
        return FALSE;
    }

    DMMatchPlace *place = &seq->places[seq->nplaces++];
    place->file = file;
    place->offs = offs;

    return TRUE;
}


/* qsort() comparison function for DMMatchPlace entries, orders the
 * entries by ascending offset.
 */
int dmCompareMatchPlaces(const void *pa, const void *pb)
{
    const DMMatchPlace *va = (const DMMatchPlace *) pa,
        *vb = (const DMMatchPlace *) pb;

    // The offsets are size_t, so a difference converted to int
    // could get truncated or have a wrong sign. Compare instead.
    return (va->offs > vb->offs) - (va->offs < vb->offs);
}


int main(int argc, char *argv[])
{
    DMCompElem *compBuf = NULL;
    size_t compBufSize = 0, totalSize = 0, fileFlagsSize;
    BOOL *fileFlags = NULL;
    int res;

    memset(&dmSequences, 0, sizeof(dmSequences));

    dmInitProg("fanalyze", "Simple tool for file format analysis",
        "0.4", NULL, NULL);
    dmVerbosity = 0;

    dmInitStats(&totalStats);

    // Parse arguments
    if (!dmArgsProcess(argc, argv, optList, optListN,
        argHandleOpt, argHandleNonOpt, OPTH_BAILOUT))
        goto out;

    if (nsrcFiles < 1)
    {
        argShowHelp();
        res = dmError(DMERR_INVALID_ARGS,
            "No input file(s) specified.\n");
        goto out;
    }

    // Allocate file flags
    fileFlagsSize = sizeof(BOOL) * nsrcFiles;
    if ((fileFlags = dmMalloc(fileFlagsSize)) == NULL)
    {
        dmErrorMsg("Could not allocate %" DM_PRIu_SIZE_T " bytes of memory for file flag array.\n",
            fileFlagsSize);
        goto out;
    }

    // Read input files
    for (int nfile = 0; nfile < nsrcFiles; nfile++)
    {
        DMSourceFile *file = &srcFiles[nfile];
        if ((res = dmReadDataFile(NULL, file->filename, &file->data, &file->size)) != DMERR_OK)
        {
            dmErrorMsg("Could not read '%s': %s\n",
                file->filename, dmErrorStr(res));
            goto out;
        }

        dmPrint(2, "Input #%d: '%s', %" DM_PRIu_SIZE_T " bytes.\n",
            nfile + 1, file->filename, file->size);

        if (!compBufSize || file->size < compBufSize)
            compBufSize = file->size;

        totalSize += file->size;
        dmInitStats(&file->stats);
    }


    //
    // Check what operating mode we are in
    //
    if (setMode == FA_GREP)
    {
        for (int nfile = 0; nfile < nsrcFiles; nfile++)
        {
            DMSourceFile *file = &srcFiles[nfile];
            printf("\n%s\n", file->filename);

            for (int n = 0; n < nsetGrepValues; n++)
            {
                DMGrepValue *node = &setGrepValues[n];
                const DMGrepType *def = &dmGrepTypes[node->type];
                BOOL sep = FALSE;

                if (optOffsetMode)
                {
                    printf("%s %s -o ",
                        argv[0], file->filename);
                }

                for (size_t offs = 0; offs + (def->bsize * node->nvalues) < file->size; offs++)
                {
                    BOOL match = TRUE;
                    for (int n = 0; n < node->nvalues; n++)
                    if (!node->vwildcards[n])
                    {
                        Uint32 mval;
                        dmGetData(node->type, file, offs + n * def->bsize, &mval);

                        if (mval != node->values[n])
                        {
                            match = FALSE;
                            break;
                        }
                    }

                    if (match)
                    {
                        if (optOffsetMode)
                        {
                            printf("%s0x%" DM_PRIx_SIZE_T, sep ? "," : "", offs);
                            sep = TRUE;
                        }
                        else
                            printf("%08" DM_PRIx_SIZE_T, offs);

                        if (!optOffsetMode)
                        {
                            if (dmVerbosity >= 1)
                            {
                                printf(" : ");
                                dmPrintGrepValueList(stdout, node, TRUE, file, offs);
                            }
                            printf("\n");
                        }
                    }
                }

                if (optOffsetMode)
                    printf(":%s\n", def->name);
            }
        }
    }
    else
    if (setMode == FA_OFFSET)
    {
        for (int nfile = 0; nfile < nsrcFiles; nfile++)
        {
            DMSourceFile *file = &srcFiles[nfile];
            dmPrint(1, "#%03d: %s\n", nfile + 1, file->filename);
        }

        printf("  offset :");
        for (int nfile = 0; nfile < nsrcFiles; nfile++)
            printf("    %03d   ", nfile + 1);
        printf("\n");

        printf("==========");
        for (int nfile = 0; nfile < nsrcFiles; nfile++)
            printf("===========");
        printf("\n");

        for (int n = 0; n < nsetGrepValues; n++)
        {
            DMGrepValue *node = &setGrepValues[n];
            const DMGrepType *def = &dmGrepTypes[node->type];

            for (int nv = 0; nv < node->nvalues; nv++)
            {
                printf("%08x : ", node->values[nv]);

                for (int nfile = 0; nfile < nsrcFiles; nfile++)
                {
                    DMSourceFile *file = &srcFiles[nfile];
                    Uint32 mval;
                    char mstr[32];
                    int npad, nwidth;

                    if (dmGetData(node->type, file, node->values[nv], &mval))
                    {
                        char mfmt[16];
                        nwidth = def->bsize * 2;
                        snprintf(mfmt, sizeof(mfmt), "%%0%d%s",
                            nwidth, dmGrepDisp[node->disp].fmt);

                        snprintf(mstr, sizeof(mstr), mfmt, mval);
                    }
                    else
                    {
                        strcpy(mstr, "----");
                        nwidth = 4;
                    }

                    npad = (10 - nwidth) / 2;
                    for (int q = 0; q < npad; q++)
                        fputc(' ', stdout);

                    fputs(mstr, stdout);

                    for (int q = 0; q < npad; q++)
                        fputc(' ', stdout);
                }

                printf("  [%s]\n",
                    dmGrepDisp[node->disp].name);
            }
        }
    }
    else
    if (setMode == FA_ANALYZE)
    {
        // Allocate comparision buffer
        // XXX: integer overflow?
        dmPrint(2, "Allocating %" DM_PRIu_SIZE_T " element (%" DM_PRIu_SIZE_T
            " bytes) comparision buffer.\n",
            compBufSize, compBufSize * sizeof(DMCompElem));

        if ((compBuf = dmCalloc(compBufSize, sizeof(DMCompElem))) == NULL)
        {
            dmErrorMsg("Out of memory. Could not allocate comparision buffer!\n");
            goto out;
        }

        //
        // Basic file data comparision
        //
        dmPrint(2, "Analyzing ..\n");
        for (int nfile = 0; nfile < nsrcFiles; nfile++)
        {
            DMSourceFile *file = &srcFiles[nfile];

            for (size_t offs = 0; offs < file->size; offs++)
            {
                Uint8 bv = file->data[offs];
                totalStats.cv[bv].count++;
                file->stats.cv[bv].count++;
            }

            for (size_t offs = 0; offs < compBufSize; offs++)
            {
                Uint8 data = offs < file->size ? file->data[offs] : 0;
                compBuf[offs].stats[data]++;
            }
        }

        for (size_t offs = 0; offs < compBufSize; offs++)
        {
            DMCompElem *el = &compBuf[offs];
            for (int n = 0; n < SET_MAX_ELEMS; n++)
            {
                if (el->stats[n] > 0)
                {
                    el->variants++;
                    el->data = n;
                }
            }
        }

        //
        // Display results
        //
        for (size_t offs = 0, n = 0; offs < compBufSize; offs++)
        {
            DMCompElem *el = &compBuf[offs];
            BOOL var = el->variants > 1;

            if (n == 0)
                printf("%08" DM_PRIx_SIZE_T " | ", offs);

            if (var)
                printf("[%2d] ", el->variants);
            else
                printf(" %02x  ", el->data);

            if (++n >= 16)
            {
                printf("\n");
                n = 0;
            }
        }

        printf("\n");

        //
        // Attempt further analysis
        //
        for (int nfile = 0; nfile < nsrcFiles; nfile++)
        {
            DMSourceFile *file = &srcFiles[nfile];
            size_t len = file->size > compBufSize ? compBufSize : file->size;
            for (size_t offs = 0; offs + 4 < len; offs++)
            {
                DMCompElem *elem = &compBuf[offs];

                for (int variant = SET_MAX_VARIANTS - 1; variant >= 0; variant--)
                {
                    size_t nmax = (variant < 2) ? sizeof(Uint16) : sizeof(Uint32);
                    Uint32 tmp = 0;

                    for (size_t n = 0; n < nmax; n++)
                    {
                        size_t boffs = (variant & 1) ? n : nmax - n;

                        tmp <<= 8;
                        tmp |= file->data[offs + boffs];
                    }

                    if (file->size - tmp < 32)
                    {
                        elem->interest[variant] += 32 - (file->size - tmp);
                        elem->interestF[variant]++;
                    }
                }
            }
        }

        printf("\nMore findings:\n");
        for (size_t offs = 0; offs + 4 < compBufSize; offs++)
        {
            DMCompElem *elem = &compBuf[offs];

            for (int variant = 0; variant < SET_MAX_VARIANTS; variant++)
            if (elem->interestF[variant] > 0)
            {
                printf("%08" DM_PRIx_SIZE_T " | V%d : %d / %d\n",
                offs, variant,
                elem->interestF[variant], elem->interest[variant]);
            }
        }

        printf("\nGlobal most used bytes:\n");
        dmPrintStats(&totalStats, 16, totalSize);

        for (int nfile = 0; nfile < nsrcFiles; nfile++)
        {
            DMSourceFile *file = &srcFiles[nfile];
            printf("Most used bytes for '%s':\n", file->filename);
            dmPrintStats(&file->stats, 16, file->size);
        }

    }
    else
    if (setMode == FA_MATCHES)
    {
        //
        // Attempt to find matching sequences of N+
        //
        BOOL slow = FALSE;
        int ss = 0;
        printf("Attempting to find matching sequences of %" DM_PRIu_SIZE_T " bytes or more.\n",
            optMinMatchLen);

        if (totalSize > 32*1024)
        {
            dmPrint(0,
            "WARNING! Total data size is large, and the matching \"algorithm\"\n"
            "used is horribly inefficient. This will be quite slow ...\n");
            slow = TRUE;
        }

        for (int nfile1 = 0; nfile1 < nsrcFiles; nfile1++)
        {
            DMSourceFile *file1 = &srcFiles[nfile1];

            for (int nfile2 = 0; nfile2 < nsrcFiles; nfile2++)
            if (nfile2 != nfile1 && !file1->analyzed)
            {
                DMSourceFile *file2 = &srcFiles[nfile2];

                if (slow)
                {
                    dmPrint(0,
                        "Processing .. %1.1f%%\r",
                        (float) ss * 100.0f / (float) (nsrcFiles * (nsrcFiles - 1)));
                    ss++;
                }

                for (size_t moffs1 = 0; moffs1 + optMinMatchLen < file1->size;)
                {
                    size_t cnt = 0;
                    for (size_t moffs2 = 0; moffs2 + optMinMatchLen < file2->size; moffs2++)
                    {
                        for (cnt = 0; moffs1 + cnt + optMinMatchLen < file1->size &&
                            moffs2 + cnt + optMinMatchLen < file2->size; cnt++)
                        {
                            if (file1->data[moffs1 + cnt] != file2->data[moffs2 + cnt])
                                break;
                        }

                        if (cnt >= optMinMatchLen)
                        {
                            // Match found
                            if (!dmAddMatchSequence(file1->data + moffs1, cnt, file1, moffs1) ||
                                !dmAddMatchSequence(file2->data + moffs2, cnt, file2, moffs2))
                                goto done;

                            moffs1 += cnt;
                        }
                    }

                    if (cnt < optMinMatchLen)
                        moffs1++;
                }
            }
            file1->analyzed = TRUE;
        }

done:

        if (slow)
            dmPrint(0, "\n\n");

        for (int nmatch = 0; nmatch < ndmSequences; nmatch++)
        {
            DMMatchSeq *seq = &dmSequences[nmatch];

            qsort(&seq->places, seq->nplaces, sizeof(DMMatchPlace),
                dmCompareMatchPlaces);
        }

        //
        // Count number of files
        //
        for (int nmatch = 0; nmatch < ndmSequences; nmatch++)
        {
            DMMatchSeq *seq = &dmSequences[nmatch];
            memset(fileFlags, 0, fileFlagsSize);

            for (int nplace = 0; nplace < seq->nplaces; nplace++)
            {
                DMMatchPlace *place = &seq->places[nplace];
                if (!fileFlags[place->file->index])
                {
                    fileFlags[place->file->index] = TRUE;
                    seq->nfiles++;
                }
            }
        }

        //
        // Display results
        //
        dmPrint(0, "Found %d matching sequence groups of %" DM_PRIu_SIZE_T " bytes minimum.\n",
            ndmSequences, optMinMatchLen);

        for (int nmatch = 0; nmatch < ndmSequences; nmatch++)
        {
            DMMatchSeq *seq = &dmSequences[nmatch];

            printf("\nSeq of %" DM_PRIu_SIZE_T " bytes in %d places (in %d files)\n",
                seq->len, seq->nplaces, seq->nfiles);

            if (dmVerbosity > 0)
            {
                int n = 0;
                for (size_t offs = 0; offs < seq->len; offs++)
                {
                    if (n == 0)
                        printf("    ");

                    printf("%02x%s",
                        seq->data[offs],
                        offs + 1 < seq->len ? " " : "");

                    if (++n >= 16)
                    {
                        printf("\n");
                        n = 0;
                    }
                }
                if (n > 0)
                    printf("\n");
            }

            for (int nplace = 0; nplace < seq->nplaces; nplace++)
            {
                DMMatchPlace *place = &seq->places[nplace];
                printf("    %08" DM_PRIx_SIZE_T "-%08" DM_PRIx_SIZE_T ": %s\n",
                    place->offs,
                    place->offs + seq->len - 1,
                    place->file->filename);

            }
        }
    }
    else
    {
        dmErrorMsg("Invalid operating mode?\n");
    }

out:
    dmFree(fileFlags);
    dmFree(compBuf);

    for (int nfile = 0; nfile < nsrcFiles; nfile++)
    {
        DMSourceFile *file = &srcFiles[nfile];
        dmFree(file->data);
    }

    return 0;
}