view tools/fanalyze.c @ 2208:90ec1ec89c56

Revamp the palette handling in lib64gfx somewhat, add helper functions to lib64util for handling external palette file options and add support for specifying one of the "internal" palettes or external (.act) palette file to gfxconv and 64vw.
author Matti Hamalainen <ccr@tnsp.org>
date Fri, 14 Jun 2019 05:01:12 +0300
parents 1662730053d0
children 837c79747ea4
line wrap: on
line source

/*
 * Fanalyze - Analyze similarities between multiple files
 * Programmed and designed by Matti 'ccr' Hamalainen
 * (C) Copyright 2018 Tecnic Software productions (TNSP)
 *
 * Please read file 'COPYING' for information on license and distribution.
 */
#include "dmtool.h"
#include "dmlib.h"
#include "dmargs.h"
#include "dmfile.h"

#define SET_MAX_FILES    64
#define SET_MAX_ELEMS    256
#define SET_MAX_VALUES   64


/* Typedefs
 */
/* Per-offset comparison element used by the analyze mode; main()
 * allocates one of these for every offset of the comparison buffer.
 */
typedef struct
{
    // Per byte value counters: main() increments stats[b] once for each
    // input file that has byte value b at this offset.
    Uint8 stats[SET_MAX_ELEMS];
    // variants = number of distinct byte values seen at this offset,
    // data = the highest byte value that was seen at this offset.
    Uint8 variants, data;
    // Accumulated "interest" score per decoding variant. The code in
    // main() only uses indices 0..3 of the 16 slots.
    int interest[16];
    // Number of times interest[] was incremented for each variant.
    int interestF[16];
} DMCompElem;


/* One entry of a byte value histogram. The value is stored alongside
 * the count so that the pair stays together when the table is sorted.
 */
typedef struct
{
    int count;      // Number of occurrences of 'value'
    Uint8 value;    // The byte value this entry counts
} DMStatValue;


/* Byte value histogram. After dmInitStats() entry n counts byte value n;
 * dmPrintStats() sorts the table in place by descending count, after
 * which it can no longer be indexed by byte value.
 */
typedef struct
{
    DMStatValue cv[SET_MAX_ELEMS];
} DMStats;


/* One input file given on the command line.
 */
typedef struct
{
    char *filename; // Points to the command line argument (not owned)
    Uint8 *data;    // File contents as read by dmReadDataFile(), freed in main()
    size_t size; // offset, crop_start, crop_end, doCrop?
    DMStats stats;  // Byte value histogram of this file
} DMSourceFile;


/* Value types for grep / offset modes. The values are used as indices
 * to the dmGrepTypes[] table, so the order must match that table.
 */
enum
{
    DMGV_UINT8 = 0,
    DMGV_UINT16_LE,
    DMGV_UINT16_BE,
    DMGV_UINT32_LE,
    DMGV_UINT32_BE,

    DMGV_last       // Number of value types, not a valid type itself
};


/* Display formats for offset mode. The values are used as indices
 * to the dmGrepDisp[] table, so the order must match that table.
 */
enum
{
    DMGS_HEX = 0,
    DMGS_DEC,
    DMGS_last       // Number of display formats, not a valid format itself
};


/* One value specified via the -g / -o command line options.
 */
typedef struct
{
    int type;       // Value type, one of DMGV_*
    int disp;       // Display format, one of DMGS_*
    Uint32 value;   // Value to search for (grep mode) or file offset (offset mode)
} DMGrepValue;


/* Description of one DMGV_* value type.
 */
typedef struct
{
    char *name;             // Human readable name of the type
    Uint32 nmax;            // Largest value representable in the type
    unsigned int bsize;     // Size of the type in bytes
} DMGrepType;


/* Value type descriptions, indexed by DMGV_*: name, maximum value
 * and size in bytes.
 */
static const DMGrepType dmGrepTypes[DMGV_last] =
{
    { "8bit (byte)"      , (1ULL <<  8) - 1, 1 },
    { "16bit (word) LE"  , (1ULL << 16) - 1, 2 },
    { "16bit (word) BE"  , (1ULL << 16) - 1, 2 },
    { "32bit (word) LE"  , (1ULL << 32) - 1, 4 },
    { "32bit (word) BE"  , (1ULL << 32) - 1, 4 },
};


/* Description of one DMGS_* display format.
 */
typedef struct
{
    char *name;     // Name of the format, printed in offset mode output
    char *fmt;      // printf() conversion character(s), appended to a "%0<width>" prefix in main()
} DMGrepDisp;


/* Display format descriptions, indexed by DMGS_*.
 */
static const DMGrepDisp dmGrepDisp[DMGS_last] =
{
    { "hex", "x" },
    { "dec", "d" },
};

/* Operating modes of the program, stored in setMode.
 */
enum
{
    FA_ANALYZE,     // Default: compare the input files with each other
    FA_GREP,        // -g: search the input files for given values
    FA_OFFSET,      // -o: show the data at given offsets of the input files
};


/* Global variables
 */
// Selected operating mode (FA_*), FA_ANALYZE unless -g or -o is given
int            setMode = FA_ANALYZE;
int            nsrcFiles = 0;              // Number of source files
DMSourceFile   srcFiles[SET_MAX_FILES];    // Source files (name, data, statistics)
// Byte value statistics summed over all source files
DMStats        totalStats;
// Number of entries in use in setGrepValues[]
int            nsetGrepValues = 0;
// Values (grep mode) or offsets (offset mode) given on the command line
DMGrepValue    setGrepValues[SET_MAX_VALUES];


/* Arguments
 */
/* Command line option table. The first field of each entry is the
 * option number that argHandleOpt() receives as its optN argument.
 * The value specifier after the comma is optional for both -g and -o,
 * see argParseGrepValue() for the accepted syntax.
 */
static const DMOptArg optList[] =
{
    {  0, '?', "help",        "Show this help", OPT_NONE },
    {  1, 'v', "verbose",     "Be more verbose", OPT_NONE },
    {  2, 'g', "grep",        "Binary grep <val>[,<le|be>[8|16|32]]", OPT_ARGREQ },
    {  3, 'o', "offset",      "Show data in offset <offset>[,<le|be>[8|16|32][d|x]]", OPT_ARGREQ },
};

// Number of entries in optList[]
static const int optListN = sizeof(optList) / sizeof(optList[0]);


/* Print the program banner, usage line and the list of command line
 * options to stdout.
 */
void argShowHelp(void)
{
    dmPrintBanner(stdout, dmProgName, "[options] <input file #1> <input file #2> [...]");
    dmArgsPrintHelp(stdout, optList, optListN, 0);
}


/* Parse the argument of the -g/--grep or -o/--offset option and append
 * the result to the global setGrepValues[] table.
 *
 * The argument has the form "<value>[,<spec>]". The spec consists of an
 * optional endianness ("le" or "be", case-insensitive, little endian
 * being the default), a bit size ("8", "16" or "32") and an optional
 * display format character ('d' for decimal, 'x' or 'h' for hex).
 * Anything following the display format character is ignored.
 *
 * In FA_GREP mode the smallest type that can hold the value is chosen
 * when no spec is given, and the value is range checked against the
 * type. In FA_OFFSET mode the value is an offset and the type defaults
 * to DMGV_UINT8.
 *
 * arg  - option argument string
 * mode - FA_GREP or FA_OFFSET; also stored to the global setMode
 *
 * Returns DMERR_OK on success, otherwise the error code of the failure.
 * Mixing options of different modes is rejected.
 */
int argParseGrepValue(const char *arg, const int mode)
{
    const char *sep = strchr(arg, ',');
    char *vspec = NULL, *vstr = NULL;
    int vdisp = DMGS_HEX, vtype = -1, ret = DMERR_OK;
    Uint32 vval;

    if (setMode != FA_ANALYZE && setMode != mode)
    {
        dmErrorMsg("Options specifying multiple operating modes can't be used.\n");
        return DMERR_INVALID_ARGS;
    }
    setMode = mode;

    // Do we have spec?
    if (sep != NULL)
    {
        vspec = dm_strdup_trim(sep + 1, DM_TRIM_BOTH);
        vstr = dm_strndup_trim(arg, sep - arg, DM_TRIM_BOTH);
    }
    else
    {
        vstr = dm_strdup(arg);
    }

    // Parse spec if any
    if (vspec != NULL)
    {
        BOOL vendianess = TRUE;
        char *vtmp = vspec;

        // Get endianness specifier, if any (TRUE = little endian)
        if (dm_strncasecmp(vtmp, "le", 2) == 0)
        {
            vendianess = TRUE;
            vtmp += 2;
        }
        else
        if (dm_strncasecmp(vtmp, "be", 2) == 0)
        {
            vendianess = FALSE;
            vtmp += 2;
        }

        // Get value bit size
        if (strncmp(vtmp, "8", 1) == 0)
        {
            vtype = DMGV_UINT8;
            vtmp += 1;
        }
        else
        if (strncmp(vtmp, "16", 2) == 0)
        {
            vtype = vendianess ? DMGV_UINT16_LE : DMGV_UINT16_BE;
            vtmp += 2;
        }
        else
        if (strncmp(vtmp, "32", 2) == 0)
        {
            vtype = vendianess ? DMGV_UINT32_LE : DMGV_UINT32_BE;
            vtmp += 2;
        }
        else
        {
            ret = dmError(DMERR_INVALID_ARGS,
                "Invalid grep type '%s'.\n",
                vspec);
            goto out;
        }

        // Get display format, if any. The cast is required because
        // passing a negative char value to tolower() is undefined.
        switch (tolower((unsigned char) *vtmp))
        {
            case 'd':
                vdisp = DMGS_DEC;
                break;

            case 'x': case 'h':
                vdisp = DMGS_HEX;
                break;

            case 0:
                break;

            default:
                ret = dmError(DMERR_INVALID_ARGS,
                    "Invalid grep view type '%s'.\n",
                    vspec);
                goto out;
        }
    }

    // Get value
    if (!dmGetIntVal(vstr, &vval, NULL))
    {
        ret = dmError(DMERR_INVALID_ARGS,
            "Not a valid integer value '%s'.\n",
            vstr);
        goto out;
    }

    if (mode == FA_GREP)
    {
        // Check if we need to guess size: walk the type table from the
        // last valid entry down, so that the smallest fitting type
        // (lowest index) is the one that remains selected.
        if (vtype < 0)
        {
            for (int n = DMGV_last - 1; n >= 0; n--)
            {
                const DMGrepType *def = &dmGrepTypes[n];
                if (vval <= def->nmax)
                    vtype = n;
            }
        }

        if (vtype < 0)
        {
            ret = dmError(DMERR_INVALID_ARGS,
                "Could not guess value type for '%s'.\n",
                arg);
            goto out;
        }

        // Check range
        if (vval > dmGrepTypes[vtype].nmax)
        {
            ret = dmError(DMERR_INVALID_ARGS,
                "Integer value %u out of range (0 - %u) for type %s.\n",
                vval, dmGrepTypes[vtype].nmax, dmGrepTypes[vtype].name);

            goto out;
        }
    }
    else
    if (mode == FA_OFFSET)
    {
        // The value is an offset here, so there is no range to check
        if (vtype < 0)
            vtype = DMGV_UINT8;
    }

    if (nsetGrepValues < SET_MAX_VALUES)
    {
        DMGrepValue *node = &setGrepValues[nsetGrepValues++];
        node->type = vtype;
        node->disp = vdisp;
        node->value = vval;

        dmMsg(1, "Grep value %s : %u / 0x%x\n",
            dmGrepTypes[vtype].name,
            vval, vval);
    }
    else
    {
        ret = dmError(DMERR_BOUNDS,
            "Too many values specified (max %d).\n",
            SET_MAX_VALUES);
    }

out:
    dmFree(vspec);
    dmFree(vstr);
    return ret;
}


/* Option callback given to dmArgsProcess(): acts on one parsed command
 * line option.
 *
 * optN    - option number from optList[]
 * optArg  - argument of the option (used by the grep and offset options)
 * currArg - the command line argument as given, for the error message
 *
 * Returns TRUE if the option was handled, FALSE on unknown option or
 * if the option argument could not be parsed.
 */
BOOL argHandleOpt(const int optN, char *optArg, char *currArg)
{
    // Help: print usage and terminate the program
    if (optN == 0)
    {
        argShowHelp();
        exit(0);
    }

    // Verbosity
    if (optN == 1)
    {
        dmVerbosity++;
        return TRUE;
    }

    // Grep and offset share the same value parser
    if (optN == 2 || optN == 3)
    {
        const int parseMode = (optN == 2) ? FA_GREP : FA_OFFSET;
        return argParseGrepValue(optArg, parseMode) == DMERR_OK;
    }

    dmErrorMsg("Unknown argument '%s'.\n", currArg);
    return FALSE;
}


BOOL argHandleNonOpt(char *currArg)
{
    if (nsrcFiles < SET_MAX_FILES)
    {
        DMSourceFile *file = &srcFiles[nsrcFiles++];
        file->filename = currArg;
        return TRUE;
    }
    else
    {
        dmErrorMsg("Maximum number of input files exceeded (%d).\n",
            SET_MAX_FILES);
        return TRUE;
    }
}


/* Reset a byte value histogram: every entry gets a zero count and is
 * labeled with its own index as the byte value.
 */
void dmInitStats(DMStats *stats)
{
    DMStatValue *entry = stats->cv;

    for (size_t idx = 0; idx < SET_MAX_ELEMS; idx++, entry++)
    {
        entry->value = idx;
        entry->count = 0;
    }
}


/* qsort() comparison callback for DMStatValue entries, ordering them
 * by descending count: the result is positive when vb has the larger
 * count, negative when va has, and zero when the counts are equal.
 */
int dmCompareStatFunc(const void *va, const void *vb)
{
    const int countA = ((const DMStatValue *) va)->count;
    const int countB = ((const DMStatValue *) vb)->count;

    return countB - countA;
}


/* Print the most frequent byte values of a histogram to stdout.
 *
 * NOTICE! The histogram is sorted in place by descending count, so
 * after this call stats->cv[] can no longer be indexed by byte value.
 *
 * stats - histogram to sort and print
 * nmax  - number of entries to print, clamped to SET_MAX_ELEMS
 * size  - total number of bytes the histogram was collected from, used
 *         for the percentages (0.00% is printed if this is zero)
 */
void dmPrintStats(DMStats *stats, const int nmax, const size_t size)
{
    // Never read past the end of the table
    const int nshow = nmax > SET_MAX_ELEMS ? SET_MAX_ELEMS : nmax;

    qsort(stats->cv, SET_MAX_ELEMS, sizeof(stats->cv[0]), dmCompareStatFunc);

    for (int n = 0; n < nshow; n++)
    {
        const DMStatValue *sv = &stats->cv[n];

        // Avoid division by zero for empty input
        const float percent = (size > 0) ?
            ((float) sv->count * 100.0f) / (float) size : 0.0f;

        printf("$%02x (%d = %1.2f%%), ",
            sv->value, sv->count, percent);
    }
    printf("\n\n");
}


/* Read an unsigned integer value of the given type from a file buffer.
 *
 * The value is assembled one byte at a time, so the result does not
 * depend on the host byte order and the offset does not need to be
 * aligned for the value type.
 *
 * type - value type, one of DMGV_*
 * file - source file, with file->data and file->size set
 * offs - byte offset of the value in file->data
 * mval - receives the value, or 0 on failure
 *
 * Returns TRUE on success. Returns FALSE if the type is not valid or
 * if the value does not fit completely inside the file data.
 */
BOOL dmGetData(const int type, const DMSourceFile *file, const size_t offs, Uint32 *mval)
{
    *mval = 0;

    // Validate the type before using it as a table index
    if (type < 0 || type >= DMGV_last)
        return FALSE;

    // The value occupies bytes offs .. offs + bsize - 1, all of which
    // must be inside the data. Written this way to avoid overflow of
    // offs + bsize with very large offsets.
    const size_t bsize = dmGrepTypes[type].bsize;
    if (file->data == NULL || bsize > file->size || offs > file->size - bsize)
        return FALSE;

    const Uint8 *data = file->data + offs;

    switch (type)
    {
        case DMGV_UINT8:
            *mval = data[0];
            break;

        case DMGV_UINT16_LE:
            *mval = (Uint32) data[0] | ((Uint32) data[1] << 8);
            break;

        case DMGV_UINT16_BE:
            *mval = ((Uint32) data[0] << 8) | (Uint32) data[1];
            break;

        case DMGV_UINT32_LE:
            *mval =
                (Uint32) data[0] |
                ((Uint32) data[1] << 8) |
                ((Uint32) data[2] << 16) |
                ((Uint32) data[3] << 24);
            break;

        case DMGV_UINT32_BE:
            *mval =
                ((Uint32) data[0] << 24) |
                ((Uint32) data[1] << 16) |
                ((Uint32) data[2] << 8) |
                (Uint32) data[3];
            break;

        default:
            return FALSE;
    }
    return TRUE;
}


/* Program entry point: parses the command line, reads all input files
 * to memory and runs the selected operating mode.
 *
 *  - FA_GREP:    search each file for the given values and print the
 *                offsets of the matches.
 *  - FA_OFFSET:  print a table of the data found at the given offsets
 *                in each file.
 *  - FA_ANALYZE: compare the files byte by byte over the length of the
 *                comparison buffer, look for values that are close to
 *                the file size, and print byte value statistics.
 *
 * Returns 0 on success and 1 on failure.
 */
int main(int argc, char *argv[])
{
    DMCompElem *compBuf = NULL;
    size_t compBufSize = 0, totalSize = 0;
    int res, ret = 1;

    dmInitProg("fanalyze", "File format analyzer", "0.3", NULL, NULL);
    dmVerbosity = 1;

    dmInitStats(&totalStats);

    // Parse arguments
    if (!dmArgsProcess(argc, argv, optList, optListN,
        argHandleOpt, argHandleNonOpt, OPTH_BAILOUT))
        exit(1);

    if (nsrcFiles < 1)
    {
        dmErrorMsg("Nothing to do. (try --help)\n");
        goto out;
    }

    // Read input files
    for (int nfile = 0; nfile < nsrcFiles; nfile++)
    {
        DMSourceFile *file = &srcFiles[nfile];
        if ((res = dmReadDataFile(NULL, file->filename, &file->data, &file->size)) != DMERR_OK)
        {
            dmErrorMsg("Could not read '%s': %s\n",
                file->filename, dmErrorStr(res));
            goto out;
        }

        dmPrint(2, "Input #%d: '%s', %" DM_PRIu_SIZE_T " bytes.\n",
            nfile + 1, file->filename, file->size);

        // Comparison buffer size follows the smallest input file
        if (!compBufSize || file->size < compBufSize)
            compBufSize = file->size;

        totalSize += file->size;
        dmInitStats(&file->stats);
    }


    //
    // Check what operating mode we are in
    //
    if (setMode == FA_GREP)
    {
        for (int nfile = 0; nfile < nsrcFiles; nfile++)
        {
            DMSourceFile *file = &srcFiles[nfile];
            dmPrint(0, "\n%s\n", file->filename);

            for (int n = 0; n < nsetGrepValues; n++)
            {
                DMGrepValue *node = &setGrepValues[n];
                const DMGrepType *def = &dmGrepTypes[node->type];

                // Go through every offset where a complete value fits,
                // including the one ending at the last byte of the file
                for (size_t offs = 0; offs + def->bsize <= file->size; offs++)
                {
                    Uint32 mval;

                    // Skip offsets that can't be read, so that the zero
                    // returned on failure is not taken for real data
                    if (!dmGetData(node->type, file, offs, &mval))
                        continue;

                    if (mval == node->value)
                    {
                        dmPrint(0, "%08" DM_PRIx_SIZE_T " : %s match %u / 0x%x\n",
                            offs, def->name, mval, mval);
                    }
                }
            }
        }
    }
    else
    if (setMode == FA_OFFSET)
    {
        for (int nfile = 0; nfile < nsrcFiles; nfile++)
        {
            DMSourceFile *file = &srcFiles[nfile];
            dmPrint(1, "#%03d: %s\n", nfile + 1, file->filename);
        }

        // Table header: one column per input file
        printf("  offset :");
        for (int nfile = 0; nfile < nsrcFiles; nfile++)
            printf("    %03d   ", nfile + 1);
        printf("\n");

        printf("==========");
        for (int nfile = 0; nfile < nsrcFiles; nfile++)
            printf("===========");
        printf("\n");

        // One row per requested offset
        for (int n = 0; n < nsetGrepValues; n++)
        {
            DMGrepValue *node = &setGrepValues[n];
            const DMGrepType *def = &dmGrepTypes[node->type];
            printf("%08x : ", node->value);

            for (int nfile = 0; nfile < nsrcFiles; nfile++)
            {
                DMSourceFile *file = &srcFiles[nfile];
                Uint32 mval;
                char mstr[32];
                int npad, nwidth;

                if (dmGetData(node->type, file, node->value, &mval))
                {
                    // Build a format string like "%04x" for the value:
                    // two digits per byte of the value type
                    char mfmt[16];
                    nwidth = def->bsize * 2;
                    snprintf(mfmt, sizeof(mfmt), "%%0%d%s",
                        nwidth, dmGrepDisp[node->disp].fmt);

                    snprintf(mstr, sizeof(mstr), mfmt, mval);
                }
                else
                {
                    // Offset is outside of this file
                    strcpy(mstr, "----");
                    nwidth = 4;
                }

                // Center the value in the column
                npad = (10 - nwidth) / 2;
                for (int q = 0; q < npad; q++)
                    fputc(' ', stdout);

                fputs(mstr, stdout);

                for (int q = 0; q < npad; q++)
                    fputc(' ', stdout);
            }

            printf("  [%s]\n",
                dmGrepDisp[node->disp].name);
        }
    }
    else
    if (setMode == FA_ANALYZE)
    {
        // Allocate comparison buffer
        // XXX: integer overflow?
        dmPrint(2, "Allocating %" DM_PRIu_SIZE_T " element (%" DM_PRIu_SIZE_T " bytes) comparision buffer.\n",
            compBufSize, compBufSize * sizeof(DMCompElem));

        if ((compBuf = dmCalloc(compBufSize, sizeof(DMCompElem))) == NULL)
        {
            dmErrorMsg("Out of memory. Could not allocate comparision buffer!\n");
            goto out;
        }

        // Begin analyzing ..
        dmPrint(2, "Analyzing ..\n");
        for (int nfile = 0; nfile < nsrcFiles; nfile++)
        {
            DMSourceFile *file = &srcFiles[nfile];

            // Byte value statistics over the whole file
            for (size_t offs = 0; offs < file->size; offs++)
            {
                Uint8 bv = file->data[offs];
                totalStats.cv[bv].count++;
                file->stats.cv[bv].count++;
            }

            // Per-offset statistics over the comparison buffer
            for (size_t offs = 0; offs < compBufSize; offs++)
            {
                Uint8 data = offs < file->size ? file->data[offs] : 0;
                compBuf[offs].stats[data]++;
            }
        }

        // Count the distinct byte values seen at each offset
        for (size_t offs = 0; offs < compBufSize; offs++)
        {
            DMCompElem *el = &compBuf[offs];
            for (int n = 0; n < SET_MAX_ELEMS; n++)
            {
                if (el->stats[n] > 0)
                {
                    el->variants++;
                    el->data = n;
                }
            }
        }

        // Display results, 16 offsets per line: the byte value where
        // all files agree, or the number of variants in brackets
        for (size_t offs = 0, n = 0; offs < compBufSize; offs++)
        {
            DMCompElem *el = &compBuf[offs];
            BOOL var = el->variants > 1;

            if (n == 0)
                printf("%08" DM_PRIx_SIZE_T " | ", offs);

            if (var)
                printf("[%2d] ", el->variants);
            else
                printf(" %02x  ", el->data);

            if (++n >= 16)
            {
                printf("\n");
                n = 0;
            }
        }

        printf("\n");

        // Attempt further analysis: look for 16/32-bit values that are
        // close to (at most 31 below) the size of the file
        for (int nfile = 0; nfile < nsrcFiles; nfile++)
        {
            DMSourceFile *file = &srcFiles[nfile];
            size_t len = file->size > compBufSize ? compBufSize : file->size;
            for (size_t offs = 0; offs + 4 < len; offs++)
            {
                DMCompElem *elem = &compBuf[offs];

                // Variants: 0 = 16bit LE, 1 = 16bit BE,
                // 2 = 32bit LE, 3 = 32bit BE
                for (int variant = 3; variant >= 0; variant--)
                {
                    size_t nmax = (variant < 2) ? sizeof(Uint16) : sizeof(Uint32);
                    Uint32 tmp = 0;

                    for (size_t n = 0; n < nmax; n++)
                    {
                        // The first byte shifted in becomes the most
                        // significant one, thus little endian values
                        // are read from the last byte (nmax - 1) down
                        // to the first
                        size_t boffs = (variant & 1) ? n : nmax - 1 - n;

                        tmp <<= 8;
                        tmp |= file->data[offs + boffs];
                    }

                    // If tmp is larger than the size, the unsigned
                    // subtraction wraps around and the check fails
                    if (file->size - tmp < 32)
                    {
                        elem->interest[variant] += 32 - (file->size - tmp);
                        elem->interestF[variant]++;
                    }
                }
            }
        }

        printf("\nMore findings:\n");
        for (size_t offs = 0; offs + 4 < compBufSize; offs++)
        {
            DMCompElem *elem = &compBuf[offs];

            for (int variant = 0; variant < 4; variant++)
            if (elem->interestF[variant] > 0)
            {
                printf("%08" DM_PRIx_SIZE_T " | V%d : %d / %d\n",
                offs, variant,
                elem->interestF[variant], elem->interest[variant]);
            }
        }

        printf("\nGlobal most used bytes:\n");
        dmPrintStats(&totalStats, 16, totalSize);

        for (int nfile = 0; nfile < nsrcFiles; nfile++)
        {
            DMSourceFile *file = &srcFiles[nfile];
            printf("Most used bytes for '%s':\n", file->filename);
            dmPrintStats(&file->stats, 16, file->size);
        }
    }
    else
    {
        dmErrorMsg("Invalid operating mode?\n");
        goto out;
    }

    // Everything went fine
    ret = 0;

out:
    dmFree(compBuf);

    for (int nfile = 0; nfile < nsrcFiles; nfile++)
    {
        DMSourceFile *file = &srcFiles[nfile];
        dmFree(file->data);
    }

    return ret;
}