comparison tools/fanalyze.c @ 1996:4a4c3e15b8c2

Add more analyzing.
author Matti Hamalainen <ccr@tnsp.org>
date Mon, 02 Jul 2018 02:26:52 +0300
parents ea6337c873c6
children 8e38fa3c4f98
comparison
equal deleted inserted replaced
1995:ea6337c873c6 1996:4a4c3e15b8c2
8 #include "dmtool.h" 8 #include "dmtool.h"
9 #include "dmlib.h" 9 #include "dmlib.h"
10 #include "dmargs.h" 10 #include "dmargs.h"
11 #include "dmfile.h" 11 #include "dmfile.h"
12 12
13 #define SET_MAX_FILES (8) 13 #define SET_MAX_FILES 16
14 #define SET_MAX_ELEMS 256
14 15
15 16
16 /* Typedefs 17 /* Typedefs
17 */ 18 */
18 typedef struct 19 typedef struct
19 { 20 {
21 Uint8 stats[SET_MAX_ELEMS];
22 Uint8 variants, data;
23 int interest[16];
24 int interestF[16];
25 } DMCompElem;
26
27
28 typedef struct
29 {
30 int count;
31 Uint8 value;
32 } DMStatValue;
33
34
35 typedef struct
36 {
37 DMStatValue cv[SET_MAX_ELEMS];
38 } DMStats;
39
40
41 typedef struct
42 {
20 char *filename; 43 char *filename;
21 Uint8 *data; 44 Uint8 *data;
22 size_t size; // offset, crop_start, crop_end, doCrop? 45 size_t size; // offset, crop_start, crop_end, doCrop?
46 DMStats stats;
23 } DMSourceFile; 47 } DMSourceFile;
24 48
25 49
26 /* Global variables 50 /* Global variables
27 */ 51 */
28 int nsrcFiles = 0; // Number of source files 52 int nsrcFiles = 0; // Number of source files
29 DMSourceFile srcFiles[SET_MAX_FILES]; // Source file names 53 DMSourceFile srcFiles[SET_MAX_FILES]; // Source file names
54 DMStats totalStats;
30 55
31 56
32 /* Arguments 57 /* Arguments
33 */ 58 */
34 static const DMOptArg optList[] = 59 static const DMOptArg optList[] =
81 } 106 }
82 else 107 else
83 { 108 {
84 dmErrorMsg("Maximum number of input files exceeded (%d).\n", 109 dmErrorMsg("Maximum number of input files exceeded (%d).\n",
85 SET_MAX_FILES); 110 SET_MAX_FILES);
86 return FALSE; 111 return TRUE;
87 } 112 }
88 } 113 }
89 114
90 115
91 #define SET_MAX_ELEMS 256 116 void dmInitStats(DMStats *stats)
92 typedef struct 117 {
93 { 118 for (size_t n = 0; n < SET_MAX_ELEMS; n++)
94 Uint8 counts[SET_MAX_ELEMS]; 119 {
95 Uint8 variants, data; 120 stats->cv[n].count = 0;
96 } DMCompElem; 121 stats->cv[n].value = n;
122 }
123 }
124
125
126 int dmCompareStatFunc(const void *va, const void *vb)
127 {
128 const DMStatValue *pa = va, *pb = vb;
129 return pb->count - pa->count;
130 }
131
132
133 void dmSortStats(DMStats *stats)
134 {
135 qsort(&stats->cv, sizeof(DMStatValue), SET_MAX_ELEMS, dmCompareStatFunc);
136 }
137
138
139 void dmPrintStats(DMStats *stats, const int nmax, const size_t size)
140 {
141 for (int n = 0; n < nmax; n++)
142 {
143 printf("$%02x (%1.2f%%), ",
144 stats->cv[n].value,
145 ((float) stats->cv[n].count * 100.0f) / (float) size);
146 }
147 printf("\n\n");
148 }
97 149
98 150
99 int main(int argc, char *argv[]) 151 int main(int argc, char *argv[])
100 { 152 {
101 DMCompElem *compBuf = NULL; 153 DMCompElem *compBuf = NULL;
102 size_t compBufSize = 0; 154 size_t compBufSize = 0, totalSize = 0;
103 int res; 155 int res;
104 156
105 dmInitProg("fanalyze", "File format analyzer", "0.1", NULL, NULL); 157 dmInitProg("fanalyze", "File format analyzer", "0.1", NULL, NULL);
106 dmVerbosity = 1; 158 dmVerbosity = 1;
159
160 dmInitStats(&totalStats);
107 161
108 // Parse arguments 162 // Parse arguments
109 if (!dmArgsProcess(argc, argv, optList, optListN, 163 if (!dmArgsProcess(argc, argv, optList, optListN,
110 argHandleOpt, argHandleNonOpt, OPTH_BAILOUT)) 164 argHandleOpt, argHandleNonOpt, OPTH_BAILOUT))
111 exit(1); 165 exit(1);
130 dmPrint(2, "Input #%d: '%s', %" DM_PRIu_SIZE_T " bytes.\n", 184 dmPrint(2, "Input #%d: '%s', %" DM_PRIu_SIZE_T " bytes.\n",
131 nfile + 1, file->filename, file->size); 185 nfile + 1, file->filename, file->size);
132 186
133 if (!compBufSize || file->size < compBufSize) 187 if (!compBufSize || file->size < compBufSize)
134 compBufSize = file->size; 188 compBufSize = file->size;
189
190 totalSize += file->size;
135 } 191 }
136 192
137 // Allocate comparision buffer 193 // Allocate comparision buffer
138 // XXX: integer overflow? 194 // XXX: integer overflow?
139 dmPrint(2, "Allocating %d element (%d bytes) comparision buffer.\n", 195 dmPrint(2, "Allocating %d element (%d bytes) comparision buffer.\n",
148 // Begin analyzing .. 204 // Begin analyzing ..
149 dmPrint(2, "Analyzing ..\n"); 205 dmPrint(2, "Analyzing ..\n");
150 for (int nfile = 0; nfile < nsrcFiles; nfile++) 206 for (int nfile = 0; nfile < nsrcFiles; nfile++)
151 { 207 {
152 DMSourceFile *file = &srcFiles[nfile]; 208 DMSourceFile *file = &srcFiles[nfile];
209 dmInitStats(&file->stats);
210
211 for (size_t offs = 0; offs < file->size; offs++)
212 {
213 Uint8 bv = file->data[offs];
214 totalStats.cv[bv].count++;
215 file->stats.cv[bv].count++;
216 }
217
153 for (size_t offs = 0; offs < compBufSize; offs++) 218 for (size_t offs = 0; offs < compBufSize; offs++)
154 { 219 {
155 Uint8 data = offs < file->size ? file->data[offs] : 0; 220 Uint8 data = offs < file->size ? file->data[offs] : 0;
156 compBuf[offs].counts[data]++; 221 compBuf[offs].stats[data]++;
157 } 222 }
158 } 223
224 dmSortStats(&file->stats);
225 }
226 dmSortStats(&totalStats);
227
159 228
160 for (size_t offs = 0; offs < compBufSize; offs++) 229 for (size_t offs = 0; offs < compBufSize; offs++)
161 { 230 {
162 DMCompElem *el = &compBuf[offs]; 231 DMCompElem *el = &compBuf[offs];
163 for (int n = 0; n < SET_MAX_ELEMS; n++) 232 for (int n = 0; n < SET_MAX_ELEMS; n++)
164 { 233 {
165 if (el->counts[n] > 0) 234 if (el->stats[n] > 0)
166 { 235 {
167 el->variants++; 236 el->variants++;
168 el->data = n; 237 el->data = n;
169 } 238 }
170 } 239 }
191 } 260 }
192 } 261 }
193 262
194 printf("\n"); 263 printf("\n");
195 264
265 // Attempt further analysis
266 for (int nfile = 0; nfile < nsrcFiles; nfile++)
267 {
268 DMSourceFile *file = &srcFiles[nfile];
269 size_t len = file->size > compBufSize ? compBufSize : file->size;
270 for (size_t offs = 0; offs + 4 < len; offs++)
271 {
272 DMCompElem *elem = &compBuf[offs];
273
274 for (int variant = 3; variant >= 0; variant--)
275 {
276 size_t nmax = (variant < 2) ? sizeof(Uint16) : sizeof(Uint32);
277 Uint32 tmp = 0;
278
279 for (size_t n = 0; n < nmax; n++)
280 {
281 size_t boffs = (variant & 1) ? n : nmax - n;
282
283 tmp <<= 8;
284 tmp |= file->data[offs + boffs];
285 }
286
287 if (file->size - tmp < 32)
288 {
289 elem->interest[variant] += 32 - (file->size - tmp);
290 elem->interestF[variant]++;
291 }
292 }
293 }
294 }
295
296 printf("\nMore findings:\n");
297 for (size_t offs = 0; offs + 4 < compBufSize; offs++)
298 {
299 DMCompElem *elem = &compBuf[offs];
300
301 for (int variant = 0; variant < 4; variant++)
302 if (elem->interestF[variant] > 0)
303 {
304 printf("%08" DM_PRIx_SIZE_T " | V%d : %d / %d\n",
305 offs, variant,
306 elem->interestF[variant], elem->interest[variant]);
307 }
308 }
309
310 printf("\nGlobal most used bytes:\n");
311 dmPrintStats(&totalStats, 16, totalSize);
312
313 for (int nfile = 0; nfile < nsrcFiles; nfile++)
314 {
315 DMSourceFile *file = &srcFiles[nfile];
316 printf("Most used bytes for '%s':\n", file->filename);
317 dmPrintStats(&file->stats, 16, file->size);
318 }
319
196 out: 320 out:
197 for (int nfile = 0; nfile < nsrcFiles; nfile++) 321 for (int nfile = 0; nfile < nsrcFiles; nfile++)
198 { 322 {
199 DMSourceFile *file = &srcFiles[nfile]; 323 DMSourceFile *file = &srcFiles[nfile];
200 dmFree(file->data); 324 dmFree(file->data);