0
|
1 /*****************************************************************************
|
|
2 * Copyright (C) 2013 x265 project
|
|
3 *
|
|
4 * Authors: Steve Borho <steve@borho.org>
|
|
5 *
|
|
6 * This program is free software; you can redistribute it and/or modify
|
|
7 * it under the terms of the GNU General Public License as published by
|
|
8 * the Free Software Foundation; either version 2 of the License, or
|
|
9 * (at your option) any later version.
|
|
10 *
|
|
11 * This program is distributed in the hope that it will be useful,
|
|
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
14 * GNU General Public License for more details.
|
|
15 *
|
|
16 * You should have received a copy of the GNU General Public License
|
|
17 * along with this program; if not, write to the Free Software
|
|
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
|
19 *
|
|
20 * This program is also available under a commercial proprietary license.
|
|
21 * For more information, contact us at license @ x265.com.
|
|
22 *****************************************************************************/
|
|
23
|
|
24 #include "pixelharness.h"
|
|
25 #include "primitives.h"
|
|
26 #include "entropy.h"
|
|
27
|
|
28 using namespace X265_NS;
|
|
29
|
|
30 PixelHarness::PixelHarness()
|
|
31 {
|
|
32 /* [0] --- Random values
|
|
33 * [1] --- Minimum
|
|
34 * [2] --- Maximum */
|
|
35 for (int i = 0; i < BUFFSIZE; i++)
|
|
36 {
|
|
37 pixel_test_buff[0][i] = rand() % PIXEL_MAX;
|
|
38 short_test_buff[0][i] = (rand() % (2 * SMAX + 1)) - SMAX - 1; // max(SHORT_MIN, min(rand(), SMAX));
|
|
39 short_test_buff1[0][i] = rand() & PIXEL_MAX; // For block copy only
|
|
40 short_test_buff2[0][i] = rand() % 16383; // for addAvg
|
|
41 int_test_buff[0][i] = rand() % SHORT_MAX;
|
|
42 ushort_test_buff[0][i] = rand() % ((1 << 16) - 1);
|
|
43 uchar_test_buff[0][i] = rand() % ((1 << 8) - 1);
|
|
44
|
|
45 pixel_test_buff[1][i] = PIXEL_MIN;
|
|
46 short_test_buff[1][i] = SMIN;
|
|
47 short_test_buff1[1][i] = PIXEL_MIN;
|
|
48 short_test_buff2[1][i] = -16384;
|
|
49 int_test_buff[1][i] = SHORT_MIN;
|
|
50 ushort_test_buff[1][i] = PIXEL_MIN;
|
|
51 uchar_test_buff[1][i] = PIXEL_MIN;
|
|
52
|
|
53 pixel_test_buff[2][i] = PIXEL_MAX;
|
|
54 short_test_buff[2][i] = SMAX;
|
|
55 short_test_buff1[2][i] = PIXEL_MAX;
|
|
56 short_test_buff2[2][i] = 16383;
|
|
57 int_test_buff[2][i] = SHORT_MAX;
|
|
58 ushort_test_buff[2][i] = ((1 << 16) - 1);
|
|
59 uchar_test_buff[2][i] = 255;
|
|
60
|
|
61 pbuf1[i] = rand() & PIXEL_MAX;
|
|
62 pbuf2[i] = rand() & PIXEL_MAX;
|
|
63 pbuf3[i] = rand() & PIXEL_MAX;
|
|
64 pbuf4[i] = rand() & PIXEL_MAX;
|
|
65
|
|
66 sbuf1[i] = (rand() % (2 * SMAX + 1)) - SMAX - 1; //max(SHORT_MIN, min(rand(), SMAX));
|
|
67 sbuf2[i] = (rand() % (2 * SMAX + 1)) - SMAX - 1; //max(SHORT_MIN, min(rand(), SMAX));
|
|
68 ibuf1[i] = (rand() % (2 * SMAX + 1)) - SMAX - 1;
|
|
69 psbuf1[i] = psbuf4[i] = (rand() % 65) - 32; // range is between -32 to 32
|
|
70 psbuf2[i] = psbuf5[i] = (rand() % 3) - 1; // possible values {-1,0,1}
|
|
71 psbuf3[i] = (rand() % 129) - 128;
|
|
72 sbuf3[i] = rand() % PIXEL_MAX; // for blockcopy only
|
|
73 }
|
|
74 }
|
|
75
|
|
76 bool PixelHarness::check_pixelcmp(pixelcmp_t ref, pixelcmp_t opt)
|
|
77 {
|
|
78 int j = 0;
|
|
79 intptr_t stride = STRIDE;
|
|
80
|
|
81 for (int i = 0; i < ITERS; i++)
|
|
82 {
|
|
83 int index1 = rand() % TEST_CASES;
|
|
84 int index2 = rand() % TEST_CASES;
|
|
85 int vres = (int)checked(opt, pixel_test_buff[index1], stride, pixel_test_buff[index2] + j, stride);
|
|
86 int cres = ref(pixel_test_buff[index1], stride, pixel_test_buff[index2] + j, stride);
|
|
87 if (vres != cres)
|
|
88 return false;
|
|
89
|
|
90 reportfail();
|
|
91 j += INCR;
|
|
92 }
|
|
93
|
|
94 return true;
|
|
95 }
|
|
96
|
|
97 bool PixelHarness::check_pixel_sse(pixel_sse_t ref, pixel_sse_t opt)
|
|
98 {
|
|
99 int j = 0;
|
|
100 intptr_t stride = STRIDE;
|
|
101
|
|
102 for (int i = 0; i < ITERS; i++)
|
|
103 {
|
|
104 int index1 = rand() % TEST_CASES;
|
|
105 int index2 = rand() % TEST_CASES;
|
|
106 sse_ret_t vres = (sse_ret_t)checked(opt, pixel_test_buff[index1], stride, pixel_test_buff[index2] + j, stride);
|
|
107 sse_ret_t cres = ref(pixel_test_buff[index1], stride, pixel_test_buff[index2] + j, stride);
|
|
108 if (vres != cres)
|
|
109 return false;
|
|
110
|
|
111 reportfail();
|
|
112 j += INCR;
|
|
113 }
|
|
114
|
|
115 return true;
|
|
116 }
|
|
117
|
|
118 bool PixelHarness::check_pixel_sse_ss(pixel_sse_ss_t ref, pixel_sse_ss_t opt)
|
|
119 {
|
|
120 int j = 0;
|
|
121 intptr_t stride = STRIDE;
|
|
122
|
|
123 for (int i = 0; i < ITERS; i++)
|
|
124 {
|
|
125 int index1 = rand() % TEST_CASES;
|
|
126 int index2 = rand() % TEST_CASES;
|
|
127 sse_ret_t vres = (sse_ret_t)checked(opt, short_test_buff[index1], stride, short_test_buff[index2] + j, stride);
|
|
128 sse_ret_t cres = ref(short_test_buff[index1], stride, short_test_buff[index2] + j, stride);
|
|
129 if (vres != cres)
|
|
130 return false;
|
|
131
|
|
132 reportfail();
|
|
133 j += INCR;
|
|
134 }
|
|
135
|
|
136 return true;
|
|
137 }
|
|
138
|
|
139 bool PixelHarness::check_pixelcmp_x3(pixelcmp_x3_t ref, pixelcmp_x3_t opt)
|
|
140 {
|
|
141 ALIGN_VAR_16(int, cres[16]);
|
|
142 ALIGN_VAR_16(int, vres[16]);
|
|
143 int j = 0;
|
|
144 intptr_t stride = FENC_STRIDE - 5;
|
|
145 for (int i = 0; i < ITERS; i++)
|
|
146 {
|
|
147 int index1 = rand() % TEST_CASES;
|
|
148 int index2 = rand() % TEST_CASES;
|
|
149 checked(opt, pixel_test_buff[index1],
|
|
150 pixel_test_buff[index2] + j,
|
|
151 pixel_test_buff[index2] + j + 1,
|
|
152 pixel_test_buff[index2] + j + 2, stride, &vres[0]);
|
|
153 ref(pixel_test_buff[index1],
|
|
154 pixel_test_buff[index2] + j,
|
|
155 pixel_test_buff[index2] + j + 1,
|
|
156 pixel_test_buff[index2] + j + 2, stride, &cres[0]);
|
|
157 if ((vres[0] != cres[0]) || ((vres[1] != cres[1])) || ((vres[2] != cres[2])))
|
|
158 return false;
|
|
159
|
|
160 reportfail();
|
|
161 j += INCR;
|
|
162 }
|
|
163
|
|
164 return true;
|
|
165 }
|
|
166
|
|
167 bool PixelHarness::check_pixelcmp_x4(pixelcmp_x4_t ref, pixelcmp_x4_t opt)
|
|
168 {
|
|
169 ALIGN_VAR_16(int, cres[16]);
|
|
170 ALIGN_VAR_16(int, vres[16]);
|
|
171 int j = 0;
|
|
172 intptr_t stride = FENC_STRIDE - 5;
|
|
173 for (int i = 0; i < ITERS; i++)
|
|
174 {
|
|
175 int index1 = rand() % TEST_CASES;
|
|
176 int index2 = rand() % TEST_CASES;
|
|
177 checked(opt, pixel_test_buff[index1],
|
|
178 pixel_test_buff[index2] + j,
|
|
179 pixel_test_buff[index2] + j + 1,
|
|
180 pixel_test_buff[index2] + j + 2,
|
|
181 pixel_test_buff[index2] + j + 3, stride, &vres[0]);
|
|
182 ref(pixel_test_buff[index1],
|
|
183 pixel_test_buff[index2] + j,
|
|
184 pixel_test_buff[index2] + j + 1,
|
|
185 pixel_test_buff[index2] + j + 2,
|
|
186 pixel_test_buff[index2] + j + 3, stride, &cres[0]);
|
|
187
|
|
188 if ((vres[0] != cres[0]) || ((vres[1] != cres[1])) || ((vres[2] != cres[2])) || ((vres[3] != cres[3])))
|
|
189 return false;
|
|
190
|
|
191 reportfail();
|
|
192 j += INCR;
|
|
193 }
|
|
194
|
|
195 return true;
|
|
196 }
|
|
197
|
|
198 bool PixelHarness::check_calresidual(calcresidual_t ref, calcresidual_t opt)
|
|
199 {
|
|
200 ALIGN_VAR_16(int16_t, ref_dest[64 * 64]);
|
|
201 ALIGN_VAR_16(int16_t, opt_dest[64 * 64]);
|
|
202 memset(ref_dest, 0, 64 * 64 * sizeof(int16_t));
|
|
203 memset(opt_dest, 0, 64 * 64 * sizeof(int16_t));
|
|
204
|
|
205 int j = 0;
|
|
206 intptr_t stride = STRIDE;
|
|
207 for (int i = 0; i < ITERS; i++)
|
|
208 {
|
|
209 int index = i % TEST_CASES;
|
|
210 checked(opt, pbuf1 + j, pixel_test_buff[index] + j, opt_dest, stride);
|
|
211 ref(pbuf1 + j, pixel_test_buff[index] + j, ref_dest, stride);
|
|
212
|
|
213 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(int16_t)))
|
|
214 return false;
|
|
215
|
|
216 reportfail();
|
|
217 j += INCR;
|
|
218 }
|
|
219
|
|
220 return true;
|
|
221 }
|
|
222
|
|
223 bool PixelHarness::check_ssd_s(pixel_ssd_s_t ref, pixel_ssd_s_t opt)
|
|
224 {
|
|
225 int j = 0;
|
|
226 for (int i = 0; i < ITERS; i++)
|
|
227 {
|
|
228 // NOTE: stride must be multiple of 16, because minimum block is 4x4
|
|
229 int stride = (STRIDE + (rand() % STRIDE)) & ~15;
|
|
230 int cres = ref(sbuf1 + j, stride);
|
|
231 int vres = (int)checked(opt, sbuf1 + j, (intptr_t)stride);
|
|
232
|
|
233 if (cres != vres)
|
|
234 return false;
|
|
235
|
|
236 reportfail();
|
|
237 j += INCR;
|
|
238 }
|
|
239
|
|
240 return true;
|
|
241 }
|
|
242
|
|
243 bool PixelHarness::check_weightp(weightp_sp_t ref, weightp_sp_t opt)
|
|
244 {
|
|
245 ALIGN_VAR_16(pixel, ref_dest[64 * (64 + 1)]);
|
|
246 ALIGN_VAR_16(pixel, opt_dest[64 * (64 + 1)]);
|
|
247
|
|
248 memset(ref_dest, 0, 64 * 64 * sizeof(pixel));
|
|
249 memset(opt_dest, 0, 64 * 64 * sizeof(pixel));
|
|
250 int j = 0;
|
|
251 int width = 2 * (rand() % 32 + 1);
|
|
252 int height = 8;
|
|
253 int w0 = rand() % 128;
|
|
254 int shift = rand() % 8; // maximum is 7, see setFromWeightAndOffset()
|
|
255 int round = shift ? (1 << (shift - 1)) : 0;
|
|
256 int offset = (rand() % 256) - 128;
|
|
257 intptr_t stride = 64;
|
|
258 const int correction = (IF_INTERNAL_PREC - X265_DEPTH);
|
|
259
|
|
260 for (int i = 0; i < ITERS; i++)
|
|
261 {
|
|
262 int index = i % TEST_CASES;
|
|
263 checked(opt, short_test_buff[index] + j, opt_dest, stride, stride + 1, width, height, w0, round << correction, shift + correction, offset);
|
|
264 ref(short_test_buff[index] + j, ref_dest, stride, stride + 1, width, height, w0, round << correction, shift + correction, offset);
|
|
265
|
|
266 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)))
|
|
267 {
|
|
268 opt(short_test_buff[index] + j, opt_dest, stride, stride + 1, width, height, w0, round << correction, shift + correction, offset);
|
|
269 return false;
|
|
270 }
|
|
271
|
|
272 reportfail();
|
|
273 j += INCR;
|
|
274 }
|
|
275
|
|
276 return true;
|
|
277 }
|
|
278
|
|
279 bool PixelHarness::check_weightp(weightp_pp_t ref, weightp_pp_t opt)
|
|
280 {
|
|
281 ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
|
|
282 ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
|
|
283
|
|
284 memset(ref_dest, 0, 64 * 64 * sizeof(pixel));
|
|
285 memset(opt_dest, 0, 64 * 64 * sizeof(pixel));
|
|
286 int j = 0;
|
|
287 int width = 16 * (rand() % 4 + 1);
|
|
288 int height = 8;
|
|
289 int w0 = rand() % 128;
|
|
290 int shift = rand() % 8; // maximum is 7, see setFromWeightAndOffset()
|
|
291 int round = shift ? (1 << (shift - 1)) : 0;
|
|
292 int offset = (rand() % 256) - 128;
|
|
293 intptr_t stride = 64;
|
|
294 const int correction = (IF_INTERNAL_PREC - X265_DEPTH);
|
|
295 for (int i = 0; i < ITERS; i++)
|
|
296 {
|
|
297 int index = i % TEST_CASES;
|
|
298 checked(opt, pixel_test_buff[index] + j, opt_dest, stride, width, height, w0, round << correction, shift + correction, offset);
|
|
299 ref(pixel_test_buff[index] + j, ref_dest, stride, width, height, w0, round << correction, shift + correction, offset);
|
|
300
|
|
301 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)))
|
|
302 {
|
|
303 checked(opt, pixel_test_buff[index] + j, opt_dest, stride, width, height, w0, round << correction, shift + correction, offset);
|
|
304 return false;
|
|
305 }
|
|
306
|
|
307 reportfail();
|
|
308 j += INCR;
|
|
309 }
|
|
310
|
|
311 return true;
|
|
312 }
|
|
313
|
|
314 bool PixelHarness::check_downscale_t(downscale_t ref, downscale_t opt)
|
|
315 {
|
|
316 ALIGN_VAR_16(pixel, ref_destf[32 * 32]);
|
|
317 ALIGN_VAR_16(pixel, opt_destf[32 * 32]);
|
|
318
|
|
319 ALIGN_VAR_16(pixel, ref_desth[32 * 32]);
|
|
320 ALIGN_VAR_16(pixel, opt_desth[32 * 32]);
|
|
321
|
|
322 ALIGN_VAR_16(pixel, ref_destv[32 * 32]);
|
|
323 ALIGN_VAR_16(pixel, opt_destv[32 * 32]);
|
|
324
|
|
325 ALIGN_VAR_16(pixel, ref_destc[32 * 32]);
|
|
326 ALIGN_VAR_16(pixel, opt_destc[32 * 32]);
|
|
327
|
|
328 intptr_t src_stride = 64;
|
|
329 intptr_t dst_stride = 32;
|
|
330 int bx = 32;
|
|
331 int by = 32;
|
|
332 int j = 0;
|
|
333 for (int i = 0; i < ITERS; i++)
|
|
334 {
|
|
335 int index = i % TEST_CASES;
|
|
336 ref(pixel_test_buff[index] + j, ref_destf, ref_desth, ref_destv,
|
|
337 ref_destc, src_stride, dst_stride, bx, by);
|
|
338 checked(opt, pixel_test_buff[index] + j, opt_destf, opt_desth, opt_destv,
|
|
339 opt_destc, src_stride, dst_stride, bx, by);
|
|
340
|
|
341 if (memcmp(ref_destf, opt_destf, 32 * 32 * sizeof(pixel)))
|
|
342 return false;
|
|
343 if (memcmp(ref_desth, opt_desth, 32 * 32 * sizeof(pixel)))
|
|
344 return false;
|
|
345 if (memcmp(ref_destv, opt_destv, 32 * 32 * sizeof(pixel)))
|
|
346 return false;
|
|
347 if (memcmp(ref_destc, opt_destc, 32 * 32 * sizeof(pixel)))
|
|
348 return false;
|
|
349
|
|
350 reportfail();
|
|
351 j += INCR;
|
|
352 }
|
|
353
|
|
354 return true;
|
|
355 }
|
|
356
|
|
357 bool PixelHarness::check_cpy2Dto1D_shl_t(cpy2Dto1D_shl_t ref, cpy2Dto1D_shl_t opt)
|
|
358 {
|
|
359 ALIGN_VAR_16(int16_t, ref_dest[64 * 64]);
|
|
360 ALIGN_VAR_16(int16_t, opt_dest[64 * 64]);
|
|
361
|
|
362 memset(ref_dest, 0xCD, sizeof(ref_dest));
|
|
363 memset(opt_dest, 0xCD, sizeof(opt_dest));
|
|
364
|
|
365 int j = 0;
|
|
366 intptr_t stride = STRIDE;
|
|
367 for (int i = 0; i < ITERS; i++)
|
|
368 {
|
|
369 int shift = (rand() % 7 + 1);
|
|
370
|
|
371 int index = i % TEST_CASES;
|
|
372 checked(opt, opt_dest, short_test_buff[index] + j, stride, shift);
|
|
373 ref(ref_dest, short_test_buff[index] + j, stride, shift);
|
|
374
|
|
375 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(int16_t)))
|
|
376 return false;
|
|
377
|
|
378 reportfail();
|
|
379 j += INCR;
|
|
380 }
|
|
381
|
|
382 return true;
|
|
383 }
|
|
384
|
|
385 bool PixelHarness::check_cpy2Dto1D_shr_t(cpy2Dto1D_shr_t ref, cpy2Dto1D_shr_t opt)
|
|
386 {
|
|
387 ALIGN_VAR_16(int16_t, ref_dest[64 * 64]);
|
|
388 ALIGN_VAR_16(int16_t, opt_dest[64 * 64]);
|
|
389
|
|
390 memset(ref_dest, 0xCD, sizeof(ref_dest));
|
|
391 memset(opt_dest, 0xCD, sizeof(opt_dest));
|
|
392
|
|
393 int j = 0;
|
|
394 intptr_t stride = STRIDE;
|
|
395 for (int i = 0; i < ITERS; i++)
|
|
396 {
|
|
397 int shift = (rand() % 7 + 1);
|
|
398
|
|
399 int index = i % TEST_CASES;
|
|
400 checked(opt, opt_dest, short_test_buff[index] + j, stride, shift);
|
|
401 ref(ref_dest, short_test_buff[index] + j, stride, shift);
|
|
402
|
|
403 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(int16_t)))
|
|
404 return false;
|
|
405
|
|
406 reportfail();
|
|
407 j += INCR;
|
|
408 }
|
|
409
|
|
410 return true;
|
|
411 }
|
|
412
|
|
413 bool PixelHarness::check_copy_cnt_t(copy_cnt_t ref, copy_cnt_t opt)
|
|
414 {
|
|
415 ALIGN_VAR_16(int16_t, ref_dest[64 * 64]);
|
|
416 ALIGN_VAR_16(int16_t, opt_dest[64 * 64]);
|
|
417
|
|
418 memset(ref_dest, 0xCD, sizeof(ref_dest));
|
|
419 memset(opt_dest, 0xCD, sizeof(opt_dest));
|
|
420
|
|
421 int j = 0;
|
|
422 intptr_t stride = STRIDE;
|
|
423 for (int i = 0; i < ITERS; i++)
|
|
424 {
|
|
425 int index = i % TEST_CASES;
|
|
426 int opt_cnt = (int)checked(opt, opt_dest, short_test_buff1[index] + j, stride);
|
|
427 int ref_cnt = ref(ref_dest, short_test_buff1[index] + j, stride);
|
|
428
|
|
429 if ((ref_cnt != opt_cnt) || memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(int16_t)))
|
|
430 return false;
|
|
431
|
|
432 reportfail();
|
|
433 j += INCR;
|
|
434 }
|
|
435
|
|
436 return true;
|
|
437 }
|
|
438
|
|
439 bool PixelHarness::check_cpy1Dto2D_shl_t(cpy1Dto2D_shl_t ref, cpy1Dto2D_shl_t opt)
|
|
440 {
|
|
441 ALIGN_VAR_16(int16_t, ref_dest[64 * 64]);
|
|
442 ALIGN_VAR_16(int16_t, opt_dest[64 * 64]);
|
|
443
|
|
444 memset(ref_dest, 0xCD, sizeof(ref_dest));
|
|
445 memset(opt_dest, 0xCD, sizeof(opt_dest));
|
|
446
|
|
447 int j = 0;
|
|
448 intptr_t stride = STRIDE;
|
|
449 for (int i = 0; i < ITERS; i++)
|
|
450 {
|
|
451 int shift = (rand() % 7 + 1);
|
|
452
|
|
453 int index = i % TEST_CASES;
|
|
454 checked(opt, opt_dest, short_test_buff[index] + j, stride, shift);
|
|
455 ref(ref_dest, short_test_buff[index] + j, stride, shift);
|
|
456
|
|
457 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(int16_t)))
|
|
458 return false;
|
|
459
|
|
460 reportfail();
|
|
461 j += INCR;
|
|
462 }
|
|
463
|
|
464 return true;
|
|
465 }
|
|
466
|
|
467 bool PixelHarness::check_cpy1Dto2D_shr_t(cpy1Dto2D_shr_t ref, cpy1Dto2D_shr_t opt)
|
|
468 {
|
|
469 ALIGN_VAR_16(int16_t, ref_dest[64 * 64]);
|
|
470 ALIGN_VAR_16(int16_t, opt_dest[64 * 64]);
|
|
471
|
|
472 memset(ref_dest, 0xCD, sizeof(ref_dest));
|
|
473 memset(opt_dest, 0xCD, sizeof(opt_dest));
|
|
474
|
|
475 int j = 0;
|
|
476 intptr_t stride = STRIDE;
|
|
477 for (int i = 0; i < ITERS; i++)
|
|
478 {
|
|
479 int shift = (rand() % 7 + 1);
|
|
480
|
|
481 int index = i % TEST_CASES;
|
|
482 checked(opt, opt_dest, short_test_buff[index] + j, stride, shift);
|
|
483 ref(ref_dest, short_test_buff[index] + j, stride, shift);
|
|
484
|
|
485 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(int16_t)))
|
|
486 return false;
|
|
487
|
|
488 reportfail();
|
|
489 j += INCR;
|
|
490 }
|
|
491
|
|
492 return true;
|
|
493 }
|
|
494
|
|
495 bool PixelHarness::check_pixelavg_pp(pixelavg_pp_t ref, pixelavg_pp_t opt)
|
|
496 {
|
|
497 ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
|
|
498 ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
|
|
499
|
|
500 int j = 0;
|
|
501
|
|
502 memset(ref_dest, 0xCD, sizeof(ref_dest));
|
|
503 memset(opt_dest, 0xCD, sizeof(opt_dest));
|
|
504
|
|
505 intptr_t stride = STRIDE;
|
|
506 for (int i = 0; i < ITERS; i++)
|
|
507 {
|
|
508 int index1 = rand() % TEST_CASES;
|
|
509 int index2 = rand() % TEST_CASES;
|
|
510 checked(ref, ref_dest, stride, pixel_test_buff[index1] + j,
|
|
511 stride, pixel_test_buff[index2] + j, stride, 32);
|
|
512 opt(opt_dest, stride, pixel_test_buff[index1] + j,
|
|
513 stride, pixel_test_buff[index2] + j, stride, 32);
|
|
514
|
|
515 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)))
|
|
516 return false;
|
|
517
|
|
518 reportfail();
|
|
519 j += INCR;
|
|
520 }
|
|
521
|
|
522 return true;
|
|
523 }
|
|
524
|
|
525 bool PixelHarness::check_copy_pp(copy_pp_t ref, copy_pp_t opt)
|
|
526 {
|
|
527 ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
|
|
528 ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
|
|
529
|
|
530 // we don't know the partition size so we are checking the entire output buffer so
|
|
531 // we must initialize the buffers
|
|
532 memset(ref_dest, 0, sizeof(ref_dest));
|
|
533 memset(opt_dest, 0, sizeof(opt_dest));
|
|
534
|
|
535 int j = 0;
|
|
536 intptr_t stride = STRIDE;
|
|
537 for (int i = 0; i < ITERS; i++)
|
|
538 {
|
|
539 int index = i % TEST_CASES;
|
|
540 checked(opt, opt_dest, stride, pixel_test_buff[index] + j, stride);
|
|
541 ref(ref_dest, stride, pixel_test_buff[index] + j, stride);
|
|
542
|
|
543 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)))
|
|
544 return false;
|
|
545
|
|
546 reportfail();
|
|
547 j += INCR;
|
|
548 }
|
|
549
|
|
550 return true;
|
|
551 }
|
|
552
|
|
553 bool PixelHarness::check_copy_sp(copy_sp_t ref, copy_sp_t opt)
|
|
554 {
|
|
555 ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
|
|
556 ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
|
|
557
|
|
558 // we don't know the partition size so we are checking the entire output buffer so
|
|
559 // we must initialize the buffers
|
|
560 memset(ref_dest, 0xCD, sizeof(ref_dest));
|
|
561 memset(opt_dest, 0xCD, sizeof(opt_dest));
|
|
562
|
|
563 int j = 0;
|
|
564 intptr_t stride1 = 64, stride2 = STRIDE;
|
|
565 for (int i = 0; i < ITERS; i++)
|
|
566 {
|
|
567 int index = i % TEST_CASES;
|
|
568 checked(opt, opt_dest, stride1, short_test_buff1[index] + j, stride2);
|
|
569 ref(ref_dest, stride1, short_test_buff1[index] + j, stride2);
|
|
570
|
|
571 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)))
|
|
572 return false;
|
|
573
|
|
574 reportfail();
|
|
575 j += INCR;
|
|
576 }
|
|
577
|
|
578 return true;
|
|
579 }
|
|
580
|
|
581 bool PixelHarness::check_copy_ps(copy_ps_t ref, copy_ps_t opt)
|
|
582 {
|
|
583 ALIGN_VAR_16(int16_t, ref_dest[64 * 64]);
|
|
584 ALIGN_VAR_16(int16_t, opt_dest[64 * 64]);
|
|
585
|
|
586 // we don't know the partition size so we are checking the entire output buffer so
|
|
587 // we must initialize the buffers
|
|
588 memset(ref_dest, 0xCD, sizeof(ref_dest));
|
|
589 memset(opt_dest, 0xCD, sizeof(opt_dest));
|
|
590
|
|
591 int j = 0;
|
|
592 intptr_t stride = STRIDE;
|
|
593 for (int i = 0; i < ITERS; i++)
|
|
594 {
|
|
595 int index = i % TEST_CASES;
|
|
596 checked(opt, opt_dest, stride, pixel_test_buff[index] + j, stride);
|
|
597 ref(ref_dest, stride, pixel_test_buff[index] + j, stride);
|
|
598
|
|
599 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(int16_t)))
|
|
600 return false;
|
|
601
|
|
602 reportfail();
|
|
603 j += INCR;
|
|
604 }
|
|
605
|
|
606 return true;
|
|
607 }
|
|
608
|
|
609 bool PixelHarness::check_copy_ss(copy_ss_t ref, copy_ss_t opt)
|
|
610 {
|
|
611 ALIGN_VAR_16(int16_t, ref_dest[64 * 64]);
|
|
612 ALIGN_VAR_16(int16_t, opt_dest[64 * 64]);
|
|
613
|
|
614 // we don't know the partition size so we are checking the entire output buffer so
|
|
615 // we must initialize the buffers
|
|
616 memset(ref_dest, 0xCD, sizeof(ref_dest));
|
|
617 memset(opt_dest, 0xCD, sizeof(opt_dest));
|
|
618
|
|
619 int j = 0;
|
|
620 intptr_t stride = STRIDE;
|
|
621 for (int i = 0; i < ITERS; i++)
|
|
622 {
|
|
623 int index = i % TEST_CASES;
|
|
624 checked(opt, opt_dest, stride, short_test_buff1[index] + j, stride);
|
|
625 ref(ref_dest, stride, short_test_buff1[index] + j, stride);
|
|
626
|
|
627 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(int16_t)))
|
|
628 return false;
|
|
629
|
|
630 reportfail();
|
|
631 j += INCR;
|
|
632 }
|
|
633
|
|
634 return true;
|
|
635 }
|
|
636
|
|
637 bool PixelHarness::check_blockfill_s(blockfill_s_t ref, blockfill_s_t opt)
|
|
638 {
|
|
639 ALIGN_VAR_16(int16_t, ref_dest[64 * 64]);
|
|
640 ALIGN_VAR_16(int16_t, opt_dest[64 * 64]);
|
|
641
|
|
642 memset(ref_dest, 0xCD, sizeof(ref_dest));
|
|
643 memset(opt_dest, 0xCD, sizeof(opt_dest));
|
|
644
|
|
645 intptr_t stride = 64;
|
|
646 for (int i = 0; i < ITERS; i++)
|
|
647 {
|
|
648 int16_t value = (rand() % SHORT_MAX) + 1;
|
|
649
|
|
650 checked(opt, opt_dest, stride, value);
|
|
651 ref(ref_dest, stride, value);
|
|
652
|
|
653 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(int16_t)))
|
|
654 return false;
|
|
655
|
|
656 reportfail();
|
|
657 }
|
|
658
|
|
659 return true;
|
|
660 }
|
|
661
|
|
662 bool PixelHarness::check_pixel_sub_ps(pixel_sub_ps_t ref, pixel_sub_ps_t opt)
|
|
663 {
|
|
664 ALIGN_VAR_16(int16_t, ref_dest[64 * 64]);
|
|
665 ALIGN_VAR_16(int16_t, opt_dest[64 * 64]);
|
|
666
|
|
667 memset(ref_dest, 0xCD, sizeof(ref_dest));
|
|
668 memset(opt_dest, 0xCD, sizeof(opt_dest));
|
|
669
|
|
670 int j = 0;
|
|
671 intptr_t stride2 = 64, stride = STRIDE;
|
|
672 for (int i = 0; i < 1; i++)
|
|
673 {
|
|
674 int index1 = rand() % TEST_CASES;
|
|
675 int index2 = rand() % TEST_CASES;
|
|
676 checked(opt, opt_dest, stride2, pixel_test_buff[index1] + j,
|
|
677 pixel_test_buff[index2] + j, stride, stride);
|
|
678 ref(ref_dest, stride2, pixel_test_buff[index1] + j,
|
|
679 pixel_test_buff[index2] + j, stride, stride);
|
|
680
|
|
681 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(int16_t)))
|
|
682 return false;
|
|
683
|
|
684 reportfail();
|
|
685 j += INCR;
|
|
686 }
|
|
687
|
|
688 return true;
|
|
689 }
|
|
690
|
|
691 bool PixelHarness::check_scale1D_pp(scale1D_t ref, scale1D_t opt)
|
|
692 {
|
|
693 ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
|
|
694 ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
|
|
695
|
|
696 memset(ref_dest, 0, sizeof(ref_dest));
|
|
697 memset(opt_dest, 0, sizeof(opt_dest));
|
|
698
|
|
699 int j = 0;
|
|
700 for (int i = 0; i < ITERS; i++)
|
|
701 {
|
|
702 int index = i % TEST_CASES;
|
|
703 checked(opt, opt_dest, pixel_test_buff[index] + j);
|
|
704 ref(ref_dest, pixel_test_buff[index] + j);
|
|
705
|
|
706 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)))
|
|
707 return false;
|
|
708
|
|
709 reportfail();
|
|
710 j += INCR;
|
|
711 }
|
|
712
|
|
713 return true;
|
|
714 }
|
|
715
|
|
716 bool PixelHarness::check_scale2D_pp(scale2D_t ref, scale2D_t opt)
|
|
717 {
|
|
718 ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
|
|
719 ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
|
|
720
|
|
721 memset(ref_dest, 0, sizeof(ref_dest));
|
|
722 memset(opt_dest, 0, sizeof(opt_dest));
|
|
723
|
|
724 int j = 0;
|
|
725 intptr_t stride = STRIDE;
|
|
726 for (int i = 0; i < ITERS; i++)
|
|
727 {
|
|
728 int index = i % TEST_CASES;
|
|
729 checked(opt, opt_dest, pixel_test_buff[index] + j, stride);
|
|
730 ref(ref_dest, pixel_test_buff[index] + j, stride);
|
|
731
|
|
732 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)))
|
|
733 return false;
|
|
734
|
|
735 reportfail();
|
|
736 j += INCR;
|
|
737 }
|
|
738
|
|
739 return true;
|
|
740 }
|
|
741
|
|
742 bool PixelHarness::check_transpose(transpose_t ref, transpose_t opt)
|
|
743 {
|
|
744 ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
|
|
745 ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
|
|
746
|
|
747 memset(ref_dest, 0, sizeof(ref_dest));
|
|
748 memset(opt_dest, 0, sizeof(opt_dest));
|
|
749
|
|
750 int j = 0;
|
|
751 intptr_t stride = STRIDE;
|
|
752 for (int i = 0; i < ITERS; i++)
|
|
753 {
|
|
754 int index = i % TEST_CASES;
|
|
755 checked(opt, opt_dest, pixel_test_buff[index] + j, stride);
|
|
756 ref(ref_dest, pixel_test_buff[index] + j, stride);
|
|
757
|
|
758 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)))
|
|
759 return false;
|
|
760
|
|
761 reportfail();
|
|
762 j += INCR;
|
|
763 }
|
|
764
|
|
765 return true;
|
|
766 }
|
|
767
|
|
768 bool PixelHarness::check_pixel_add_ps(pixel_add_ps_t ref, pixel_add_ps_t opt)
|
|
769 {
|
|
770 ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
|
|
771 ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
|
|
772
|
|
773 memset(ref_dest, 0xCD, sizeof(ref_dest));
|
|
774 memset(opt_dest, 0xCD, sizeof(opt_dest));
|
|
775
|
|
776 int j = 0;
|
|
777 intptr_t stride2 = 64, stride = STRIDE;
|
|
778 for (int i = 0; i < ITERS; i++)
|
|
779 {
|
|
780 int index1 = rand() % TEST_CASES;
|
|
781 int index2 = rand() % TEST_CASES;
|
|
782 checked(opt, opt_dest, stride2, pixel_test_buff[index1] + j, short_test_buff[index2] + j, stride, stride);
|
|
783 ref(ref_dest, stride2, pixel_test_buff[index1] + j, short_test_buff[index2] + j, stride, stride);
|
|
784
|
|
785 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)))
|
|
786 return false;
|
|
787
|
|
788 reportfail();
|
|
789 j += INCR;
|
|
790 }
|
|
791
|
|
792 return true;
|
|
793 }
|
|
794
|
|
795 bool PixelHarness::check_pixel_var(var_t ref, var_t opt)
|
|
796 {
|
|
797 int j = 0;
|
|
798
|
|
799 intptr_t stride = STRIDE;
|
|
800
|
|
801 for (int i = 0; i < ITERS; i++)
|
|
802 {
|
|
803 int index = i % TEST_CASES;
|
|
804 uint64_t vres = checked(opt, pixel_test_buff[index], stride);
|
|
805 uint64_t cres = ref(pixel_test_buff[index], stride);
|
|
806 if (vres != cres)
|
|
807 return false;
|
|
808
|
|
809 reportfail();
|
|
810 j += INCR;
|
|
811 }
|
|
812
|
|
813 return true;
|
|
814 }
|
|
815
|
|
816 bool PixelHarness::check_ssim_4x4x2_core(ssim_4x4x2_core_t ref, ssim_4x4x2_core_t opt)
|
|
817 {
|
|
818 ALIGN_VAR_32(int, sum0[2][4]);
|
|
819 ALIGN_VAR_32(int, sum1[2][4]);
|
|
820
|
|
821 for (int i = 0; i < ITERS; i++)
|
|
822 {
|
|
823 intptr_t stride = rand() % 64;
|
|
824 int index1 = rand() % TEST_CASES;
|
|
825 int index2 = rand() % TEST_CASES;
|
|
826 ref(pixel_test_buff[index1] + i, stride, pixel_test_buff[index2] + i, stride, sum0);
|
|
827 checked(opt, pixel_test_buff[index1] + i, stride, pixel_test_buff[index2] + i, stride, sum1);
|
|
828
|
|
829 if (memcmp(sum0, sum1, sizeof(sum0)))
|
|
830 return false;
|
|
831
|
|
832 reportfail();
|
|
833 }
|
|
834
|
|
835 return true;
|
|
836 }
|
|
837
|
|
838 bool PixelHarness::check_ssim_end(ssim_end4_t ref, ssim_end4_t opt)
|
|
839 {
|
|
840 ALIGN_VAR_32(int, sum0[5][4]);
|
|
841 ALIGN_VAR_32(int, sum1[5][4]);
|
|
842
|
|
843 for (int i = 0; i < ITERS; i++)
|
|
844 {
|
|
845 for (int j = 0; j < 5; j++)
|
|
846 {
|
|
847 for (int k = 0; k < 4; k++)
|
|
848 {
|
|
849 sum0[j][k] = rand() % (1 << 12);
|
|
850 sum1[j][k] = rand() % (1 << 12);
|
|
851 }
|
|
852 }
|
|
853
|
|
854 int width = (rand() % 4) + 1; // range[1-4]
|
|
855 float cres = ref(sum0, sum1, width);
|
|
856 float vres = checked_float(opt, sum0, sum1, width);
|
|
857 if (fabs(vres - cres) > 0.00001)
|
|
858 return false;
|
|
859
|
|
860 reportfail();
|
|
861 }
|
|
862
|
|
863 return true;
|
|
864 }
|
|
865
|
|
866 bool PixelHarness::check_addAvg(addAvg_t ref, addAvg_t opt)
|
|
867 {
|
|
868 ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
|
|
869 ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
|
|
870
|
|
871 int j = 0;
|
|
872
|
|
873 memset(ref_dest, 0xCD, sizeof(ref_dest));
|
|
874 memset(opt_dest, 0xCD, sizeof(opt_dest));
|
|
875 intptr_t stride = STRIDE;
|
|
876
|
|
877 for (int i = 0; i < ITERS; i++)
|
|
878 {
|
|
879 int index1 = rand() % TEST_CASES;
|
|
880 int index2 = rand() % TEST_CASES;
|
|
881 ref(short_test_buff2[index1] + j, short_test_buff2[index2] + j, ref_dest, stride, stride, stride);
|
|
882 checked(opt, short_test_buff2[index1] + j, short_test_buff2[index2] + j, opt_dest, stride, stride, stride);
|
|
883 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)))
|
|
884 return false;
|
|
885
|
|
886 reportfail();
|
|
887 j += INCR;
|
|
888 }
|
|
889
|
|
890 return true;
|
|
891 }
|
|
892
|
|
893 bool PixelHarness::check_calSign(sign_t ref, sign_t opt)
|
|
894 {
|
|
895 ALIGN_VAR_16(int8_t, ref_dest[64 * 2]);
|
|
896 ALIGN_VAR_16(int8_t, opt_dest[64 * 2]);
|
|
897
|
|
898 memset(ref_dest, 0xCD, sizeof(ref_dest));
|
|
899 memset(opt_dest, 0xCD, sizeof(opt_dest));
|
|
900
|
|
901 int j = 0;
|
|
902
|
|
903 for (int i = 0; i < ITERS; i++)
|
|
904 {
|
|
905 int width = (rand() % 64) + 1;
|
|
906
|
|
907 ref(ref_dest, pbuf2 + j, pbuf3 + j, width);
|
|
908 checked(opt, opt_dest, pbuf2 + j, pbuf3 + j, width);
|
|
909
|
|
910 if (memcmp(ref_dest, opt_dest, sizeof(ref_dest)))
|
|
911 return false;
|
|
912
|
|
913 reportfail();
|
|
914 j += INCR;
|
|
915 }
|
|
916
|
|
917 return true;
|
|
918 }
|
|
919
|
|
920 bool PixelHarness::check_saoCuOrgE0_t(saoCuOrgE0_t ref, saoCuOrgE0_t opt)
|
|
921 {
|
|
922 ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
|
|
923 ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
|
|
924
|
|
925 for (int i = 0; i < 64 * 64; i++)
|
|
926 ref_dest[i] = opt_dest[i] = rand() % (PIXEL_MAX);
|
|
927
|
|
928 int j = 0;
|
|
929
|
|
930 for (int i = 0; i < ITERS; i++)
|
|
931 {
|
|
932 int width = 16 * (rand() % 4 + 1);
|
|
933 int stride = width + 1;
|
|
934
|
|
935 ref(ref_dest, psbuf1 + j, width, psbuf2 + j, stride);
|
|
936 checked(opt, opt_dest, psbuf1 + j, width, psbuf5 + j, stride);
|
|
937
|
|
938 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)))
|
|
939 return false;
|
|
940
|
|
941 reportfail();
|
|
942 j += INCR;
|
|
943 }
|
|
944
|
|
945 return true;
|
|
946 }
|
|
947
|
|
948 bool PixelHarness::check_saoCuOrgE1_t(saoCuOrgE1_t ref, saoCuOrgE1_t opt)
|
|
949 {
|
|
950 ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
|
|
951 ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
|
|
952
|
|
953 for (int i = 0; i < 64 * 64; i++)
|
|
954 ref_dest[i] = opt_dest[i] = rand() % (PIXEL_MAX);
|
|
955
|
|
956 int j = 0;
|
|
957
|
|
958 for (int i = 0; i < ITERS; i++)
|
|
959 {
|
|
960 int width = 16 * (rand() % 4 + 1);
|
|
961 int stride = width + 1;
|
|
962
|
|
963 ref(ref_dest, psbuf2 + j, psbuf1 + j, stride, width);
|
|
964 checked(opt, opt_dest, psbuf5 + j, psbuf1 + j, stride, width);
|
|
965
|
|
966 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)) || memcmp(psbuf2, psbuf5, BUFFSIZE))
|
|
967 return false;
|
|
968
|
|
969 reportfail();
|
|
970 j += INCR;
|
|
971 }
|
|
972
|
|
973 return true;
|
|
974 }
|
|
975
|
|
976 bool PixelHarness::check_saoCuOrgE2_t(saoCuOrgE2_t ref[2], saoCuOrgE2_t opt[2])
|
|
977 {
|
|
978 ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
|
|
979 ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
|
|
980
|
|
981 for (int i = 0; i < 64 * 64; i++)
|
|
982 ref_dest[i] = opt_dest[i] = rand() % (PIXEL_MAX);
|
|
983
|
|
984 for (int id = 0; id < 2; id++)
|
|
985 {
|
|
986 int j = 0;
|
|
987 if (opt[id])
|
|
988 {
|
|
989 for (int i = 0; i < ITERS; i++)
|
|
990 {
|
|
991 int width = 16 * (1 << (id * (rand() % 2 + 1))) - (rand() % 2);
|
|
992 int stride = width + 1;
|
|
993
|
|
994 ref[width > 16](ref_dest, psbuf1 + j, psbuf2 + j, psbuf3 + j, width, stride);
|
|
995 checked(opt[width > 16], opt_dest, psbuf4 + j, psbuf2 + j, psbuf3 + j, width, stride);
|
|
996
|
|
997 if (memcmp(psbuf1 + j, psbuf4 + j, width * sizeof(int8_t)))
|
|
998 return false;
|
|
999
|
|
1000 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)))
|
|
1001 return false;
|
|
1002
|
|
1003 reportfail();
|
|
1004 j += INCR;
|
|
1005 }
|
|
1006 }
|
|
1007 }
|
|
1008
|
|
1009 return true;
|
|
1010 }
|
|
1011
|
|
1012 bool PixelHarness::check_saoCuOrgE3_t(saoCuOrgE3_t ref, saoCuOrgE3_t opt)
|
|
1013 {
|
|
1014 ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
|
|
1015 ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
|
|
1016
|
|
1017 for (int i = 0; i < 64 * 64; i++)
|
|
1018 ref_dest[i] = opt_dest[i] = rand() % (PIXEL_MAX);
|
|
1019
|
|
1020 int j = 0;
|
|
1021
|
|
1022 for (int i = 0; i < ITERS; i++)
|
|
1023 {
|
|
1024 int stride = 16 * (rand() % 4 + 1);
|
|
1025 int start = rand() % 2;
|
|
1026 int end = 16 - rand() % 2;
|
|
1027
|
|
1028 ref(ref_dest, psbuf2 + j, psbuf1 + j, stride, start, end);
|
|
1029 checked(opt, opt_dest, psbuf5 + j, psbuf1 + j, stride, start, end);
|
|
1030
|
|
1031 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)) || memcmp(psbuf2, psbuf5, BUFFSIZE))
|
|
1032 return false;
|
|
1033
|
|
1034 reportfail();
|
|
1035 j += INCR;
|
|
1036 }
|
|
1037
|
|
1038 return true;
|
|
1039 }
|
|
1040
|
|
1041 bool PixelHarness::check_saoCuStatsBO_t(saoCuStatsBO_t ref, saoCuStatsBO_t opt)
|
|
1042 {
|
|
1043 enum { NUM_EDGETYPE = 33 }; // classIdx = 1 + (rec[x] >> 3);
|
|
1044 int32_t stats_ref[NUM_EDGETYPE];
|
|
1045 int32_t stats_vec[NUM_EDGETYPE];
|
|
1046
|
|
1047 int32_t count_ref[NUM_EDGETYPE];
|
|
1048 int32_t count_vec[NUM_EDGETYPE];
|
|
1049
|
|
1050 int j = 0;
|
|
1051 for (int i = 0; i < ITERS; i++)
|
|
1052 {
|
|
1053 // initialize input data to random, the dynamic range wrong but good to verify our asm code
|
|
1054 for (int x = 0; x < NUM_EDGETYPE; x++)
|
|
1055 {
|
|
1056 stats_ref[x] = stats_vec[x] = rand();
|
|
1057 count_ref[x] = count_vec[x] = rand();
|
|
1058 }
|
|
1059
|
|
1060 intptr_t stride = 16 * (rand() % 4 + 1);
|
|
1061 int endX = MAX_CU_SIZE - (rand() % 5);
|
|
1062 int endY = MAX_CU_SIZE - (rand() % 4) - 1;
|
|
1063
|
|
1064 ref(pbuf2 + j + 1, pbuf3 + 1, stride, endX, endY, stats_ref, count_ref);
|
|
1065 checked(opt, pbuf2 + j + 1, pbuf3 + 1, stride, endX, endY, stats_vec, count_vec);
|
|
1066
|
|
1067 if (memcmp(stats_ref, stats_vec, sizeof(stats_ref)) || memcmp(count_ref, count_vec, sizeof(count_ref)))
|
|
1068 return false;
|
|
1069
|
|
1070 reportfail();
|
|
1071 j += INCR;
|
|
1072 }
|
|
1073
|
|
1074 return true;
|
|
1075 }
|
|
1076
|
|
1077 bool PixelHarness::check_saoCuStatsE0_t(saoCuStatsE0_t ref, saoCuStatsE0_t opt)
|
|
1078 {
|
|
1079 enum { NUM_EDGETYPE = 5 };
|
|
1080 int32_t stats_ref[NUM_EDGETYPE];
|
|
1081 int32_t stats_vec[NUM_EDGETYPE];
|
|
1082
|
|
1083 int32_t count_ref[NUM_EDGETYPE];
|
|
1084 int32_t count_vec[NUM_EDGETYPE];
|
|
1085
|
|
1086 int j = 0;
|
|
1087 for (int i = 0; i < ITERS; i++)
|
|
1088 {
|
|
1089 // initialize input data to random, the dynamic range wrong but good to verify our asm code
|
|
1090 for (int x = 0; x < NUM_EDGETYPE; x++)
|
|
1091 {
|
|
1092 stats_ref[x] = stats_vec[x] = rand();
|
|
1093 count_ref[x] = count_vec[x] = rand();
|
|
1094 }
|
|
1095
|
|
1096 intptr_t stride = 16 * (rand() % 4 + 1);
|
|
1097 int endX = MAX_CU_SIZE - (rand() % 5) - 1;
|
|
1098 int endY = MAX_CU_SIZE - (rand() % 4) - 1;
|
|
1099
|
|
1100 ref(pbuf2 + j + 1, pbuf3 + j + 1, stride, endX, endY, stats_ref, count_ref);
|
|
1101 checked(opt, pbuf2 + j + 1, pbuf3 + j + 1, stride, endX, endY, stats_vec, count_vec);
|
|
1102
|
|
1103 if (memcmp(stats_ref, stats_vec, sizeof(stats_ref)) || memcmp(count_ref, count_vec, sizeof(count_ref)))
|
|
1104 return false;
|
|
1105
|
|
1106 reportfail();
|
|
1107 j += INCR;
|
|
1108 }
|
|
1109
|
|
1110 return true;
|
|
1111 }
|
|
1112
|
|
1113 bool PixelHarness::check_saoCuStatsE1_t(saoCuStatsE1_t ref, saoCuStatsE1_t opt)
|
|
1114 {
|
|
1115 enum { NUM_EDGETYPE = 5 };
|
|
1116 int32_t stats_ref[NUM_EDGETYPE];
|
|
1117 int32_t stats_vec[NUM_EDGETYPE];
|
|
1118
|
|
1119 int32_t count_ref[NUM_EDGETYPE];
|
|
1120 int32_t count_vec[NUM_EDGETYPE];
|
|
1121
|
|
1122 int8_t _upBuff1_ref[MAX_CU_SIZE + 2], *upBuff1_ref = _upBuff1_ref + 1;
|
|
1123 int8_t _upBuff1_vec[MAX_CU_SIZE + 2], *upBuff1_vec = _upBuff1_vec + 1;
|
|
1124
|
|
1125 int j = 0;
|
|
1126
|
|
1127 for (int i = 0; i < ITERS; i++)
|
|
1128 {
|
|
1129 // initialize input data to random, the dynamic range wrong but good to verify our asm code
|
|
1130 for (int x = 0; x < NUM_EDGETYPE; x++)
|
|
1131 {
|
|
1132 stats_ref[x] = stats_vec[x] = rand();
|
|
1133 count_ref[x] = count_vec[x] = rand();
|
|
1134 }
|
|
1135
|
|
1136 // initial sign
|
|
1137 for (int x = 0; x < MAX_CU_SIZE + 2; x++)
|
|
1138 _upBuff1_ref[x] = _upBuff1_vec[x] = (rand() % 3) - 1;
|
|
1139
|
|
1140 intptr_t stride = 16 * (rand() % 4 + 1);
|
|
1141 int endX = MAX_CU_SIZE - (rand() % 5);
|
|
1142 int endY = MAX_CU_SIZE - (rand() % 4) - 1;
|
|
1143
|
|
1144 ref(pbuf2 + 1, pbuf3 + 1, stride, upBuff1_ref, endX, endY, stats_ref, count_ref);
|
|
1145 checked(opt, pbuf2 + 1, pbuf3 + 1, stride, upBuff1_vec, endX, endY, stats_vec, count_vec);
|
|
1146
|
|
1147 if ( memcmp(_upBuff1_ref, _upBuff1_vec, sizeof(_upBuff1_ref))
|
|
1148 || memcmp(stats_ref, stats_vec, sizeof(stats_ref))
|
|
1149 || memcmp(count_ref, count_vec, sizeof(count_ref)))
|
|
1150 return false;
|
|
1151
|
|
1152 reportfail();
|
|
1153 j += INCR;
|
|
1154 }
|
|
1155
|
|
1156 return true;
|
|
1157 }
|
|
1158
|
|
1159 bool PixelHarness::check_saoCuStatsE2_t(saoCuStatsE2_t ref, saoCuStatsE2_t opt)
|
|
1160 {
|
|
1161 enum { NUM_EDGETYPE = 5 };
|
|
1162 int32_t stats_ref[NUM_EDGETYPE];
|
|
1163 int32_t stats_vec[NUM_EDGETYPE];
|
|
1164
|
|
1165 int32_t count_ref[NUM_EDGETYPE];
|
|
1166 int32_t count_vec[NUM_EDGETYPE];
|
|
1167
|
|
1168 int8_t _upBuff1_ref[MAX_CU_SIZE + 2], *upBuff1_ref = _upBuff1_ref + 1;
|
|
1169 int8_t _upBufft_ref[MAX_CU_SIZE + 2], *upBufft_ref = _upBufft_ref + 1;
|
|
1170 int8_t _upBuff1_vec[MAX_CU_SIZE + 2], *upBuff1_vec = _upBuff1_vec + 1;
|
|
1171 int8_t _upBufft_vec[MAX_CU_SIZE + 2], *upBufft_vec = _upBufft_vec + 1;
|
|
1172
|
|
1173 int j = 0;
|
|
1174
|
|
1175 // NOTE: verify more times since our asm is NOT exact match to C, the output of upBuff* will be DIFFERENT
|
|
1176 for (int i = 0; i < ITERS * 10; i++)
|
|
1177 {
|
|
1178 // initialize input data to random, the dynamic range wrong but good to verify our asm code
|
|
1179 for (int x = 0; x < NUM_EDGETYPE; x++)
|
|
1180 {
|
|
1181 stats_ref[x] = stats_vec[x] = rand();
|
|
1182 count_ref[x] = count_vec[x] = rand();
|
|
1183 }
|
|
1184
|
|
1185 // initial sign
|
|
1186 for (int x = 0; x < MAX_CU_SIZE + 2; x++)
|
|
1187 {
|
|
1188 _upBuff1_ref[x] = _upBuff1_vec[x] = (rand() % 3) - 1;
|
|
1189 _upBufft_ref[x] = _upBufft_vec[x] = (rand() % 3) - 1;
|
|
1190 }
|
|
1191
|
|
1192 intptr_t stride = 16 * (rand() % 4 + 1);
|
|
1193 int endX = MAX_CU_SIZE - (rand() % 5) - 1;
|
|
1194 int endY = MAX_CU_SIZE - (rand() % 4) - 1;
|
|
1195
|
|
1196 ref(pbuf2 + 1, pbuf3 + 1, stride, upBuff1_ref, upBufft_ref, endX, endY, stats_ref, count_ref);
|
|
1197 checked(opt, pbuf2 + 1, pbuf3 + 1, stride, upBuff1_vec, upBufft_vec, endX, endY, stats_vec, count_vec);
|
|
1198
|
|
1199 // TODO: don't check upBuff*, the latest output pixels different, and can move into stack temporary buffer in future
|
|
1200 if ( memcmp(_upBuff1_ref, _upBuff1_vec, sizeof(_upBuff1_ref))
|
|
1201 || memcmp(_upBufft_ref, _upBufft_vec, sizeof(_upBufft_ref))
|
|
1202 || memcmp(stats_ref, stats_vec, sizeof(stats_ref))
|
|
1203 || memcmp(count_ref, count_vec, sizeof(count_ref)))
|
|
1204 return false;
|
|
1205
|
|
1206 reportfail();
|
|
1207 j += INCR;
|
|
1208 }
|
|
1209
|
|
1210 return true;
|
|
1211 }
|
|
1212
|
|
1213 bool PixelHarness::check_saoCuStatsE3_t(saoCuStatsE3_t ref, saoCuStatsE3_t opt)
|
|
1214 {
|
|
1215 enum { NUM_EDGETYPE = 5 };
|
|
1216 int32_t stats_ref[NUM_EDGETYPE];
|
|
1217 int32_t stats_vec[NUM_EDGETYPE];
|
|
1218
|
|
1219 int32_t count_ref[NUM_EDGETYPE];
|
|
1220 int32_t count_vec[NUM_EDGETYPE];
|
|
1221
|
|
1222 int8_t _upBuff1_ref[MAX_CU_SIZE + 2], *upBuff1_ref = _upBuff1_ref + 1;
|
|
1223 int8_t _upBuff1_vec[MAX_CU_SIZE + 2], *upBuff1_vec = _upBuff1_vec + 1;
|
|
1224
|
|
1225 int j = 0;
|
|
1226
|
|
1227 // (const pixel *fenc, const pixel *rec, intptr_t stride, int8_t *upBuff1, int endX, int endY, int32_t *stats, int32_t *count)
|
|
1228 for (int i = 0; i < ITERS; i++)
|
|
1229 {
|
|
1230 // initialize input data to random, the dynamic range wrong but good to verify our asm code
|
|
1231 for (int x = 0; x < NUM_EDGETYPE; x++)
|
|
1232 {
|
|
1233 stats_ref[x] = stats_vec[x] = rand();
|
|
1234 count_ref[x] = count_vec[x] = rand();
|
|
1235 }
|
|
1236
|
|
1237 // initial sign
|
|
1238 for (int x = 0; x < (int)sizeof(_upBuff1_ref); x++)
|
|
1239 {
|
|
1240 _upBuff1_ref[x] = _upBuff1_vec[x] = (rand() % 3) - 1;
|
|
1241 }
|
|
1242
|
|
1243 intptr_t stride = 16 * (rand() % 4 + 1);
|
|
1244 int endX = MAX_CU_SIZE - (rand() % 5) - 1;
|
|
1245 int endY = MAX_CU_SIZE - (rand() % 4) - 1;
|
|
1246
|
|
1247 ref(pbuf2, pbuf3, stride, upBuff1_ref, endX, endY, stats_ref, count_ref);
|
|
1248 checked(opt, pbuf2, pbuf3, stride, upBuff1_vec, endX, endY, stats_vec, count_vec);
|
|
1249
|
|
1250 if ( memcmp(_upBuff1_ref, _upBuff1_vec, sizeof(_upBuff1_ref))
|
|
1251 || memcmp(stats_ref, stats_vec, sizeof(stats_ref))
|
|
1252 || memcmp(count_ref, count_vec, sizeof(count_ref)))
|
|
1253 return false;
|
|
1254
|
|
1255 reportfail();
|
|
1256 j += INCR;
|
|
1257 }
|
|
1258
|
|
1259 return true;
|
|
1260 }
|
|
1261
|
|
1262 bool PixelHarness::check_saoCuOrgE3_32_t(saoCuOrgE3_t ref, saoCuOrgE3_t opt)
|
|
1263 {
|
|
1264 ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
|
|
1265 ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
|
|
1266
|
|
1267 for (int i = 0; i < 64 * 64; i++)
|
|
1268 ref_dest[i] = opt_dest[i] = rand() % (PIXEL_MAX);
|
|
1269
|
|
1270 int j = 0;
|
|
1271
|
|
1272 for (int i = 0; i < ITERS; i++)
|
|
1273 {
|
|
1274 int stride = 32 * (rand() % 2 + 1);
|
|
1275 int start = rand() % 2;
|
|
1276 int end = (32 * (rand() % 2 + 1)) - rand() % 2;
|
|
1277
|
|
1278 ref(ref_dest, psbuf2 + j, psbuf1 + j, stride, start, end);
|
|
1279 checked(opt, opt_dest, psbuf5 + j, psbuf1 + j, stride, start, end);
|
|
1280
|
|
1281 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)) || memcmp(psbuf2, psbuf5, BUFFSIZE))
|
|
1282 return false;
|
|
1283
|
|
1284 reportfail();
|
|
1285 j += INCR;
|
|
1286 }
|
|
1287
|
|
1288 return true;
|
|
1289 }
|
|
1290
|
|
1291 bool PixelHarness::check_planecopy_sp(planecopy_sp_t ref, planecopy_sp_t opt)
|
|
1292 {
|
|
1293 ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
|
|
1294 ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
|
|
1295
|
|
1296 memset(ref_dest, 0xCD, sizeof(ref_dest));
|
|
1297 memset(opt_dest, 0xCD, sizeof(opt_dest));
|
|
1298 int width = 32 + rand() % 32;
|
|
1299 int height = 32 + rand() % 32;
|
|
1300 intptr_t srcStride = 64;
|
|
1301 intptr_t dstStride = width;
|
|
1302 int j = 0;
|
|
1303
|
|
1304 for (int i = 0; i < ITERS; i++)
|
|
1305 {
|
|
1306 int index = i % TEST_CASES;
|
|
1307 checked(opt, ushort_test_buff[index] + j, srcStride, opt_dest, dstStride, width, height, (int)8, (uint16_t)((1 << X265_DEPTH) - 1));
|
|
1308 ref(ushort_test_buff[index] + j, srcStride, ref_dest, dstStride, width, height, (int)8, (uint16_t)((1 << X265_DEPTH) - 1));
|
|
1309
|
|
1310 if (memcmp(ref_dest, opt_dest, width * height * sizeof(pixel)))
|
|
1311 return false;
|
|
1312
|
|
1313 reportfail();
|
|
1314 j += INCR;
|
|
1315 }
|
|
1316
|
|
1317 return true;
|
|
1318 }
|
|
1319
|
|
1320 bool PixelHarness::check_planecopy_cp(planecopy_cp_t ref, planecopy_cp_t opt)
|
|
1321 {
|
|
1322 ALIGN_VAR_16(pixel, ref_dest[64 * 64 * 2]);
|
|
1323 ALIGN_VAR_16(pixel, opt_dest[64 * 64 * 2]);
|
|
1324
|
|
1325 memset(ref_dest, 0xCD, sizeof(ref_dest));
|
|
1326 memset(opt_dest, 0xCD, sizeof(opt_dest));
|
|
1327
|
|
1328 int width = 16 + rand() % 48;
|
|
1329 int height = 16 + rand() % 48;
|
|
1330 intptr_t srcStride = 64;
|
|
1331 intptr_t dstStride = width;
|
|
1332 int j = 0;
|
|
1333
|
|
1334 for (int i = 0; i < ITERS; i++)
|
|
1335 {
|
|
1336 int index = i % TEST_CASES;
|
|
1337 checked(opt, uchar_test_buff[index] + j, srcStride, opt_dest, dstStride, width, height, (int)2);
|
|
1338 ref(uchar_test_buff[index] + j, srcStride, ref_dest, dstStride, width, height, (int)2);
|
|
1339
|
|
1340 if (memcmp(ref_dest, opt_dest, sizeof(ref_dest)))
|
|
1341 return false;
|
|
1342
|
|
1343 reportfail();
|
|
1344 j += INCR;
|
|
1345 }
|
|
1346
|
|
1347 return true;
|
|
1348 }
|
|
1349
|
|
1350 bool PixelHarness::check_cutree_propagate_cost(cutree_propagate_cost ref, cutree_propagate_cost opt)
|
|
1351 {
|
|
1352 ALIGN_VAR_16(int, ref_dest[64 * 64]);
|
|
1353 ALIGN_VAR_16(int, opt_dest[64 * 64]);
|
|
1354
|
|
1355 memset(ref_dest, 0xCD, sizeof(ref_dest));
|
|
1356 memset(opt_dest, 0xCD, sizeof(opt_dest));
|
|
1357
|
|
1358 double fps = 1.0;
|
|
1359 int width = 16 + rand() % 64;
|
|
1360 int j = 0;
|
|
1361
|
|
1362 for (int i = 0; i < ITERS; i++)
|
|
1363 {
|
|
1364 int index = i % TEST_CASES;
|
|
1365 checked(opt, opt_dest, ushort_test_buff[index] + j, int_test_buff[index] + j, ushort_test_buff[index] + j, int_test_buff[index] + j, &fps, width);
|
|
1366 ref(ref_dest, ushort_test_buff[index] + j, int_test_buff[index] + j, ushort_test_buff[index] + j, int_test_buff[index] + j, &fps, width);
|
|
1367
|
|
1368 if (memcmp(ref_dest, opt_dest, width * sizeof(pixel)))
|
|
1369 return false;
|
|
1370
|
|
1371 reportfail();
|
|
1372 j += INCR;
|
|
1373 }
|
|
1374
|
|
1375 return true;
|
|
1376 }
|
|
1377
|
|
1378 bool PixelHarness::check_psyCost_pp(pixelcmp_t ref, pixelcmp_t opt)
|
|
1379 {
|
|
1380 int j = 0, index1, index2, optres, refres;
|
|
1381 intptr_t stride = STRIDE;
|
|
1382
|
|
1383 for (int i = 0; i < ITERS; i++)
|
|
1384 {
|
|
1385 index1 = rand() % TEST_CASES;
|
|
1386 index2 = rand() % TEST_CASES;
|
|
1387 optres = (int)checked(opt, pixel_test_buff[index1], stride, pixel_test_buff[index2] + j, stride);
|
|
1388 refres = ref(pixel_test_buff[index1], stride, pixel_test_buff[index2] + j, stride);
|
|
1389
|
|
1390 if (optres != refres)
|
|
1391 return false;
|
|
1392
|
|
1393 reportfail();
|
|
1394 j += INCR;
|
|
1395 }
|
|
1396
|
|
1397 return true;
|
|
1398 }
|
|
1399
|
|
1400 bool PixelHarness::check_psyCost_ss(pixelcmp_ss_t ref, pixelcmp_ss_t opt)
|
|
1401 {
|
|
1402 int j = 0, index1, index2, optres, refres;
|
|
1403 intptr_t stride = STRIDE;
|
|
1404
|
|
1405 for (int i = 0; i < ITERS; i++)
|
|
1406 {
|
|
1407 index1 = rand() % TEST_CASES;
|
|
1408 index2 = rand() % TEST_CASES;
|
|
1409 optres = (int)checked(opt, short_test_buff[index1], stride, short_test_buff[index2] + j, stride);
|
|
1410 refres = ref(short_test_buff[index1], stride, short_test_buff[index2] + j, stride);
|
|
1411
|
|
1412 if (optres != refres)
|
|
1413 return false;
|
|
1414
|
|
1415 reportfail();
|
|
1416 j += INCR;
|
|
1417 }
|
|
1418
|
|
1419 return true;
|
|
1420 }
|
|
1421
|
|
1422 bool PixelHarness::check_saoCuOrgB0_t(saoCuOrgB0_t ref, saoCuOrgB0_t opt)
|
|
1423 {
|
|
1424 ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
|
|
1425 ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
|
|
1426
|
|
1427 for (int i = 0; i < 64 * 64; i++)
|
|
1428 ref_dest[i] = opt_dest[i] = rand() % (PIXEL_MAX);
|
|
1429
|
|
1430 int j = 0;
|
|
1431
|
|
1432 for (int i = 0; i < ITERS; i++)
|
|
1433 {
|
|
1434 int width = 16 * (rand() % 4 + 1);
|
|
1435 int height = rand() % 63 + 2;
|
|
1436 int stride = width;
|
|
1437
|
|
1438 ref(ref_dest, psbuf1 + j, width, height, stride);
|
|
1439 checked(opt, opt_dest, psbuf1 + j, width, height, stride);
|
|
1440
|
|
1441 if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)))
|
|
1442 return false;
|
|
1443
|
|
1444 reportfail();
|
|
1445 j += INCR;
|
|
1446 }
|
|
1447
|
|
1448 return true;
|
|
1449 }
|
|
1450
|
|
1451 bool PixelHarness::check_scanPosLast(scanPosLast_t ref, scanPosLast_t opt)
|
|
1452 {
|
|
1453 ALIGN_VAR_16(coeff_t, ref_src[32 * 32 + ITERS * 2]);
|
|
1454 uint8_t ref_coeffNum[MLS_GRP_NUM], opt_coeffNum[MLS_GRP_NUM]; // value range[0, 16]
|
|
1455 uint16_t ref_coeffSign[MLS_GRP_NUM], opt_coeffSign[MLS_GRP_NUM]; // bit mask map for non-zero coeff sign
|
|
1456 uint16_t ref_coeffFlag[MLS_GRP_NUM], opt_coeffFlag[MLS_GRP_NUM]; // bit mask map for non-zero coeff
|
|
1457
|
|
1458 int totalCoeffs = 0;
|
|
1459 for (int i = 0; i < 32 * 32; i++)
|
|
1460 {
|
|
1461 ref_src[i] = rand() & SHORT_MAX;
|
|
1462
|
|
1463 // more zero coeff
|
|
1464 if (ref_src[i] < SHORT_MAX * 2 / 3)
|
|
1465 ref_src[i] = 0;
|
|
1466
|
|
1467 // more negtive
|
|
1468 if ((rand() % 10) < 8)
|
|
1469 ref_src[i] *= -1;
|
|
1470 totalCoeffs += (ref_src[i] != 0);
|
|
1471 }
|
|
1472
|
|
1473 // extra test area all of 0x1234
|
|
1474 for (int i = 0; i < ITERS * 2; i++)
|
|
1475 {
|
|
1476 ref_src[32 * 32 + i] = 0x1234;
|
|
1477 }
|
|
1478
|
|
1479
|
|
1480 memset(ref_coeffNum, 0xCD, sizeof(ref_coeffNum));
|
|
1481 memset(ref_coeffSign, 0xCD, sizeof(ref_coeffSign));
|
|
1482 memset(ref_coeffFlag, 0xCD, sizeof(ref_coeffFlag));
|
|
1483
|
|
1484 memset(opt_coeffNum, 0xCD, sizeof(opt_coeffNum));
|
|
1485 memset(opt_coeffSign, 0xCD, sizeof(opt_coeffSign));
|
|
1486 memset(opt_coeffFlag, 0xCD, sizeof(opt_coeffFlag));
|
|
1487
|
|
1488 for (int i = 0; i < ITERS; i++)
|
|
1489 {
|
|
1490 int rand_scan_type = rand() % NUM_SCAN_TYPE;
|
|
1491 int rand_scan_size = rand() % NUM_SCAN_SIZE;
|
|
1492 int rand_numCoeff = 0;
|
|
1493
|
|
1494 for (int j = 0; j < 1 << (2 * (rand_scan_size + 2)); j++)
|
|
1495 rand_numCoeff += (ref_src[i + j] != 0);
|
|
1496
|
|
1497 // at least one coeff in transform block
|
|
1498 if (rand_numCoeff == 0)
|
|
1499 {
|
|
1500 ref_src[i + (1 << (2 * (rand_scan_size + 2))) - 1] = -1;
|
|
1501 rand_numCoeff = 1;
|
|
1502 }
|
|
1503
|
|
1504 const int trSize = (1 << (rand_scan_size + 2));
|
|
1505 const uint16_t* const scanTbl = g_scanOrder[rand_scan_type][rand_scan_size];
|
|
1506 const uint16_t* const scanTblCG4x4 = g_scan4x4[rand_scan_size <= (MDCS_LOG2_MAX_SIZE - 2) ? rand_scan_type : SCAN_DIAG];
|
|
1507
|
|
1508 int ref_scanPos = ref(scanTbl, ref_src + i, ref_coeffSign, ref_coeffFlag, ref_coeffNum, rand_numCoeff, scanTblCG4x4, trSize);
|
|
1509 int opt_scanPos = (int)checked(opt, scanTbl, ref_src + i, opt_coeffSign, opt_coeffFlag, opt_coeffNum, rand_numCoeff, scanTblCG4x4, trSize);
|
|
1510
|
|
1511 if (ref_scanPos != opt_scanPos)
|
|
1512 return false;
|
|
1513
|
|
1514 for (int j = 0; rand_numCoeff; j++)
|
|
1515 {
|
|
1516 if (ref_coeffSign[j] != opt_coeffSign[j])
|
|
1517 return false;
|
|
1518
|
|
1519 if (ref_coeffFlag[j] != opt_coeffFlag[j])
|
|
1520 return false;
|
|
1521
|
|
1522 if (ref_coeffNum[j] != opt_coeffNum[j])
|
|
1523 return false;
|
|
1524
|
|
1525 rand_numCoeff -= ref_coeffNum[j];
|
|
1526 }
|
|
1527
|
|
1528 if (rand_numCoeff != 0)
|
|
1529 return false;
|
|
1530
|
|
1531 reportfail();
|
|
1532 }
|
|
1533
|
|
1534 return true;
|
|
1535 }
|
|
1536
|
|
1537 bool PixelHarness::check_findPosFirstLast(findPosFirstLast_t ref, findPosFirstLast_t opt)
|
|
1538 {
|
|
1539 ALIGN_VAR_16(coeff_t, ref_src[4 * 32 + ITERS * 2]);
|
|
1540 memset(ref_src, 0, sizeof(ref_src));
|
|
1541
|
|
1542 // minus ITERS for keep probability to generate all zeros block
|
|
1543 for (int i = 0; i < 4 * 32 - ITERS; i++)
|
|
1544 {
|
|
1545 ref_src[i] = rand() & SHORT_MAX;
|
|
1546 }
|
|
1547
|
|
1548 // extra test area all of Zeros
|
|
1549
|
|
1550 for (int i = 0; i < ITERS; i++)
|
|
1551 {
|
|
1552 int rand_scan_type = rand() % NUM_SCAN_TYPE;
|
|
1553 int rand_scan_size = (rand() % NUM_SCAN_SIZE) + 2;
|
|
1554 const int trSize = (1 << rand_scan_size);
|
|
1555 coeff_t *rand_src = ref_src + i;
|
|
1556
|
|
1557 const uint16_t* const scanTbl = g_scan4x4[rand_scan_type];
|
|
1558
|
|
1559 int j;
|
|
1560 for (j = 0; j < SCAN_SET_SIZE; j++)
|
|
1561 {
|
|
1562 const uint32_t idxY = j / MLS_CG_SIZE;
|
|
1563 const uint32_t idxX = j % MLS_CG_SIZE;
|
|
1564 if (rand_src[idxY * trSize + idxX]) break;
|
|
1565 }
|
|
1566
|
|
1567 uint32_t ref_scanPos = ref(rand_src, trSize, scanTbl);
|
|
1568 uint32_t opt_scanPos = (int)checked(opt, rand_src, trSize, scanTbl);
|
|
1569
|
|
1570 // specially case: all coeff group are zero
|
|
1571 if (j >= SCAN_SET_SIZE)
|
|
1572 {
|
|
1573 // all zero block the high 16-bits undefined
|
|
1574 if ((uint16_t)ref_scanPos != (uint16_t)opt_scanPos)
|
|
1575 return false;
|
|
1576 }
|
|
1577 else if (ref_scanPos != opt_scanPos)
|
|
1578 return false;
|
|
1579
|
|
1580 reportfail();
|
|
1581 }
|
|
1582
|
|
1583 return true;
|
|
1584 }
|
|
1585
|
|
1586 bool PixelHarness::check_costCoeffNxN(costCoeffNxN_t ref, costCoeffNxN_t opt)
|
|
1587 {
|
|
1588 ALIGN_VAR_16(coeff_t, ref_src[32 * 32 + ITERS * 3]);
|
|
1589 ALIGN_VAR_32(uint16_t, ref_absCoeff[1 << MLS_CG_SIZE]);
|
|
1590 ALIGN_VAR_32(uint16_t, opt_absCoeff[1 << MLS_CG_SIZE]);
|
|
1591
|
|
1592 memset(ref_absCoeff, 0xCD, sizeof(ref_absCoeff));
|
|
1593 memset(opt_absCoeff, 0xCD, sizeof(opt_absCoeff));
|
|
1594
|
|
1595 int totalCoeffs = 0;
|
|
1596 for (int i = 0; i < 32 * 32; i++)
|
|
1597 {
|
|
1598 ref_src[i] = rand() & SHORT_MAX;
|
|
1599
|
|
1600 // more zero coeff
|
|
1601 if (ref_src[i] < SHORT_MAX * 2 / 3)
|
|
1602 ref_src[i] = 0;
|
|
1603
|
|
1604 // more negtive
|
|
1605 if ((rand() % 10) < 8)
|
|
1606 ref_src[i] *= -1;
|
|
1607 totalCoeffs += (ref_src[i] != 0);
|
|
1608 }
|
|
1609
|
|
1610 // extra test area all of 0x1234
|
|
1611 for (int i = 0; i < ITERS * 3; i++)
|
|
1612 {
|
|
1613 ref_src[32 * 32 + i] = 0x1234;
|
|
1614 }
|
|
1615
|
|
1616 // generate CABAC context table
|
|
1617 uint8_t m_contextState_ref[OFF_SIG_FLAG_CTX + NUM_SIG_FLAG_CTX_LUMA];
|
|
1618 uint8_t m_contextState_opt[OFF_SIG_FLAG_CTX + NUM_SIG_FLAG_CTX_LUMA];
|
|
1619 for (int k = 0; k < (OFF_SIG_FLAG_CTX + NUM_SIG_FLAG_CTX_LUMA); k++)
|
|
1620 {
|
|
1621 m_contextState_ref[k] = (rand() % (125 - 2)) + 2;
|
|
1622 m_contextState_opt[k] = m_contextState_ref[k];
|
|
1623 }
|
|
1624 uint8_t *const ref_baseCtx = m_contextState_ref;
|
|
1625 uint8_t *const opt_baseCtx = m_contextState_opt;
|
|
1626
|
|
1627 for (int i = 0; i < ITERS * 2; i++)
|
|
1628 {
|
|
1629 int rand_scan_type = rand() % NUM_SCAN_TYPE;
|
|
1630 int rand_scanPosSigOff = rand() % 16; //rand_scanPosSigOff range is [1,15]
|
|
1631 int rand_patternSigCtx = rand() % 4; //range [0,3]
|
|
1632 int rand_scan_size = rand() % NUM_SCAN_SIZE;
|
|
1633 int offset; // the value have a exact range, details in CoeffNxN()
|
|
1634 if (rand_scan_size == 2)
|
|
1635 offset = 0;
|
|
1636 else if (rand_scan_size == 3)
|
|
1637 offset = 9;
|
|
1638 else
|
|
1639 offset = 12;
|
|
1640
|
|
1641 const int trSize = (1 << (rand_scan_size + 2));
|
|
1642 ALIGN_VAR_32(static const uint8_t, table_cnt[5][SCAN_SET_SIZE]) =
|
|
1643 {
|
|
1644 // patternSigCtx = 0
|
|
1645 {
|
|
1646 2, 1, 1, 0,
|
|
1647 1, 1, 0, 0,
|
|
1648 1, 0, 0, 0,
|
|
1649 0, 0, 0, 0,
|
|
1650 },
|
|
1651 // patternSigCtx = 1
|
|
1652 {
|
|
1653 2, 2, 2, 2,
|
|
1654 1, 1, 1, 1,
|
|
1655 0, 0, 0, 0,
|
|
1656 0, 0, 0, 0,
|
|
1657 },
|
|
1658 // patternSigCtx = 2
|
|
1659 {
|
|
1660 2, 1, 0, 0,
|
|
1661 2, 1, 0, 0,
|
|
1662 2, 1, 0, 0,
|
|
1663 2, 1, 0, 0,
|
|
1664 },
|
|
1665 // patternSigCtx = 3
|
|
1666 {
|
|
1667 2, 2, 2, 2,
|
|
1668 2, 2, 2, 2,
|
|
1669 2, 2, 2, 2,
|
|
1670 2, 2, 2, 2,
|
|
1671 },
|
|
1672 // 4x4
|
|
1673 {
|
|
1674 0, 1, 4, 5,
|
|
1675 2, 3, 4, 5,
|
|
1676 6, 6, 8, 8,
|
|
1677 7, 7, 8, 8
|
|
1678 }
|
|
1679 };
|
|
1680 const uint8_t *rand_tabSigCtx = table_cnt[(rand_scan_size == 2) ? 4 : (uint32_t)rand_patternSigCtx];
|
|
1681 const uint16_t* const scanTbl = g_scanOrder[rand_scan_type][rand_scan_size];
|
|
1682 const uint16_t* const scanTblCG4x4 = g_scan4x4[rand_scan_size <= (MDCS_LOG2_MAX_SIZE - 2) ? rand_scan_type : SCAN_DIAG];
|
|
1683
|
|
1684 int rand_scanPosCG = rand() % (trSize * trSize / MLS_CG_BLK_SIZE);
|
|
1685 int subPosBase = rand_scanPosCG * MLS_CG_BLK_SIZE;
|
|
1686 int rand_numCoeff = 0;
|
|
1687 uint32_t scanFlagMask = 0;
|
|
1688 const int numNonZero = (rand_scanPosSigOff < (MLS_CG_BLK_SIZE - 1)) ? 1 : 0;
|
|
1689
|
|
1690 for(int k = 0; k <= rand_scanPosSigOff; k++)
|
|
1691 {
|
|
1692 uint32_t pos = scanTbl[subPosBase + k];
|
|
1693 coeff_t tmp_coeff = ref_src[i + pos];
|
|
1694 if (tmp_coeff != 0)
|
|
1695 {
|
|
1696 rand_numCoeff++;
|
|
1697 }
|
|
1698 scanFlagMask = scanFlagMask * 2 + (tmp_coeff != 0);
|
|
1699 }
|
|
1700
|
|
1701 // can't process all zeros block
|
|
1702 if (rand_numCoeff == 0)
|
|
1703 continue;
|
|
1704
|
|
1705 const uint32_t blkPosBase = scanTbl[subPosBase];
|
|
1706 uint32_t ref_sum = ref(scanTblCG4x4, &ref_src[blkPosBase + i], trSize, ref_absCoeff + numNonZero, rand_tabSigCtx, scanFlagMask, (uint8_t*)ref_baseCtx, offset, rand_scanPosSigOff, subPosBase);
|
|
1707 uint32_t opt_sum = (uint32_t)checked(opt, scanTblCG4x4, &ref_src[blkPosBase + i], trSize, opt_absCoeff + numNonZero, rand_tabSigCtx, scanFlagMask, (uint8_t*)opt_baseCtx, offset, rand_scanPosSigOff, subPosBase);
|
|
1708
|
|
1709 if (ref_sum != opt_sum)
|
|
1710 return false;
|
|
1711 if (memcmp(ref_baseCtx, opt_baseCtx, sizeof(m_contextState_ref)))
|
|
1712 return false;
|
|
1713
|
|
1714 // NOTE: just first rand_numCoeff valid, but I check full buffer for confirm no overwrite bug
|
|
1715 if (memcmp(ref_absCoeff, opt_absCoeff, sizeof(ref_absCoeff)))
|
|
1716 return false;
|
|
1717
|
|
1718 reportfail();
|
|
1719 }
|
|
1720 return true;
|
|
1721 }
|
|
1722
|
|
1723 bool PixelHarness::check_costCoeffRemain(costCoeffRemain_t ref, costCoeffRemain_t opt)
|
|
1724 {
|
|
1725 ALIGN_VAR_32(uint16_t, absCoeff[(1 << MLS_CG_SIZE) + ITERS]);
|
|
1726
|
|
1727 for (int i = 0; i < (1 << MLS_CG_SIZE) + ITERS; i++)
|
|
1728 {
|
|
1729 absCoeff[i] = rand() & SHORT_MAX;
|
|
1730 // more coeff with value one
|
|
1731 if (absCoeff[i] < SHORT_MAX * 2 / 3)
|
|
1732 absCoeff[i] = 1;
|
|
1733 }
|
|
1734 for (int i = 0; i < ITERS; i++)
|
|
1735 {
|
|
1736 uint32_t firstC2Idx = 0;
|
|
1737 int k = 0;
|
|
1738 int numNonZero = rand() % 17; //can be random, range[1, 16]
|
|
1739 for (k = 0; k < C1FLAG_NUMBER; k++)
|
|
1740 {
|
|
1741 if (absCoeff[i + k] >= 2)
|
|
1742 {
|
|
1743 break;
|
|
1744 }
|
|
1745 }
|
|
1746 firstC2Idx = k; // it is index of exact first coeff that value more than 2
|
|
1747 int ref_sum = ref(absCoeff + i, numNonZero, firstC2Idx);
|
|
1748 int opt_sum = (int)checked(opt, absCoeff + i, numNonZero, firstC2Idx);
|
|
1749 if (ref_sum != opt_sum)
|
|
1750 return false;
|
|
1751 }
|
|
1752 return true;
|
|
1753 }
|
|
1754
|
|
1755 bool PixelHarness::check_costC1C2Flag(costC1C2Flag_t ref, costC1C2Flag_t opt)
|
|
1756 {
|
|
1757 ALIGN_VAR_32(uint16_t, absCoeff[(1 << MLS_CG_SIZE)]);
|
|
1758
|
|
1759 // generate CABAC context table
|
|
1760 uint8_t ref_baseCtx[8];
|
|
1761 uint8_t opt_baseCtx[8];
|
|
1762 for (int k = 0; k < 8; k++)
|
|
1763 {
|
|
1764 ref_baseCtx[k] =
|
|
1765 opt_baseCtx[k] = (rand() % (125 - 2)) + 2;
|
|
1766 }
|
|
1767
|
|
1768 for (int i = 0; i < ITERS; i++)
|
|
1769 {
|
|
1770 int rand_offset = rand() % 4;
|
|
1771 int numNonZero = 0;
|
|
1772
|
|
1773 // generate test data, all are Absolute value and Aligned
|
|
1774 for (int k = 0; k < C1FLAG_NUMBER; k++)
|
|
1775 {
|
|
1776 int value = rand() & SHORT_MAX;
|
|
1777 // more coeff with value [0,2]
|
|
1778 if (value < SHORT_MAX * 1 / 3)
|
|
1779 value = 0;
|
|
1780 else if (value < SHORT_MAX * 2 / 3)
|
|
1781 value = 1;
|
|
1782 else if (value < SHORT_MAX * 3 / 4)
|
|
1783 value = 2;
|
|
1784
|
|
1785 if (value)
|
|
1786 {
|
|
1787 absCoeff[numNonZero] = (uint16_t)value;
|
|
1788 numNonZero++;
|
|
1789 }
|
|
1790 }
|
|
1791
|
|
1792 int ref_sum = ref(absCoeff, (intptr_t)numNonZero, ref_baseCtx, (intptr_t)rand_offset);
|
|
1793 int opt_sum = (int)checked(opt, absCoeff, (intptr_t)numNonZero, opt_baseCtx, (intptr_t)rand_offset);
|
|
1794 if (ref_sum != opt_sum)
|
|
1795 {
|
|
1796 ref_sum = ref(absCoeff, (intptr_t)numNonZero, ref_baseCtx, (intptr_t)rand_offset);
|
|
1797 opt_sum = opt(absCoeff, (intptr_t)numNonZero, opt_baseCtx, (intptr_t)rand_offset);
|
|
1798 return false;
|
|
1799 }
|
|
1800 }
|
|
1801 return true;
|
|
1802 }
|
|
1803
|
|
1804 bool PixelHarness::check_planeClipAndMax(planeClipAndMax_t ref, planeClipAndMax_t opt)
|
|
1805 {
|
|
1806 for (int i = 0; i < ITERS; i++)
|
|
1807 {
|
|
1808 intptr_t rand_stride = rand() % STRIDE;
|
|
1809 int rand_width = (rand() % (STRIDE * 2)) + 1;
|
|
1810 const int rand_height = (rand() % MAX_HEIGHT) + 1;
|
|
1811 const pixel rand_min = rand() % 32;
|
|
1812 const pixel rand_max = PIXEL_MAX - (rand() % 32);
|
|
1813 uint64_t ref_sum, opt_sum;
|
|
1814
|
|
1815 // video width must be more than or equal to 32
|
|
1816 if (rand_width < 32)
|
|
1817 rand_width = 32;
|
|
1818
|
|
1819 // stride must be more than or equal to width
|
|
1820 if (rand_stride < rand_width)
|
|
1821 rand_stride = rand_width;
|
|
1822
|
|
1823 pixel ref_max = ref(pbuf1, rand_stride, rand_width, rand_height, &ref_sum, rand_min, rand_max);
|
|
1824 pixel opt_max = (pixel)checked(opt, pbuf1, rand_stride, rand_width, rand_height, &opt_sum, rand_min, rand_max);
|
|
1825
|
|
1826 if (ref_max != opt_max)
|
|
1827 return false;
|
|
1828 }
|
|
1829 return true;
|
|
1830 }
|
|
1831
|
|
1832 bool PixelHarness::testPU(int part, const EncoderPrimitives& ref, const EncoderPrimitives& opt)
|
|
1833 {
|
|
1834 if (opt.pu[part].satd)
|
|
1835 {
|
|
1836 if (!check_pixelcmp(ref.pu[part].satd, opt.pu[part].satd))
|
|
1837 {
|
|
1838 printf("satd[%s]: failed!\n", lumaPartStr[part]);
|
|
1839 return false;
|
|
1840 }
|
|
1841 }
|
|
1842
|
|
1843 if (opt.pu[part].sad)
|
|
1844 {
|
|
1845 if (!check_pixelcmp(ref.pu[part].sad, opt.pu[part].sad))
|
|
1846 {
|
|
1847 printf("sad[%s]: failed!\n", lumaPartStr[part]);
|
|
1848 return false;
|
|
1849 }
|
|
1850 }
|
|
1851
|
|
1852 if (opt.pu[part].sad_x3)
|
|
1853 {
|
|
1854 if (!check_pixelcmp_x3(ref.pu[part].sad_x3, opt.pu[part].sad_x3))
|
|
1855 {
|
|
1856 printf("sad_x3[%s]: failed!\n", lumaPartStr[part]);
|
|
1857 return false;
|
|
1858 }
|
|
1859 }
|
|
1860
|
|
1861 if (opt.pu[part].sad_x4)
|
|
1862 {
|
|
1863 if (!check_pixelcmp_x4(ref.pu[part].sad_x4, opt.pu[part].sad_x4))
|
|
1864 {
|
|
1865 printf("sad_x4[%s]: failed!\n", lumaPartStr[part]);
|
|
1866 return false;
|
|
1867 }
|
|
1868 }
|
|
1869
|
|
1870 if (opt.pu[part].pixelavg_pp)
|
|
1871 {
|
|
1872 if (!check_pixelavg_pp(ref.pu[part].pixelavg_pp, opt.pu[part].pixelavg_pp))
|
|
1873 {
|
|
1874 printf("pixelavg_pp[%s]: failed!\n", lumaPartStr[part]);
|
|
1875 return false;
|
|
1876 }
|
|
1877 }
|
|
1878
|
|
1879 if (opt.pu[part].copy_pp)
|
|
1880 {
|
|
1881 if (!check_copy_pp(ref.pu[part].copy_pp, opt.pu[part].copy_pp))
|
|
1882 {
|
|
1883 printf("copy_pp[%s] failed\n", lumaPartStr[part]);
|
|
1884 return false;
|
|
1885 }
|
|
1886 }
|
|
1887
|
|
1888 if (opt.pu[part].addAvg)
|
|
1889 {
|
|
1890 if (!check_addAvg(ref.pu[part].addAvg, opt.pu[part].addAvg))
|
|
1891 {
|
|
1892 printf("addAvg[%s] failed\n", lumaPartStr[part]);
|
|
1893 return false;
|
|
1894 }
|
|
1895 }
|
|
1896
|
|
1897 if (part < NUM_CU_SIZES)
|
|
1898 {
|
|
1899 if (opt.cu[part].sse_pp)
|
|
1900 {
|
|
1901 if (!check_pixel_sse(ref.cu[part].sse_pp, opt.cu[part].sse_pp))
|
|
1902 {
|
|
1903 printf("sse_pp[%s]: failed!\n", lumaPartStr[part]);
|
|
1904 return false;
|
|
1905 }
|
|
1906 }
|
|
1907
|
|
1908 if (opt.cu[part].sse_ss)
|
|
1909 {
|
|
1910 if (!check_pixel_sse_ss(ref.cu[part].sse_ss, opt.cu[part].sse_ss))
|
|
1911 {
|
|
1912 printf("sse_ss[%s]: failed!\n", lumaPartStr[part]);
|
|
1913 return false;
|
|
1914 }
|
|
1915 }
|
|
1916
|
|
1917 if (opt.cu[part].sub_ps)
|
|
1918 {
|
|
1919 if (!check_pixel_sub_ps(ref.cu[part].sub_ps, opt.cu[part].sub_ps))
|
|
1920 {
|
|
1921 printf("sub_ps[%s] failed\n", lumaPartStr[part]);
|
|
1922 return false;
|
|
1923 }
|
|
1924 }
|
|
1925
|
|
1926 if (opt.cu[part].add_ps)
|
|
1927 {
|
|
1928 if (!check_pixel_add_ps(ref.cu[part].add_ps, opt.cu[part].add_ps))
|
|
1929 {
|
|
1930 printf("add_ps[%s] failed\n", lumaPartStr[part]);
|
|
1931 return false;
|
|
1932 }
|
|
1933 }
|
|
1934
|
|
1935 if (opt.cu[part].copy_ss)
|
|
1936 {
|
|
1937 if (!check_copy_ss(ref.cu[part].copy_ss, opt.cu[part].copy_ss))
|
|
1938 {
|
|
1939 printf("copy_ss[%s] failed\n", lumaPartStr[part]);
|
|
1940 return false;
|
|
1941 }
|
|
1942 }
|
|
1943
|
|
1944 if (opt.cu[part].copy_sp)
|
|
1945 {
|
|
1946 if (!check_copy_sp(ref.cu[part].copy_sp, opt.cu[part].copy_sp))
|
|
1947 {
|
|
1948 printf("copy_sp[%s] failed\n", lumaPartStr[part]);
|
|
1949 return false;
|
|
1950 }
|
|
1951 }
|
|
1952
|
|
1953 if (opt.cu[part].copy_ps)
|
|
1954 {
|
|
1955 if (!check_copy_ps(ref.cu[part].copy_ps, opt.cu[part].copy_ps))
|
|
1956 {
|
|
1957 printf("copy_ps[%s] failed\n", lumaPartStr[part]);
|
|
1958 return false;
|
|
1959 }
|
|
1960 }
|
|
1961 }
|
|
1962
|
|
1963 for (int i = 0; i < X265_CSP_COUNT; i++)
|
|
1964 {
|
|
1965 if (opt.chroma[i].pu[part].copy_pp)
|
|
1966 {
|
|
1967 if (!check_copy_pp(ref.chroma[i].pu[part].copy_pp, opt.chroma[i].pu[part].copy_pp))
|
|
1968 {
|
|
1969 printf("chroma_copy_pp[%s][%s] failed\n", x265_source_csp_names[i], chromaPartStr[i][part]);
|
|
1970 return false;
|
|
1971 }
|
|
1972 }
|
|
1973 if (opt.chroma[i].pu[part].addAvg)
|
|
1974 {
|
|
1975 if (!check_addAvg(ref.chroma[i].pu[part].addAvg, opt.chroma[i].pu[part].addAvg))
|
|
1976 {
|
|
1977 printf("chroma_addAvg[%s][%s] failed\n", x265_source_csp_names[i], chromaPartStr[i][part]);
|
|
1978 return false;
|
|
1979 }
|
|
1980 }
|
|
1981 if (opt.chroma[i].pu[part].satd)
|
|
1982 {
|
|
1983 if (!check_pixelcmp(ref.chroma[i].pu[part].satd, opt.chroma[i].pu[part].satd))
|
|
1984 {
|
|
1985 printf("chroma_satd[%s][%s] failed!\n", x265_source_csp_names[i], chromaPartStr[i][part]);
|
|
1986 return false;
|
|
1987 }
|
|
1988 }
|
|
1989 if (part < NUM_CU_SIZES)
|
|
1990 {
|
|
1991 if (opt.chroma[i].cu[part].sse_pp)
|
|
1992 {
|
|
1993 if (!check_pixel_sse(ref.chroma[i].cu[part].sse_pp, opt.chroma[i].cu[part].sse_pp))
|
|
1994 {
|
|
1995 printf("chroma_sse_pp[%s][%s]: failed!\n", x265_source_csp_names[i], chromaPartStr[i][part]);
|
|
1996 return false;
|
|
1997 }
|
|
1998 }
|
|
1999 if (opt.chroma[i].cu[part].sub_ps)
|
|
2000 {
|
|
2001 if (!check_pixel_sub_ps(ref.chroma[i].cu[part].sub_ps, opt.chroma[i].cu[part].sub_ps))
|
|
2002 {
|
|
2003 printf("chroma_sub_ps[%s][%s] failed\n", x265_source_csp_names[i], chromaPartStr[i][part]);
|
|
2004 return false;
|
|
2005 }
|
|
2006 }
|
|
2007 if (opt.chroma[i].cu[part].add_ps)
|
|
2008 {
|
|
2009 if (!check_pixel_add_ps(ref.chroma[i].cu[part].add_ps, opt.chroma[i].cu[part].add_ps))
|
|
2010 {
|
|
2011 printf("chroma_add_ps[%s][%s] failed\n", x265_source_csp_names[i], chromaPartStr[i][part]);
|
|
2012 return false;
|
|
2013 }
|
|
2014 }
|
|
2015 if (opt.chroma[i].cu[part].copy_sp)
|
|
2016 {
|
|
2017 if (!check_copy_sp(ref.chroma[i].cu[part].copy_sp, opt.chroma[i].cu[part].copy_sp))
|
|
2018 {
|
|
2019 printf("chroma_copy_sp[%s][%s] failed\n", x265_source_csp_names[i], chromaPartStr[i][part]);
|
|
2020 return false;
|
|
2021 }
|
|
2022 }
|
|
2023 if (opt.chroma[i].cu[part].copy_ps)
|
|
2024 {
|
|
2025 if (!check_copy_ps(ref.chroma[i].cu[part].copy_ps, opt.chroma[i].cu[part].copy_ps))
|
|
2026 {
|
|
2027 printf("chroma_copy_ps[%s][%s] failed\n", x265_source_csp_names[i], chromaPartStr[i][part]);
|
|
2028 return false;
|
|
2029 }
|
|
2030 }
|
|
2031 if (opt.chroma[i].cu[part].copy_ss)
|
|
2032 {
|
|
2033 if (!check_copy_ss(ref.chroma[i].cu[part].copy_ss, opt.chroma[i].cu[part].copy_ss))
|
|
2034 {
|
|
2035 printf("chroma_copy_ss[%s][%s] failed\n", x265_source_csp_names[i], chromaPartStr[i][part]);
|
|
2036 return false;
|
|
2037 }
|
|
2038 }
|
|
2039 if (opt.chroma[i].cu[part].sa8d)
|
|
2040 {
|
|
2041 if (!check_pixelcmp(ref.chroma[i].cu[part].sa8d, opt.chroma[i].cu[part].sa8d))
|
|
2042 {
|
|
2043 printf("chroma_sa8d[%s][%s] failed\n", x265_source_csp_names[i], chromaPartStr[i][part]);
|
|
2044 return false;
|
|
2045 }
|
|
2046 }
|
|
2047 }
|
|
2048 }
|
|
2049
|
|
2050 return true;
|
|
2051 }
|
|
2052
|
|
2053 bool PixelHarness::testCorrectness(const EncoderPrimitives& ref, const EncoderPrimitives& opt)
|
|
2054 {
|
|
2055 for (int size = 4; size <= 64; size *= 2)
|
|
2056 {
|
|
2057 int part = partitionFromSizes(size, size); // 2Nx2N
|
|
2058 if (!testPU(part, ref, opt)) return false;
|
|
2059
|
|
2060 if (size > 4)
|
|
2061 {
|
|
2062 part = partitionFromSizes(size, size >> 1); // 2NxN
|
|
2063 if (!testPU(part, ref, opt)) return false;
|
|
2064 part = partitionFromSizes(size >> 1, size); // Nx2N
|
|
2065 if (!testPU(part, ref, opt)) return false;
|
|
2066 }
|
|
2067 if (size > 8)
|
|
2068 {
|
|
2069 // 4 AMP modes
|
|
2070 part = partitionFromSizes(size, size >> 2);
|
|
2071 if (!testPU(part, ref, opt)) return false;
|
|
2072 part = partitionFromSizes(size, 3 * (size >> 2));
|
|
2073 if (!testPU(part, ref, opt)) return false;
|
|
2074
|
|
2075 part = partitionFromSizes(size >> 2, size);
|
|
2076 if (!testPU(part, ref, opt)) return false;
|
|
2077 part = partitionFromSizes(3 * (size >> 2), size);
|
|
2078 if (!testPU(part, ref, opt)) return false;
|
|
2079 }
|
|
2080 }
|
|
2081
|
|
2082 for (int i = 0; i < NUM_CU_SIZES; i++)
|
|
2083 {
|
|
2084 if (opt.cu[i].sa8d)
|
|
2085 {
|
|
2086 if (!check_pixelcmp(ref.cu[i].sa8d, opt.cu[i].sa8d))
|
|
2087 {
|
|
2088 printf("sa8d[%dx%d]: failed!\n", 4 << i, 4 << i);
|
|
2089 return false;
|
|
2090 }
|
|
2091 }
|
|
2092
|
|
2093 if (opt.cu[i].blockfill_s)
|
|
2094 {
|
|
2095 if (!check_blockfill_s(ref.cu[i].blockfill_s, opt.cu[i].blockfill_s))
|
|
2096 {
|
|
2097 printf("blockfill_s[%dx%d]: failed!\n", 4 << i, 4 << i);
|
|
2098 return false;
|
|
2099 }
|
|
2100 }
|
|
2101
|
|
2102 if (opt.cu[i].var)
|
|
2103 {
|
|
2104 if (!check_pixel_var(ref.cu[i].var, opt.cu[i].var))
|
|
2105 {
|
|
2106 printf("var[%dx%d] failed\n", 4 << i, 4 << i);
|
|
2107 return false;
|
|
2108 }
|
|
2109 }
|
|
2110
|
|
2111 if (opt.cu[i].psy_cost_pp)
|
|
2112 {
|
|
2113 if (!check_psyCost_pp(ref.cu[i].psy_cost_pp, opt.cu[i].psy_cost_pp))
|
|
2114 {
|
|
2115 printf("\npsy_cost_pp[%dx%d] failed!\n", 4 << i, 4 << i);
|
|
2116 return false;
|
|
2117 }
|
|
2118 }
|
|
2119
|
|
2120 if (opt.cu[i].psy_cost_ss)
|
|
2121 {
|
|
2122 if (!check_psyCost_ss(ref.cu[i].psy_cost_ss, opt.cu[i].psy_cost_ss))
|
|
2123 {
|
|
2124 printf("\npsy_cost_ss[%dx%d] failed!\n", 4 << i, 4 << i);
|
|
2125 return false;
|
|
2126 }
|
|
2127 }
|
|
2128
|
|
2129 if (i < BLOCK_64x64)
|
|
2130 {
|
|
2131 /* TU only primitives */
|
|
2132
|
|
2133 if (opt.cu[i].calcresidual)
|
|
2134 {
|
|
2135 if (!check_calresidual(ref.cu[i].calcresidual, opt.cu[i].calcresidual))
|
|
2136 {
|
|
2137 printf("calcresidual width: %d failed!\n", 4 << i);
|
|
2138 return false;
|
|
2139 }
|
|
2140 }
|
|
2141
|
|
2142 if (opt.cu[i].transpose)
|
|
2143 {
|
|
2144 if (!check_transpose(ref.cu[i].transpose, opt.cu[i].transpose))
|
|
2145 {
|
|
2146 printf("transpose[%dx%d] failed\n", 4 << i, 4 << i);
|
|
2147 return false;
|
|
2148 }
|
|
2149 }
|
|
2150
|
|
2151 if (opt.cu[i].ssd_s)
|
|
2152 {
|
|
2153 if (!check_ssd_s(ref.cu[i].ssd_s, opt.cu[i].ssd_s))
|
|
2154 {
|
|
2155 printf("ssd_s[%dx%d]: failed!\n", 4 << i, 4 << i);
|
|
2156 return false;
|
|
2157 }
|
|
2158 }
|
|
2159
|
|
2160 if (opt.cu[i].copy_cnt)
|
|
2161 {
|
|
2162 if (!check_copy_cnt_t(ref.cu[i].copy_cnt, opt.cu[i].copy_cnt))
|
|
2163 {
|
|
2164 printf("copy_cnt[%dx%d] failed!\n", 4 << i, 4 << i);
|
|
2165 return false;
|
|
2166 }
|
|
2167 }
|
|
2168
|
|
2169 if (opt.cu[i].cpy2Dto1D_shl)
|
|
2170 {
|
|
2171 if (!check_cpy2Dto1D_shl_t(ref.cu[i].cpy2Dto1D_shl, opt.cu[i].cpy2Dto1D_shl))
|
|
2172 {
|
|
2173 printf("cpy2Dto1D_shl[%dx%d] failed!\n", 4 << i, 4 << i);
|
|
2174 return false;
|
|
2175 }
|
|
2176 }
|
|
2177
|
|
2178 if (opt.cu[i].cpy2Dto1D_shr)
|
|
2179 {
|
|
2180 if (!check_cpy2Dto1D_shr_t(ref.cu[i].cpy2Dto1D_shr, opt.cu[i].cpy2Dto1D_shr))
|
|
2181 {
|
|
2182 printf("cpy2Dto1D_shr failed!\n");
|
|
2183 return false;
|
|
2184 }
|
|
2185 }
|
|
2186
|
|
2187 if (opt.cu[i].cpy1Dto2D_shl)
|
|
2188 {
|
|
2189 if (!check_cpy1Dto2D_shl_t(ref.cu[i].cpy1Dto2D_shl, opt.cu[i].cpy1Dto2D_shl))
|
|
2190 {
|
|
2191 printf("cpy1Dto2D_shl[%dx%d] failed!\n", 4 << i, 4 << i);
|
|
2192 return false;
|
|
2193 }
|
|
2194 }
|
|
2195
|
|
2196 if (opt.cu[i].cpy1Dto2D_shr)
|
|
2197 {
|
|
2198 if (!check_cpy1Dto2D_shr_t(ref.cu[i].cpy1Dto2D_shr, opt.cu[i].cpy1Dto2D_shr))
|
|
2199 {
|
|
2200 printf("cpy1Dto2D_shr[%dx%d] failed!\n", 4 << i, 4 << i);
|
|
2201 return false;
|
|
2202 }
|
|
2203 }
|
|
2204 }
|
|
2205 }
|
|
2206
|
|
2207 if (opt.weight_pp)
|
|
2208 {
|
|
2209 if (!check_weightp(ref.weight_pp, opt.weight_pp))
|
|
2210 {
|
|
2211 printf("Weighted Prediction (pixel) failed!\n");
|
|
2212 return false;
|
|
2213 }
|
|
2214 }
|
|
2215
|
|
2216 if (opt.weight_sp)
|
|
2217 {
|
|
2218 if (!check_weightp(ref.weight_sp, opt.weight_sp))
|
|
2219 {
|
|
2220 printf("Weighted Prediction (short) failed!\n");
|
|
2221 return false;
|
|
2222 }
|
|
2223 }
|
|
2224
|
|
2225 if (opt.frameInitLowres)
|
|
2226 {
|
|
2227 if (!check_downscale_t(ref.frameInitLowres, opt.frameInitLowres))
|
|
2228 {
|
|
2229 printf("downscale failed!\n");
|
|
2230 return false;
|
|
2231 }
|
|
2232 }
|
|
2233
|
|
2234 if (opt.scale1D_128to64)
|
|
2235 {
|
|
2236 if (!check_scale1D_pp(ref.scale1D_128to64, opt.scale1D_128to64))
|
|
2237 {
|
|
2238 printf("scale1D_128to64 failed!\n");
|
|
2239 return false;
|
|
2240 }
|
|
2241 }
|
|
2242
|
|
2243 if (opt.scale2D_64to32)
|
|
2244 {
|
|
2245 if (!check_scale2D_pp(ref.scale2D_64to32, opt.scale2D_64to32))
|
|
2246 {
|
|
2247 printf("scale2D_64to32 failed!\n");
|
|
2248 return false;
|
|
2249 }
|
|
2250 }
|
|
2251
|
|
2252 if (opt.ssim_4x4x2_core)
|
|
2253 {
|
|
2254 if (!check_ssim_4x4x2_core(ref.ssim_4x4x2_core, opt.ssim_4x4x2_core))
|
|
2255 {
|
|
2256 printf("ssim_end_4 failed!\n");
|
|
2257 return false;
|
|
2258 }
|
|
2259 }
|
|
2260
|
|
2261 if (opt.ssim_end_4)
|
|
2262 {
|
|
2263 if (!check_ssim_end(ref.ssim_end_4, opt.ssim_end_4))
|
|
2264 {
|
|
2265 printf("ssim_end_4 failed!\n");
|
|
2266 return false;
|
|
2267 }
|
|
2268 }
|
|
2269
|
|
2270 if (opt.sign)
|
|
2271 {
|
|
2272 if (!check_calSign(ref.sign, opt.sign))
|
|
2273 {
|
|
2274 printf("calSign failed\n");
|
|
2275 return false;
|
|
2276 }
|
|
2277 }
|
|
2278
|
|
2279 if (opt.saoCuOrgE0)
|
|
2280 {
|
|
2281 if (!check_saoCuOrgE0_t(ref.saoCuOrgE0, opt.saoCuOrgE0))
|
|
2282 {
|
|
2283 printf("SAO_EO_0 failed\n");
|
|
2284 return false;
|
|
2285 }
|
|
2286 }
|
|
2287
|
|
2288 if (opt.saoCuOrgE1)
|
|
2289 {
|
|
2290 if (!check_saoCuOrgE1_t(ref.saoCuOrgE1, opt.saoCuOrgE1))
|
|
2291 {
|
|
2292 printf("SAO_EO_1 failed\n");
|
|
2293 return false;
|
|
2294 }
|
|
2295 }
|
|
2296
|
|
2297 if (opt.saoCuOrgE1_2Rows)
|
|
2298 {
|
|
2299 if (!check_saoCuOrgE1_t(ref.saoCuOrgE1_2Rows, opt.saoCuOrgE1_2Rows))
|
|
2300 {
|
|
2301 printf("SAO_EO_1_2Rows failed\n");
|
|
2302 return false;
|
|
2303 }
|
|
2304 }
|
|
2305
|
|
2306 if (opt.saoCuOrgE2[0] || opt.saoCuOrgE2[1])
|
|
2307 {
|
|
2308 saoCuOrgE2_t ref1[] = { ref.saoCuOrgE2[0], ref.saoCuOrgE2[1] };
|
|
2309 saoCuOrgE2_t opt1[] = { opt.saoCuOrgE2[0], opt.saoCuOrgE2[1] };
|
|
2310
|
|
2311 if (!check_saoCuOrgE2_t(ref1, opt1))
|
|
2312 {
|
|
2313 printf("SAO_EO_2[0] && SAO_EO_2[1] failed\n");
|
|
2314 return false;
|
|
2315 }
|
|
2316 }
|
|
2317
|
|
2318 if (opt.saoCuOrgE3[0])
|
|
2319 {
|
|
2320 if (!check_saoCuOrgE3_t(ref.saoCuOrgE3[0], opt.saoCuOrgE3[0]))
|
|
2321 {
|
|
2322 printf("SAO_EO_3[0] failed\n");
|
|
2323 return false;
|
|
2324 }
|
|
2325 }
|
|
2326
|
|
2327 if (opt.saoCuOrgE3[1])
|
|
2328 {
|
|
2329 if (!check_saoCuOrgE3_32_t(ref.saoCuOrgE3[1], opt.saoCuOrgE3[1]))
|
|
2330 {
|
|
2331 printf("SAO_EO_3[1] failed\n");
|
|
2332 return false;
|
|
2333 }
|
|
2334 }
|
|
2335
|
|
2336 if (opt.saoCuOrgB0)
|
|
2337 {
|
|
2338 if (!check_saoCuOrgB0_t(ref.saoCuOrgB0, opt.saoCuOrgB0))
|
|
2339 {
|
|
2340 printf("SAO_BO_0 failed\n");
|
|
2341 return false;
|
|
2342 }
|
|
2343 }
|
|
2344
|
|
2345 if (opt.saoCuStatsBO)
|
|
2346 {
|
|
2347 if (!check_saoCuStatsBO_t(ref.saoCuStatsBO, opt.saoCuStatsBO))
|
|
2348 {
|
|
2349 printf("saoCuStatsBO failed\n");
|
|
2350 return false;
|
|
2351 }
|
|
2352 }
|
|
2353
|
|
2354 if (opt.saoCuStatsE0)
|
|
2355 {
|
|
2356 if (!check_saoCuStatsE0_t(ref.saoCuStatsE0, opt.saoCuStatsE0))
|
|
2357 {
|
|
2358 printf("saoCuStatsE0 failed\n");
|
|
2359 return false;
|
|
2360 }
|
|
2361 }
|
|
2362
|
|
2363 if (opt.saoCuStatsE1)
|
|
2364 {
|
|
2365 if (!check_saoCuStatsE1_t(ref.saoCuStatsE1, opt.saoCuStatsE1))
|
|
2366 {
|
|
2367 printf("saoCuStatsE1 failed\n");
|
|
2368 return false;
|
|
2369 }
|
|
2370 }
|
|
2371
|
|
2372 if (opt.saoCuStatsE2)
|
|
2373 {
|
|
2374 if (!check_saoCuStatsE2_t(ref.saoCuStatsE2, opt.saoCuStatsE2))
|
|
2375 {
|
|
2376 printf("saoCuStatsE2 failed\n");
|
|
2377 return false;
|
|
2378 }
|
|
2379 }
|
|
2380
|
|
2381 if (opt.saoCuStatsE3)
|
|
2382 {
|
|
2383 if (!check_saoCuStatsE3_t(ref.saoCuStatsE3, opt.saoCuStatsE3))
|
|
2384 {
|
|
2385 printf("saoCuStatsE3 failed\n");
|
|
2386 return false;
|
|
2387 }
|
|
2388 }
|
|
2389
|
|
2390 if (opt.planecopy_sp)
|
|
2391 {
|
|
2392 if (!check_planecopy_sp(ref.planecopy_sp, opt.planecopy_sp))
|
|
2393 {
|
|
2394 printf("planecopy_sp failed\n");
|
|
2395 return false;
|
|
2396 }
|
|
2397 }
|
|
2398
|
|
2399 if (opt.planecopy_sp_shl)
|
|
2400 {
|
|
2401 if (!check_planecopy_sp(ref.planecopy_sp_shl, opt.planecopy_sp_shl))
|
|
2402 {
|
|
2403 printf("planecopy_sp_shl failed\n");
|
|
2404 return false;
|
|
2405 }
|
|
2406 }
|
|
2407
|
|
2408 if (opt.planecopy_cp)
|
|
2409 {
|
|
2410 if (!check_planecopy_cp(ref.planecopy_cp, opt.planecopy_cp))
|
|
2411 {
|
|
2412 printf("planecopy_cp failed\n");
|
|
2413 return false;
|
|
2414 }
|
|
2415 }
|
|
2416
|
|
2417 if (opt.propagateCost)
|
|
2418 {
|
|
2419 if (!check_cutree_propagate_cost(ref.propagateCost, opt.propagateCost))
|
|
2420 {
|
|
2421 printf("propagateCost failed\n");
|
|
2422 return false;
|
|
2423 }
|
|
2424 }
|
|
2425
|
|
2426 if (opt.scanPosLast)
|
|
2427 {
|
|
2428 if (!check_scanPosLast(ref.scanPosLast, opt.scanPosLast))
|
|
2429 {
|
|
2430 printf("scanPosLast failed!\n");
|
|
2431 return false;
|
|
2432 }
|
|
2433 }
|
|
2434
|
|
2435 if (opt.findPosFirstLast)
|
|
2436 {
|
|
2437 if (!check_findPosFirstLast(ref.findPosFirstLast, opt.findPosFirstLast))
|
|
2438 {
|
|
2439 printf("findPosFirstLast failed!\n");
|
|
2440 return false;
|
|
2441 }
|
|
2442 }
|
|
2443
|
|
2444 if (opt.costCoeffNxN)
|
|
2445 {
|
|
2446 if (!check_costCoeffNxN(ref.costCoeffNxN, opt.costCoeffNxN))
|
|
2447 {
|
|
2448 printf("costCoeffNxN failed!\n");
|
|
2449 return false;
|
|
2450 }
|
|
2451 }
|
|
2452
|
|
2453 if (opt.costCoeffRemain)
|
|
2454 {
|
|
2455 if (!check_costCoeffRemain(ref.costCoeffRemain, opt.costCoeffRemain))
|
|
2456 {
|
|
2457 printf("costCoeffRemain failed!\n");
|
|
2458 return false;
|
|
2459 }
|
|
2460 }
|
|
2461
|
|
2462 if (opt.costC1C2Flag)
|
|
2463 {
|
|
2464 if (!check_costC1C2Flag(ref.costC1C2Flag, opt.costC1C2Flag))
|
|
2465 {
|
|
2466 printf("costC1C2Flag failed!\n");
|
|
2467 return false;
|
|
2468 }
|
|
2469 }
|
|
2470
|
|
2471
|
|
2472 if (opt.planeClipAndMax)
|
|
2473 {
|
|
2474 if (!check_planeClipAndMax(ref.planeClipAndMax, opt.planeClipAndMax))
|
|
2475 {
|
|
2476 printf("planeClipAndMax failed!\n");
|
|
2477 return false;
|
|
2478 }
|
|
2479 }
|
|
2480
|
|
2481 return true;
|
|
2482 }
|
|
2483
|
|
2484 void PixelHarness::measurePartition(int part, const EncoderPrimitives& ref, const EncoderPrimitives& opt)
|
|
2485 {
|
|
2486 ALIGN_VAR_16(int, cres[16]);
|
|
2487 pixel *fref = pbuf2 + 2 * INCR;
|
|
2488 char header[128];
|
|
2489 #define HEADER(str, ...) sprintf(header, str, __VA_ARGS__); printf("%22s", header);
|
|
2490
|
|
2491 if (opt.pu[part].satd)
|
|
2492 {
|
|
2493 HEADER("satd[%s]", lumaPartStr[part]);
|
|
2494 REPORT_SPEEDUP(opt.pu[part].satd, ref.pu[part].satd, pbuf1, STRIDE, fref, STRIDE);
|
|
2495 }
|
|
2496
|
|
2497 if (opt.pu[part].pixelavg_pp)
|
|
2498 {
|
|
2499 HEADER("avg_pp[%s]", lumaPartStr[part]);
|
|
2500 REPORT_SPEEDUP(opt.pu[part].pixelavg_pp, ref.pu[part].pixelavg_pp, pbuf1, STRIDE, pbuf2, STRIDE, pbuf3, STRIDE, 32);
|
|
2501 }
|
|
2502
|
|
2503 if (opt.pu[part].sad)
|
|
2504 {
|
|
2505 HEADER("sad[%s]", lumaPartStr[part]);
|
|
2506 REPORT_SPEEDUP(opt.pu[part].sad, ref.pu[part].sad, pbuf1, STRIDE, fref, STRIDE);
|
|
2507 }
|
|
2508
|
|
2509 if (opt.pu[part].sad_x3)
|
|
2510 {
|
|
2511 HEADER("sad_x3[%s]", lumaPartStr[part]);
|
|
2512 REPORT_SPEEDUP(opt.pu[part].sad_x3, ref.pu[part].sad_x3, pbuf1, fref, fref + 1, fref - 1, FENC_STRIDE + 5, &cres[0]);
|
|
2513 }
|
|
2514
|
|
2515 if (opt.pu[part].sad_x4)
|
|
2516 {
|
|
2517 HEADER("sad_x4[%s]", lumaPartStr[part]);
|
|
2518 REPORT_SPEEDUP(opt.pu[part].sad_x4, ref.pu[part].sad_x4, pbuf1, fref, fref + 1, fref - 1, fref - INCR, FENC_STRIDE + 5, &cres[0]);
|
|
2519 }
|
|
2520
|
|
2521 if (opt.pu[part].copy_pp)
|
|
2522 {
|
|
2523 HEADER("copy_pp[%s]", lumaPartStr[part]);
|
|
2524 REPORT_SPEEDUP(opt.pu[part].copy_pp, ref.pu[part].copy_pp, pbuf1, 64, pbuf2, 64);
|
|
2525 }
|
|
2526
|
|
2527 if (opt.pu[part].addAvg)
|
|
2528 {
|
|
2529 HEADER("addAvg[%s]", lumaPartStr[part]);
|
|
2530 REPORT_SPEEDUP(opt.pu[part].addAvg, ref.pu[part].addAvg, sbuf1, sbuf2, pbuf1, STRIDE, STRIDE, STRIDE);
|
|
2531 }
|
|
2532
|
|
2533 if (part < NUM_CU_SIZES)
|
|
2534 {
|
|
2535 if (opt.cu[part].sse_pp)
|
|
2536 {
|
|
2537 HEADER("sse_pp[%s]", lumaPartStr[part]);
|
|
2538 REPORT_SPEEDUP(opt.cu[part].sse_pp, ref.cu[part].sse_pp, pbuf1, STRIDE, fref, STRIDE);
|
|
2539 }
|
|
2540
|
|
2541 if (opt.cu[part].sse_ss)
|
|
2542 {
|
|
2543 HEADER("sse_ss[%s]", lumaPartStr[part]);
|
|
2544 REPORT_SPEEDUP(opt.cu[part].sse_ss, ref.cu[part].sse_ss, (int16_t*)pbuf1, STRIDE, (int16_t*)fref, STRIDE);
|
|
2545 }
|
|
2546 if (opt.cu[part].sub_ps)
|
|
2547 {
|
|
2548 HEADER("sub_ps[%s]", lumaPartStr[part]);
|
|
2549 REPORT_SPEEDUP(opt.cu[part].sub_ps, ref.cu[part].sub_ps, (int16_t*)pbuf1, FENC_STRIDE, pbuf2, pbuf1, STRIDE, STRIDE);
|
|
2550 }
|
|
2551 if (opt.cu[part].add_ps)
|
|
2552 {
|
|
2553 HEADER("add_ps[%s]", lumaPartStr[part]);
|
|
2554 REPORT_SPEEDUP(opt.cu[part].add_ps, ref.cu[part].add_ps, pbuf1, FENC_STRIDE, pbuf2, sbuf1, STRIDE, STRIDE);
|
|
2555 }
|
|
2556 if (opt.cu[part].copy_ss)
|
|
2557 {
|
|
2558 HEADER("copy_ss[%s]", lumaPartStr[part]);
|
|
2559 REPORT_SPEEDUP(opt.cu[part].copy_ss, ref.cu[part].copy_ss, sbuf1, 128, sbuf2, 128);
|
|
2560 }
|
|
2561 if (opt.cu[part].copy_sp)
|
|
2562 {
|
|
2563 HEADER("copy_sp[%s]", lumaPartStr[part]);
|
|
2564 REPORT_SPEEDUP(opt.cu[part].copy_sp, ref.cu[part].copy_sp, pbuf1, 64, sbuf3, 128);
|
|
2565 }
|
|
2566 if (opt.cu[part].copy_ps)
|
|
2567 {
|
|
2568 HEADER("copy_ps[%s]", lumaPartStr[part]);
|
|
2569 REPORT_SPEEDUP(opt.cu[part].copy_ps, ref.cu[part].copy_ps, sbuf1, 128, pbuf1, 64);
|
|
2570 }
|
|
2571 }
|
|
2572
|
|
2573 for (int i = 0; i < X265_CSP_COUNT; i++)
|
|
2574 {
|
|
2575 if (opt.chroma[i].pu[part].copy_pp)
|
|
2576 {
|
|
2577 HEADER("[%s] copy_pp[%s]", x265_source_csp_names[i], chromaPartStr[i][part]);
|
|
2578 REPORT_SPEEDUP(opt.chroma[i].pu[part].copy_pp, ref.chroma[i].pu[part].copy_pp, pbuf1, 64, pbuf2, 128);
|
|
2579 }
|
|
2580 if (opt.chroma[i].pu[part].addAvg)
|
|
2581 {
|
|
2582 HEADER("[%s] addAvg[%s]", x265_source_csp_names[i], chromaPartStr[i][part]);
|
|
2583 REPORT_SPEEDUP(opt.chroma[i].pu[part].addAvg, ref.chroma[i].pu[part].addAvg, sbuf1, sbuf2, pbuf1, STRIDE, STRIDE, STRIDE);
|
|
2584 }
|
|
2585 if (opt.chroma[i].pu[part].satd)
|
|
2586 {
|
|
2587 HEADER("[%s] satd[%s]", x265_source_csp_names[i], chromaPartStr[i][part]);
|
|
2588 REPORT_SPEEDUP(opt.chroma[i].pu[part].satd, ref.chroma[i].pu[part].satd, pbuf1, STRIDE, fref, STRIDE);
|
|
2589 }
|
|
2590 if (part < NUM_CU_SIZES)
|
|
2591 {
|
|
2592 if (opt.chroma[i].cu[part].copy_ss)
|
|
2593 {
|
|
2594 HEADER("[%s] copy_ss[%s]", x265_source_csp_names[i], chromaPartStr[i][part]);
|
|
2595 REPORT_SPEEDUP(opt.chroma[i].cu[part].copy_ss, ref.chroma[i].cu[part].copy_ss, sbuf1, 64, sbuf2, 128);
|
|
2596 }
|
|
2597 if (opt.chroma[i].cu[part].copy_ps)
|
|
2598 {
|
|
2599 HEADER("[%s] copy_ps[%s]", x265_source_csp_names[i], chromaPartStr[i][part]);
|
|
2600 REPORT_SPEEDUP(opt.chroma[i].cu[part].copy_ps, ref.chroma[i].cu[part].copy_ps, sbuf1, 64, pbuf1, 128);
|
|
2601 }
|
|
2602 if (opt.chroma[i].cu[part].copy_sp)
|
|
2603 {
|
|
2604 HEADER("[%s] copy_sp[%s]", x265_source_csp_names[i], chromaPartStr[i][part]);
|
|
2605 REPORT_SPEEDUP(opt.chroma[i].cu[part].copy_sp, ref.chroma[i].cu[part].copy_sp, pbuf1, 64, sbuf3, 128);
|
|
2606 }
|
|
2607 if (opt.chroma[i].cu[part].sse_pp)
|
|
2608 {
|
|
2609 HEADER("[%s] sse_pp[%s]", x265_source_csp_names[i], chromaPartStr[i][part]);
|
|
2610 REPORT_SPEEDUP(opt.chroma[i].cu[part].sse_pp, ref.chroma[i].cu[part].sse_pp, pbuf1, STRIDE, fref, STRIDE);
|
|
2611 }
|
|
2612 if (opt.chroma[i].cu[part].sub_ps)
|
|
2613 {
|
|
2614 HEADER("[%s] sub_ps[%s]", x265_source_csp_names[i], chromaPartStr[i][part]);
|
|
2615 REPORT_SPEEDUP(opt.chroma[i].cu[part].sub_ps, ref.chroma[i].cu[part].sub_ps, (int16_t*)pbuf1, FENC_STRIDE, pbuf2, pbuf1, STRIDE, STRIDE);
|
|
2616 }
|
|
2617 if (opt.chroma[i].cu[part].add_ps)
|
|
2618 {
|
|
2619 HEADER("[%s] add_ps[%s]", x265_source_csp_names[i], chromaPartStr[i][part]);
|
|
2620 REPORT_SPEEDUP(opt.chroma[i].cu[part].add_ps, ref.chroma[i].cu[part].add_ps, pbuf1, FENC_STRIDE, pbuf2, sbuf1, STRIDE, STRIDE);
|
|
2621 }
|
|
2622 if (opt.chroma[i].cu[part].sa8d)
|
|
2623 {
|
|
2624 HEADER("[%s] sa8d[%s]", x265_source_csp_names[i], chromaPartStr[i][part]);
|
|
2625 REPORT_SPEEDUP(opt.chroma[i].cu[part].sa8d, ref.chroma[i].cu[part].sa8d, pbuf1, STRIDE, pbuf2, STRIDE);
|
|
2626 }
|
|
2627 }
|
|
2628 }
|
|
2629
|
|
2630 #undef HEADER
|
|
2631 }
|
|
2632
|
|
2633 void PixelHarness::measureSpeed(const EncoderPrimitives& ref, const EncoderPrimitives& opt)
|
|
2634 {
|
|
2635 char header[128];
|
|
2636
|
|
2637 #define HEADER(str, ...) sprintf(header, str, __VA_ARGS__); printf("%22s", header);
|
|
2638 #define HEADER0(str) printf("%22s", str);
|
|
2639
|
|
2640 for (int size = 4; size <= 64; size *= 2)
|
|
2641 {
|
|
2642 int part = partitionFromSizes(size, size); // 2Nx2N
|
|
2643 measurePartition(part, ref, opt);
|
|
2644
|
|
2645 if (size > 4)
|
|
2646 {
|
|
2647 part = partitionFromSizes(size, size >> 1); // 2NxN
|
|
2648 measurePartition(part, ref, opt);
|
|
2649 part = partitionFromSizes(size >> 1, size); // Nx2N
|
|
2650 measurePartition(part, ref, opt);
|
|
2651 }
|
|
2652 if (size > 8)
|
|
2653 {
|
|
2654 // 4 AMP modes
|
|
2655 part = partitionFromSizes(size, size >> 2);
|
|
2656 measurePartition(part, ref, opt);
|
|
2657 part = partitionFromSizes(size, 3 * (size >> 2));
|
|
2658 measurePartition(part, ref, opt);
|
|
2659
|
|
2660 part = partitionFromSizes(size >> 2, size);
|
|
2661 measurePartition(part, ref, opt);
|
|
2662 part = partitionFromSizes(3 * (size >> 2), size);
|
|
2663 measurePartition(part, ref, opt);
|
|
2664 }
|
|
2665 }
|
|
2666
|
|
2667 for (int i = 0; i < NUM_CU_SIZES; i++)
|
|
2668 {
|
|
2669 if ((i <= BLOCK_32x32) && opt.cu[i].ssd_s)
|
|
2670 {
|
|
2671 HEADER("ssd_s[%dx%d]", 4 << i, 4 << i);
|
|
2672 REPORT_SPEEDUP(opt.cu[i].ssd_s, ref.cu[i].ssd_s, sbuf1, STRIDE);
|
|
2673 }
|
|
2674 if (opt.cu[i].sa8d)
|
|
2675 {
|
|
2676 HEADER("sa8d[%dx%d]", 4 << i, 4 << i);
|
|
2677 REPORT_SPEEDUP(opt.cu[i].sa8d, ref.cu[i].sa8d, pbuf1, STRIDE, pbuf2, STRIDE);
|
|
2678 }
|
|
2679 if (opt.cu[i].calcresidual)
|
|
2680 {
|
|
2681 HEADER("residual[%dx%d]", 4 << i, 4 << i);
|
|
2682 REPORT_SPEEDUP(opt.cu[i].calcresidual, ref.cu[i].calcresidual, pbuf1, pbuf2, sbuf1, 64);
|
|
2683 }
|
|
2684
|
|
2685 if (opt.cu[i].blockfill_s)
|
|
2686 {
|
|
2687 HEADER("blkfill[%dx%d]", 4 << i, 4 << i);
|
|
2688 REPORT_SPEEDUP(opt.cu[i].blockfill_s, ref.cu[i].blockfill_s, sbuf1, 64, SHORT_MAX);
|
|
2689 }
|
|
2690
|
|
2691 if (opt.cu[i].transpose)
|
|
2692 {
|
|
2693 HEADER("transpose[%dx%d]", 4 << i, 4 << i);
|
|
2694 REPORT_SPEEDUP(opt.cu[i].transpose, ref.cu[i].transpose, pbuf1, pbuf2, STRIDE);
|
|
2695 }
|
|
2696
|
|
2697 if (opt.cu[i].var)
|
|
2698 {
|
|
2699 HEADER("var[%dx%d]", 4 << i, 4 << i);
|
|
2700 REPORT_SPEEDUP(opt.cu[i].var, ref.cu[i].var, pbuf1, STRIDE);
|
|
2701 }
|
|
2702
|
|
2703 if ((i < BLOCK_64x64) && opt.cu[i].cpy2Dto1D_shl)
|
|
2704 {
|
|
2705 HEADER("cpy2Dto1D_shl[%dx%d]", 4 << i, 4 << i);
|
|
2706 const int shift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH - (i + 2);
|
|
2707 REPORT_SPEEDUP(opt.cu[i].cpy2Dto1D_shl, ref.cu[i].cpy2Dto1D_shl, sbuf1, sbuf2, STRIDE, X265_MAX(0, shift));
|
|
2708 }
|
|
2709
|
|
2710 if ((i < BLOCK_64x64) && opt.cu[i].cpy2Dto1D_shr)
|
|
2711 {
|
|
2712 HEADER("cpy2Dto1D_shr[%dx%d]", 4 << i, 4 << i);
|
|
2713 REPORT_SPEEDUP(opt.cu[i].cpy2Dto1D_shr, ref.cu[i].cpy2Dto1D_shr, sbuf1, sbuf2, STRIDE, 3);
|
|
2714 }
|
|
2715
|
|
2716 if ((i < BLOCK_64x64) && opt.cu[i].cpy1Dto2D_shl)
|
|
2717 {
|
|
2718 HEADER("cpy1Dto2D_shl[%dx%d]", 4 << i, 4 << i);
|
|
2719 REPORT_SPEEDUP(opt.cu[i].cpy1Dto2D_shl, ref.cu[i].cpy1Dto2D_shl, sbuf1, sbuf2, STRIDE, 64);
|
|
2720 }
|
|
2721
|
|
2722 if ((i < BLOCK_64x64) && opt.cu[i].cpy1Dto2D_shr)
|
|
2723 {
|
|
2724 HEADER("cpy1Dto2D_shr[%dx%d]", 4 << i, 4 << i);
|
|
2725 REPORT_SPEEDUP(opt.cu[i].cpy1Dto2D_shr, ref.cu[i].cpy1Dto2D_shr, sbuf1, sbuf2, STRIDE, 64);
|
|
2726 }
|
|
2727
|
|
2728 if ((i < BLOCK_64x64) && opt.cu[i].copy_cnt)
|
|
2729 {
|
|
2730 HEADER("copy_cnt[%dx%d]", 4 << i, 4 << i);
|
|
2731 REPORT_SPEEDUP(opt.cu[i].copy_cnt, ref.cu[i].copy_cnt, sbuf1, sbuf2, STRIDE);
|
|
2732 }
|
|
2733
|
|
2734 if (opt.cu[i].psy_cost_pp)
|
|
2735 {
|
|
2736 HEADER("psy_cost_pp[%dx%d]", 4 << i, 4 << i);
|
|
2737 REPORT_SPEEDUP(opt.cu[i].psy_cost_pp, ref.cu[i].psy_cost_pp, pbuf1, STRIDE, pbuf2, STRIDE);
|
|
2738 }
|
|
2739
|
|
2740 if (opt.cu[i].psy_cost_ss)
|
|
2741 {
|
|
2742 HEADER("psy_cost_ss[%dx%d]", 4 << i, 4 << i);
|
|
2743 REPORT_SPEEDUP(opt.cu[i].psy_cost_ss, ref.cu[i].psy_cost_ss, sbuf1, STRIDE, sbuf2, STRIDE);
|
|
2744 }
|
|
2745 }
|
|
2746
|
|
2747 if (opt.weight_pp)
|
|
2748 {
|
|
2749 HEADER0("weight_pp");
|
|
2750 REPORT_SPEEDUP(opt.weight_pp, ref.weight_pp, pbuf1, pbuf2, 64, 32, 32, 128, 1 << 9, 10, 100);
|
|
2751 }
|
|
2752
|
|
2753 if (opt.weight_sp)
|
|
2754 {
|
|
2755 HEADER0("weight_sp");
|
|
2756 REPORT_SPEEDUP(opt.weight_sp, ref.weight_sp, (int16_t*)sbuf1, pbuf1, 64, 64, 32, 32, 128, 1 << 9, 10, 100);
|
|
2757 }
|
|
2758
|
|
2759 if (opt.frameInitLowres)
|
|
2760 {
|
|
2761 HEADER0("downscale");
|
|
2762 REPORT_SPEEDUP(opt.frameInitLowres, ref.frameInitLowres, pbuf2, pbuf1, pbuf2, pbuf3, pbuf4, 64, 64, 64, 64);
|
|
2763 }
|
|
2764
|
|
2765 if (opt.scale1D_128to64)
|
|
2766 {
|
|
2767 HEADER0("scale1D_128to64");
|
|
2768 REPORT_SPEEDUP(opt.scale1D_128to64, ref.scale1D_128to64, pbuf2, pbuf1);
|
|
2769 }
|
|
2770
|
|
2771 if (opt.scale2D_64to32)
|
|
2772 {
|
|
2773 HEADER0("scale2D_64to32");
|
|
2774 REPORT_SPEEDUP(opt.scale2D_64to32, ref.scale2D_64to32, pbuf2, pbuf1, 64);
|
|
2775 }
|
|
2776
|
|
2777 if (opt.ssim_4x4x2_core)
|
|
2778 {
|
|
2779 HEADER0("ssim_4x4x2_core");
|
|
2780 REPORT_SPEEDUP(opt.ssim_4x4x2_core, ref.ssim_4x4x2_core, pbuf1, 64, pbuf2, 64, (int(*)[4])sbuf1);
|
|
2781 }
|
|
2782
|
|
2783 if (opt.ssim_end_4)
|
|
2784 {
|
|
2785 HEADER0("ssim_end_4");
|
|
2786 REPORT_SPEEDUP(opt.ssim_end_4, ref.ssim_end_4, (int(*)[4])pbuf2, (int(*)[4])pbuf1, 4);
|
|
2787 }
|
|
2788
|
|
2789 if (opt.sign)
|
|
2790 {
|
|
2791 HEADER0("calSign");
|
|
2792 REPORT_SPEEDUP(opt.sign, ref.sign, psbuf1, pbuf1, pbuf2, 64);
|
|
2793 }
|
|
2794
|
|
2795 if (opt.saoCuOrgE0)
|
|
2796 {
|
|
2797 HEADER0("SAO_EO_0");
|
|
2798 REPORT_SPEEDUP(opt.saoCuOrgE0, ref.saoCuOrgE0, pbuf1, psbuf1, 64, psbuf2, 64);
|
|
2799 }
|
|
2800
|
|
2801 if (opt.saoCuOrgE1)
|
|
2802 {
|
|
2803 HEADER0("SAO_EO_1");
|
|
2804 REPORT_SPEEDUP(opt.saoCuOrgE1, ref.saoCuOrgE1, pbuf1, psbuf2, psbuf1, 64, 64);
|
|
2805 }
|
|
2806
|
|
2807 if (opt.saoCuOrgE1_2Rows)
|
|
2808 {
|
|
2809 HEADER0("SAO_EO_1_2Rows");
|
|
2810 REPORT_SPEEDUP(opt.saoCuOrgE1_2Rows, ref.saoCuOrgE1_2Rows, pbuf1, psbuf2, psbuf1, 64, 64);
|
|
2811 }
|
|
2812
|
|
2813 if (opt.saoCuOrgE2[0])
|
|
2814 {
|
|
2815 HEADER0("SAO_EO_2[0]");
|
|
2816 REPORT_SPEEDUP(opt.saoCuOrgE2[0], ref.saoCuOrgE2[0], pbuf1, psbuf1, psbuf2, psbuf3, 16, 64);
|
|
2817 }
|
|
2818
|
|
2819 if (opt.saoCuOrgE2[1])
|
|
2820 {
|
|
2821 HEADER0("SAO_EO_2[1]");
|
|
2822 REPORT_SPEEDUP(opt.saoCuOrgE2[1], ref.saoCuOrgE2[1], pbuf1, psbuf1, psbuf2, psbuf3, 64, 64);
|
|
2823 }
|
|
2824
|
|
2825 if (opt.saoCuOrgE3[0])
|
|
2826 {
|
|
2827 HEADER0("SAO_EO_3[0]");
|
|
2828 REPORT_SPEEDUP(opt.saoCuOrgE3[0], ref.saoCuOrgE3[0], pbuf1, psbuf2, psbuf1, 64, 0, 16);
|
|
2829 }
|
|
2830
|
|
2831 if (opt.saoCuOrgE3[1])
|
|
2832 {
|
|
2833 HEADER0("SAO_EO_3[1]");
|
|
2834 REPORT_SPEEDUP(opt.saoCuOrgE3[1], ref.saoCuOrgE3[1], pbuf1, psbuf2, psbuf1, 64, 0, 64);
|
|
2835 }
|
|
2836
|
|
2837 if (opt.saoCuOrgB0)
|
|
2838 {
|
|
2839 HEADER0("SAO_BO_0");
|
|
2840 REPORT_SPEEDUP(opt.saoCuOrgB0, ref.saoCuOrgB0, pbuf1, psbuf1, 64, 64, 64);
|
|
2841 }
|
|
2842
|
|
2843 if (opt.saoCuStatsBO)
|
|
2844 {
|
|
2845 int32_t stats[33], count[33];
|
|
2846 HEADER0("saoCuStatsBO");
|
|
2847 REPORT_SPEEDUP(opt.saoCuStatsBO, ref.saoCuStatsBO, pbuf2, pbuf3, 64, 60, 61, stats, count);
|
|
2848 }
|
|
2849
|
|
2850 if (opt.saoCuStatsE0)
|
|
2851 {
|
|
2852 int32_t stats[33], count[33];
|
|
2853 HEADER0("saoCuStatsE0");
|
|
2854 REPORT_SPEEDUP(opt.saoCuStatsE0, ref.saoCuStatsE0, pbuf2, pbuf3, 64, 60, 61, stats, count);
|
|
2855 }
|
|
2856
|
|
2857 if (opt.saoCuStatsE1)
|
|
2858 {
|
|
2859 int32_t stats[5], count[5];
|
|
2860 int8_t upBuff1[MAX_CU_SIZE + 2];
|
|
2861 memset(upBuff1, 1, sizeof(upBuff1));
|
|
2862 HEADER0("saoCuStatsE1");
|
|
2863 REPORT_SPEEDUP(opt.saoCuStatsE1, ref.saoCuStatsE1, pbuf2, pbuf3, 64, upBuff1 + 1,60, 61, stats, count);
|
|
2864 }
|
|
2865
|
|
2866 if (opt.saoCuStatsE2)
|
|
2867 {
|
|
2868 int32_t stats[5], count[5];
|
|
2869 int8_t upBuff1[MAX_CU_SIZE + 2];
|
|
2870 int8_t upBufft[MAX_CU_SIZE + 2];
|
|
2871 memset(upBuff1, 1, sizeof(upBuff1));
|
|
2872 memset(upBufft, -1, sizeof(upBufft));
|
|
2873 HEADER0("saoCuStatsE2");
|
|
2874 REPORT_SPEEDUP(opt.saoCuStatsE2, ref.saoCuStatsE2, pbuf2, pbuf3, 64, upBuff1 + 1, upBufft + 1, 60, 61, stats, count);
|
|
2875 }
|
|
2876
|
|
2877 if (opt.saoCuStatsE3)
|
|
2878 {
|
|
2879 int8_t upBuff1[MAX_CU_SIZE + 2];
|
|
2880 int32_t stats[5], count[5];
|
|
2881 memset(upBuff1, 1, sizeof(upBuff1));
|
|
2882 HEADER0("saoCuStatsE3");
|
|
2883 REPORT_SPEEDUP(opt.saoCuStatsE3, ref.saoCuStatsE3, pbuf2, pbuf3, 64, upBuff1 + 1, 60, 61, stats, count);
|
|
2884 }
|
|
2885
|
|
2886 if (opt.planecopy_sp)
|
|
2887 {
|
|
2888 HEADER0("planecopy_sp");
|
|
2889 REPORT_SPEEDUP(opt.planecopy_sp, ref.planecopy_sp, ushort_test_buff[0], 64, pbuf1, 64, 64, 64, 8, 255);
|
|
2890 }
|
|
2891
|
|
2892 if (opt.planecopy_cp)
|
|
2893 {
|
|
2894 HEADER0("planecopy_cp");
|
|
2895 REPORT_SPEEDUP(opt.planecopy_cp, ref.planecopy_cp, uchar_test_buff[0], 64, pbuf1, 64, 64, 64, 2);
|
|
2896 }
|
|
2897
|
|
2898 if (opt.propagateCost)
|
|
2899 {
|
|
2900 HEADER0("propagateCost");
|
|
2901 REPORT_SPEEDUP(opt.propagateCost, ref.propagateCost, ibuf1, ushort_test_buff[0], int_test_buff[0], ushort_test_buff[0], int_test_buff[0], double_test_buff[0], 80);
|
|
2902 }
|
|
2903
|
|
2904 if (opt.scanPosLast)
|
|
2905 {
|
|
2906 HEADER0("scanPosLast");
|
|
2907 coeff_t coefBuf[32 * 32];
|
|
2908 memset(coefBuf, 0, sizeof(coefBuf));
|
|
2909 memset(coefBuf + 32 * 31, 1, 32 * sizeof(coeff_t));
|
|
2910 REPORT_SPEEDUP(opt.scanPosLast, ref.scanPosLast, g_scanOrder[SCAN_DIAG][NUM_SCAN_SIZE - 1], coefBuf, (uint16_t*)sbuf1, (uint16_t*)sbuf2, (uint8_t*)psbuf1, 32, g_scan4x4[SCAN_DIAG], 32);
|
|
2911 }
|
|
2912
|
|
2913 if (opt.findPosFirstLast)
|
|
2914 {
|
|
2915 HEADER0("findPosFirstLast");
|
|
2916 coeff_t coefBuf[32 * MLS_CG_SIZE];
|
|
2917 memset(coefBuf, 0, sizeof(coefBuf));
|
|
2918 // every CG can't be all zeros!
|
|
2919 coefBuf[3 + 0 * 32] = 0x0BAD;
|
|
2920 coefBuf[3 + 1 * 32] = 0x0BAD;
|
|
2921 coefBuf[3 + 2 * 32] = 0x0BAD;
|
|
2922 coefBuf[3 + 3 * 32] = 0x0BAD;
|
|
2923 REPORT_SPEEDUP(opt.findPosFirstLast, ref.findPosFirstLast, coefBuf, 32, g_scan4x4[SCAN_DIAG]);
|
|
2924 }
|
|
2925
|
|
2926 if (opt.costCoeffNxN)
|
|
2927 {
|
|
2928 HEADER0("costCoeffNxN");
|
|
2929 coeff_t coefBuf[32 * 32];
|
|
2930 uint16_t tmpOut[16];
|
|
2931 memset(coefBuf, 1, sizeof(coefBuf));
|
|
2932 ALIGN_VAR_32(static uint8_t const, ctxSig[]) =
|
|
2933 {
|
|
2934 0, 1, 4, 5,
|
|
2935 2, 3, 4, 5,
|
|
2936 6, 6, 8, 8,
|
|
2937 7, 7, 8, 8
|
|
2938 };
|
|
2939 uint8_t ctx[OFF_SIG_FLAG_CTX + NUM_SIG_FLAG_CTX_LUMA];
|
|
2940 memset(ctx, 120, sizeof(ctx));
|
|
2941
|
|
2942 REPORT_SPEEDUP(opt.costCoeffNxN, ref.costCoeffNxN, g_scan4x4[SCAN_DIAG], coefBuf, 32, tmpOut, ctxSig, 0xFFFF, ctx, 1, 15, 32);
|
|
2943 }
|
|
2944
|
|
2945 if (opt.costCoeffRemain)
|
|
2946 {
|
|
2947 HEADER0("costCoeffRemain");
|
|
2948 uint16_t abscoefBuf[32 * 32];
|
|
2949 memset(abscoefBuf, 0, sizeof(abscoefBuf));
|
|
2950 memset(abscoefBuf + 32 * 31, 1, 32 * sizeof(uint16_t));
|
|
2951 REPORT_SPEEDUP(opt.costCoeffRemain, ref.costCoeffRemain, abscoefBuf, 16, 3);
|
|
2952 }
|
|
2953
|
|
2954 if (opt.costC1C2Flag)
|
|
2955 {
|
|
2956 HEADER0("costC1C2Flag");
|
|
2957 ALIGN_VAR_32(uint16_t, abscoefBuf[C1FLAG_NUMBER]);
|
|
2958 memset(abscoefBuf, 1, sizeof(abscoefBuf));
|
|
2959 abscoefBuf[C1FLAG_NUMBER - 2] = 2;
|
|
2960 abscoefBuf[C1FLAG_NUMBER - 1] = 3;
|
|
2961 REPORT_SPEEDUP(opt.costC1C2Flag, ref.costC1C2Flag, abscoefBuf, C1FLAG_NUMBER, (uint8_t*)psbuf1, 1);
|
|
2962 }
|
|
2963
|
|
2964 if (opt.planeClipAndMax)
|
|
2965 {
|
|
2966 HEADER0("planeClipAndMax");
|
|
2967 uint64_t dummy;
|
|
2968 REPORT_SPEEDUP(opt.planeClipAndMax, ref.planeClipAndMax, pbuf1, 128, 63, 62, &dummy, 1, PIXEL_MAX - 1);
|
|
2969 }
|
|
2970 }
|