/*****************************************************************************
 * Copyright (C) 2013 x265 project
 *
 * Authors: Steve Borho <steve@borho.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at license @ x265.com.
 *****************************************************************************/

#ifndef _TESTHARNESS_H_
#define _TESTHARNESS_H_ 1

#include "common.h"
#include "primitives.h"

#if _MSC_VER
#pragma warning(disable: 4324) // structure was padded due to __declspec(align())
#endif

/* Extremes of the sample/coefficient value ranges; presumably used by the
 * harnesses to bound randomly generated test inputs -- confirm in the test
 * sources. PIXEL_MAX tracks the configured bit depth. */
#define PIXEL_MAX ((1 << X265_DEPTH) - 1)
#define PIXEL_MIN 0
/* NOTE: SHORT_MIN is -32767 (symmetric with SHORT_MAX), not -32768 */
#define SHORT_MAX 32767
#define SHORT_MIN -32767
#define UNSIGNED_SHORT_MAX 65535

using namespace X265_NS;

/* Human-readable partition-size name tables, defined elsewhere in the harness */
extern const char* lumaPartStr[NUM_PU_SIZES];
extern const char* const* chromaPartStr[X265_CSP_COUNT];

/* Abstract base class for one family of primitive tests. Each derived harness
 * validates and benchmarks an optimized EncoderPrimitives table against the C
 * reference table. */
class TestHarness
{
public:

    TestHarness() {}

    virtual ~TestHarness() {}

    /* Compare the optimized primitives (opt) against the C reference (ref);
     * returns true when the harness deems them correct. */
    virtual bool testCorrectness(const EncoderPrimitives& ref, const EncoderPrimitives& opt) = 0;

    /* Benchmark opt against ref and report the measured speedup. */
    virtual void measureSpeed(const EncoderPrimitives& ref, const EncoderPrimitives& opt) = 0;

    /* Short harness name, used when reporting results. */
    virtual const char *getName() const = 0;

protected:

    /* Temporary variables for stack checks */
    int m_ok;        // checked()/checked_float() pass &m_ok to the asm call wrappers; reportfail() aborts when it is zero

    uint64_t m_rand; // junk pattern written around the stack pointer by checked()/checked_float()
};

/* Cycle-counter access. MSVC and toolchains reporting HAVE_RDTSC both get
 * __rdtsc() from <intrin.h>; the two original branches included the same
 * header, so they are merged into a single condition. */
#if defined(_MSC_VER) || HAVE_RDTSC
#include <intrin.h>
#elif defined(__GNUC__)
/* fallback for older GCC/MinGW: return the low 32 bits of the timestamp
 * counter.
 * NOTE(review): the inline asm is x86-specific ("rdtsc" writes EDX:EAX);
 * this assumes the header is only built for x86 targets when neither
 * _MSC_VER nor HAVE_RDTSC is set -- confirm for other architectures. */
static inline uint32_t __rdtsc(void)
{
    uint32_t a = 0;

    asm volatile("rdtsc" : "=a" (a) ::"edx");
    return a;
}

#endif // ifdef _MSC_VER

#define BENCH_RUNS 1000

// Adapted from checkasm.c, runs each optimized primitive four times, measures rdtsc
// and discards invalid times. Repeats 1000 times to get a good average. Then measures
// the C reference with fewer runs and reports X factor and average cycles.
//
// Details (comments are kept outside the macro because // comments interact
// badly with line-continuation backslashes):
//  - Each loop iteration times a batch of four calls with __rdtsc.
//  - A batch is accepted only when t1 * runs <= cycles * 4, i.e. it does not
//    exceed 4x the current per-batch average; this rejects samples inflated
//    by interrupts or context switches. Iteration ti == 0 is always discarded
//    as a warm-up (and guarantees runs/refruns >= 1 before any division).
//  - The reference loop runs only BENCH_RUNS / 4 batches.
//  - Printed figures: the speedup factor refperf / optperf, then the two
//    per-call averages scaled by 10.0f / 4 (10x batch average over 4 calls).
#define REPORT_SPEEDUP(RUNOPT, RUNREF, ...) \
    { \
        uint32_t cycles = 0; int runs = 0; \
        RUNOPT(__VA_ARGS__); \
        for (int ti = 0; ti < BENCH_RUNS; ti++) { \
            uint32_t t0 = (uint32_t)__rdtsc(); \
            RUNOPT(__VA_ARGS__); \
            RUNOPT(__VA_ARGS__); \
            RUNOPT(__VA_ARGS__); \
            RUNOPT(__VA_ARGS__); \
            uint32_t t1 = (uint32_t)__rdtsc() - t0; \
            if (t1 * runs <= cycles * 4 && ti > 0) { cycles += t1; runs++; } \
        } \
        uint32_t refcycles = 0; int refruns = 0; \
        RUNREF(__VA_ARGS__); \
        for (int ti = 0; ti < BENCH_RUNS / 4; ti++) { \
            uint32_t t0 = (uint32_t)__rdtsc(); \
            RUNREF(__VA_ARGS__); \
            RUNREF(__VA_ARGS__); \
            RUNREF(__VA_ARGS__); \
            RUNREF(__VA_ARGS__); \
            uint32_t t1 = (uint32_t)__rdtsc() - t0; \
            if (t1 * refruns <= refcycles * 4 && ti > 0) { refcycles += t1; refruns++; } \
        } \
        x265_emms(); \
        float optperf = (10.0f * cycles / runs) / 4; \
        float refperf = (10.0f * refcycles / refruns) / 4; \
        printf("\t%3.2fx ", refperf / optperf); \
        printf("\t %-8.2lf \t %-8.2lf\n", optperf, refperf); \
    }

extern "C" {
#if X265_ARCH_X86
/* Calls func with the stack pointer offset to the given alignment; used to
 * verify primitives tolerate unaligned stacks. */
int PFX(stack_pagealign)(int (*func)(), int align);

/* detect when callee-saved regs aren't saved
 * needs an explicit asm check because it only sometimes crashes in normal use. */
intptr_t PFX(checkasm_call)(intptr_t (*func)(), int *ok, ...);
float PFX(checkasm_call_float)(float (*func)(), int *ok, ...);
#else
#define PFX(stack_pagealign)(func, align) func()
#endif

#if X86_64

/* Evil hack: detect incorrect assumptions that 32-bit ints are zero-extended to 64-bit.
 * This is done by clobbering the stack with junk around the stack pointer and calling the
 * assembly function through x265_checkasm_call with added dummy arguments which forces all
 * real arguments to be passed on the stack and not in registers. For 32-bit argument the
 * upper half of the 64-bit register location on the stack will now contain junk. Note that
 * this is dependent on compiler behavior and that interrupts etc. at the wrong time may
 * overwrite the junk written to the stack so there's no guarantee that it will always
 * detect all functions that assumes zero-extension.
 */
void PFX(checkasm_stack_clobber)(uint64_t clobber, ...);

/* Evaluate func via the checkasm wrapper: clobber the stack with a random
 * pattern, then call with four dummy args so real args land on the stack.
 * m_ok receives the register-preservation verdict; check with reportfail(). */
#define checked(func, ...) ( \
        m_ok = 1, m_rand = (rand() & 0xffff) * 0x0001000100010001ULL, \
        PFX(checkasm_stack_clobber)(m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, \
                                    m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, \
                                    m_rand, m_rand, m_rand, m_rand, m_rand), /* max_args+6 */ \
        PFX(checkasm_call)((intptr_t(*)())func, &m_ok, 0, 0, 0, 0, __VA_ARGS__))

#define checked_float(func, ...) ( \
        m_ok = 1, m_rand = (rand() & 0xffff) * 0x0001000100010001ULL, \
        PFX(checkasm_stack_clobber)(m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, \
                                    m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, \
                                    m_rand, m_rand, m_rand, m_rand, m_rand), /* max_args+6 */ \
        PFX(checkasm_call_float)((float(*)())func, &m_ok, 0, 0, 0, 0, __VA_ARGS__))
#define reportfail() if (!m_ok) { fflush(stdout); fprintf(stderr, "stack clobber check failed at %s:%d", __FILE__, __LINE__); abort(); }
#elif ARCH_X86
#define checked(func, ...) PFX(checkasm_call)((intptr_t(*)())func, &m_ok, __VA_ARGS__);
#define checked_float(func, ...) PFX(checkasm_call_float)((float(*)())func, &m_ok, __VA_ARGS__);
/* BUGFIX: reportfail() was missing from this 32-bit x86 branch, so any
 * translation unit built with ARCH_X86 (but not X86_64) failed to compile
 * wherever reportfail() is used. Define it identically to the X86_64
 * branch. */
#define reportfail() if (!m_ok) { fflush(stdout); fprintf(stderr, "stack clobber check failed at %s:%d", __FILE__, __LINE__); abort(); }

#else // if X86_64
/* No asm checking available: call the function directly, reportfail is a no-op */
#define checked(func, ...) func(__VA_ARGS__)
#define checked_float(func, ...) func(__VA_ARGS__)
#define reportfail()
#endif // if X86_64
}

|
|
168 #endif // ifndef _TESTHARNESS_H_
|