0
|
1 /* The copyright in this software is being made available under the BSD
|
|
2 * License, included below. This software may be subject to other third party
|
|
3 * and contributor rights, including patent rights, and no such rights are
|
|
4 * granted under this license.
|
|
5 *
|
|
6 * Copyright (c) 2010-2014, ITU/ISO/IEC
|
|
7 * All rights reserved.
|
|
8 *
|
|
9 * Redistribution and use in source and binary forms, with or without
|
|
10 * modification, are permitted provided that the following conditions are met:
|
|
11 *
|
|
12 * * Redistributions of source code must retain the above copyright notice,
|
|
13 * this list of conditions and the following disclaimer.
|
|
14 * * Redistributions in binary form must reproduce the above copyright notice,
|
|
15 * this list of conditions and the following disclaimer in the documentation
|
|
16 * and/or other materials provided with the distribution.
|
|
17 * * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
|
|
18 * be used to endorse or promote products derived from this software without
|
|
19 * specific prior written permission.
|
|
20 *
|
|
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
|
|
25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
|
31 * THE POSSIBILITY OF SUCH DAMAGE.
|
|
32 */
|
|
33
|
|
34 /** \file TComRdCost.cpp
|
|
35 \brief RD cost computation class
|
|
36 */
|
|
37
|
|
38 #include <math.h>
|
|
39 #include <assert.h>
|
|
40 #include "TComRom.h"
|
|
41 #include "TComRdCost.h"
|
|
42
|
|
43 //! \ingroup TLibCommon
|
|
44 //! \{
|
|
45
|
|
46 TComRdCost::TComRdCost()
|
|
47 {
|
|
48 init();
|
|
49 }
|
|
50
|
|
51 TComRdCost::~TComRdCost()
|
|
52 {
|
|
53 }
|
|
54
|
|
55 // Calculate RD functions
|
|
56 Double TComRdCost::calcRdCost( UInt uiBits, Distortion uiDistortion, Bool bFlag, DFunc eDFunc )
|
|
57 {
|
|
58 Double dRdCost = 0.0;
|
|
59 Double dLambda = 0.0;
|
|
60
|
|
61 switch ( eDFunc )
|
|
62 {
|
|
63 case DF_SSE:
|
|
64 assert(0);
|
|
65 break;
|
|
66 case DF_SAD:
|
|
67 #if RExt__HIGH_BIT_DEPTH_SUPPORT
|
|
68 dLambda = m_dLambdaMotionSAD[0]; // 0 is valid, because for lossless blocks, the cost equation is modified to compensate.
|
|
69 #else
|
|
70 dLambda = (Double)m_uiLambdaMotionSAD[0]; // 0 is valid, because for lossless blocks, the cost equation is modified to compensate.
|
|
71 #endif
|
|
72 break;
|
|
73 case DF_DEFAULT:
|
|
74 dLambda = m_dLambda;
|
|
75 break;
|
|
76 case DF_SSE_FRAME:
|
|
77 dLambda = m_dFrameLambda;
|
|
78 break;
|
|
79 default:
|
|
80 assert (0);
|
|
81 break;
|
|
82 }
|
|
83
|
|
84 if (bFlag) //NOTE: this "bFlag" is never true
|
|
85 {
|
|
86 // Intra8x8, Intra4x4 Block only...
|
|
87 if (m_costMode != COST_STANDARD_LOSSY)
|
|
88 {
|
|
89 dRdCost = (Double(uiDistortion) / dLambda) + Double(uiBits); // all lossless costs would have uiDistortion=0, and therefore this cost function can be used.
|
|
90 }
|
|
91 else
|
|
92 {
|
|
93 dRdCost = (((Double)uiDistortion) + ((Double)uiBits * dLambda));
|
|
94 }
|
|
95 }
|
|
96 else
|
|
97 {
|
|
98 if (eDFunc == DF_SAD)
|
|
99 {
|
|
100 if (m_costMode != COST_STANDARD_LOSSY)
|
|
101 {
|
|
102 dRdCost = ((Double(uiDistortion) * 65536) / dLambda) + Double(uiBits); // all lossless costs would have uiDistortion=0, and therefore this cost function can be used.
|
|
103 }
|
|
104 else
|
|
105 {
|
|
106 dRdCost = floor(Double(uiDistortion) + (floor((Double(uiBits) * dLambda) + 0.5) / 65536.0));
|
|
107 }
|
|
108 }
|
|
109 else
|
|
110 {
|
|
111 if (m_costMode != COST_STANDARD_LOSSY)
|
|
112 {
|
|
113 dRdCost = (Double(uiDistortion) / dLambda) + Double(uiBits); // all lossless costs would have uiDistortion=0, and therefore this cost function can be used.
|
|
114 }
|
|
115 else
|
|
116 {
|
|
117 dRdCost = floor(Double(uiDistortion) + (Double(uiBits) * dLambda) + 0.5);
|
|
118 }
|
|
119 }
|
|
120 }
|
|
121
|
|
122 return dRdCost;
|
|
123 }
|
|
124
|
|
125 Double TComRdCost::calcRdCost64( UInt64 uiBits, UInt64 uiDistortion, Bool bFlag, DFunc eDFunc )
|
|
126 {
|
|
127 Double dRdCost = 0.0;
|
|
128 Double dLambda = 0.0;
|
|
129
|
|
130 switch ( eDFunc )
|
|
131 {
|
|
132 case DF_SSE:
|
|
133 assert(0);
|
|
134 break;
|
|
135 case DF_SAD:
|
|
136 #if RExt__HIGH_BIT_DEPTH_SUPPORT
|
|
137 dLambda = m_dLambdaMotionSAD[0]; // 0 is valid, because for lossless blocks, the cost equation is modified to compensate.
|
|
138 #else
|
|
139 dLambda = (Double)m_uiLambdaMotionSAD[0]; // 0 is valid, because for lossless blocks, the cost equation is modified to compensate.
|
|
140 #endif
|
|
141 break;
|
|
142 case DF_DEFAULT:
|
|
143 dLambda = m_dLambda;
|
|
144 break;
|
|
145 case DF_SSE_FRAME:
|
|
146 dLambda = m_dFrameLambda;
|
|
147 break;
|
|
148 default:
|
|
149 assert (0);
|
|
150 break;
|
|
151 }
|
|
152
|
|
153 if (bFlag) //NOTE: this "bFlag" is never true
|
|
154 {
|
|
155 // Intra8x8, Intra4x4 Block only...
|
|
156 if (m_costMode != COST_STANDARD_LOSSY)
|
|
157 {
|
|
158 dRdCost = (Double(uiDistortion) / dLambda) + Double(uiBits); // all lossless costs would have uiDistortion=0, and therefore this cost function can be used.
|
|
159 }
|
|
160 else
|
|
161 {
|
|
162 dRdCost = (((Double)(Int64)uiDistortion) + ((Double)(Int64)uiBits * dLambda));
|
|
163 }
|
|
164 }
|
|
165 else
|
|
166 {
|
|
167 if (eDFunc == DF_SAD)
|
|
168 {
|
|
169 if (m_costMode != COST_STANDARD_LOSSY)
|
|
170 {
|
|
171 dRdCost = ((Double(uiDistortion) * 65536) / dLambda) + Double(uiBits); // all lossless costs would have uiDistortion=0, and therefore this cost function can be used.
|
|
172 }
|
|
173 else
|
|
174 {
|
|
175 dRdCost = floor(Double(uiDistortion) + (floor((Double(uiBits) * dLambda) + 0.5) / 65536.0));
|
|
176 }
|
|
177 }
|
|
178 else
|
|
179 {
|
|
180 if (m_costMode != COST_STANDARD_LOSSY)
|
|
181 {
|
|
182 dRdCost = (Double(uiDistortion) / dLambda) + Double(uiBits); // all lossless costs would have uiDistortion=0, and therefore this cost function can be used.
|
|
183 }
|
|
184 else
|
|
185 {
|
|
186 dRdCost = floor(Double(uiDistortion) + (Double(uiBits) * dLambda) + 0.5);
|
|
187 }
|
|
188 }
|
|
189 }
|
|
190
|
|
191 return dRdCost;
|
|
192 }
|
|
193
|
|
194 Void TComRdCost::setLambda( Double dLambda )
|
|
195 {
|
|
196 m_dLambda = dLambda;
|
|
197 m_sqrtLambda = sqrt(m_dLambda);
|
|
198 #if RExt__HIGH_BIT_DEPTH_SUPPORT
|
|
199 m_dLambdaMotionSAD[0] = 65536.0 * m_sqrtLambda;
|
|
200 m_dLambdaMotionSSE[0] = 65536.0 * m_dLambda;
|
|
201 #if FULL_NBIT
|
|
202 dLambda = 0.57 * pow(2.0, ((LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP_PRIME - 12) / 3.0));
|
|
203 #else
|
|
204 dLambda = 0.57 * pow(2.0, ((LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP_PRIME - 12 - 6 * (g_bitDepth[CHANNEL_TYPE_LUMA] - 8)) / 3.0));
|
|
205 #endif
|
|
206 m_dLambdaMotionSAD[1] = 65536.0 * sqrt(dLambda);
|
|
207 m_dLambdaMotionSSE[1] = 65536.0 * dLambda;
|
|
208 #else
|
|
209 m_uiLambdaMotionSAD[0] = (UInt)floor(65536.0 * m_sqrtLambda);
|
|
210 m_uiLambdaMotionSSE[0] = (UInt)floor(65536.0 * m_dLambda );
|
|
211 #if FULL_NBIT
|
|
212 dLambda = 0.57 * pow(2.0, ((LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP_PRIME - 12) / 3.0));
|
|
213 #else
|
|
214 dLambda = 0.57 * pow(2.0, ((LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP_PRIME - 12 - 6 * (g_bitDepth[CHANNEL_TYPE_LUMA] - 8)) / 3.0));
|
|
215 #endif
|
|
216 m_uiLambdaMotionSAD[1] = (UInt)floor(65536.0 * sqrt(dLambda));
|
|
217 m_uiLambdaMotionSSE[1] = (UInt)floor(65536.0 * dLambda );
|
|
218 #endif
|
|
219 }
|
|
220
|
|
221
|
|
222 // Initialize function pointers by [eDFunc]
|
|
223 Void TComRdCost::init()
|
|
224 {
|
|
225 m_afpDistortFunc[DF_DEFAULT] = NULL; // for DF_DEFAULT
|
|
226
|
|
227 m_afpDistortFunc[DF_SSE ] = TComRdCost::xGetSSE;
|
|
228 m_afpDistortFunc[DF_SSE4 ] = TComRdCost::xGetSSE4;
|
|
229 m_afpDistortFunc[DF_SSE8 ] = TComRdCost::xGetSSE8;
|
|
230 m_afpDistortFunc[DF_SSE16 ] = TComRdCost::xGetSSE16;
|
|
231 m_afpDistortFunc[DF_SSE32 ] = TComRdCost::xGetSSE32;
|
|
232 m_afpDistortFunc[DF_SSE64 ] = TComRdCost::xGetSSE64;
|
|
233 m_afpDistortFunc[DF_SSE16N ] = TComRdCost::xGetSSE16N;
|
|
234
|
|
235 m_afpDistortFunc[DF_SAD ] = TComRdCost::xGetSAD;
|
|
236 m_afpDistortFunc[DF_SAD4 ] = TComRdCost::xGetSAD4;
|
|
237 m_afpDistortFunc[DF_SAD8 ] = TComRdCost::xGetSAD8;
|
|
238 m_afpDistortFunc[DF_SAD16 ] = TComRdCost::xGetSAD16;
|
|
239 m_afpDistortFunc[DF_SAD32 ] = TComRdCost::xGetSAD32;
|
|
240 m_afpDistortFunc[DF_SAD64 ] = TComRdCost::xGetSAD64;
|
|
241 m_afpDistortFunc[DF_SAD16N ] = TComRdCost::xGetSAD16N;
|
|
242
|
|
243 m_afpDistortFunc[DF_SADS ] = TComRdCost::xGetSAD;
|
|
244 m_afpDistortFunc[DF_SADS4 ] = TComRdCost::xGetSAD4;
|
|
245 m_afpDistortFunc[DF_SADS8 ] = TComRdCost::xGetSAD8;
|
|
246 m_afpDistortFunc[DF_SADS16 ] = TComRdCost::xGetSAD16;
|
|
247 m_afpDistortFunc[DF_SADS32 ] = TComRdCost::xGetSAD32;
|
|
248 m_afpDistortFunc[DF_SADS64 ] = TComRdCost::xGetSAD64;
|
|
249 m_afpDistortFunc[DF_SADS16N] = TComRdCost::xGetSAD16N;
|
|
250
|
|
251 #if AMP_SAD
|
|
252 m_afpDistortFunc[DF_SAD12 ] = TComRdCost::xGetSAD12;
|
|
253 m_afpDistortFunc[DF_SAD24 ] = TComRdCost::xGetSAD24;
|
|
254 m_afpDistortFunc[DF_SAD48 ] = TComRdCost::xGetSAD48;
|
|
255
|
|
256 m_afpDistortFunc[DF_SADS12 ] = TComRdCost::xGetSAD12;
|
|
257 m_afpDistortFunc[DF_SADS24 ] = TComRdCost::xGetSAD24;
|
|
258 m_afpDistortFunc[DF_SADS48 ] = TComRdCost::xGetSAD48;
|
|
259 #endif
|
|
260 m_afpDistortFunc[DF_HADS ] = TComRdCost::xGetHADs;
|
|
261 m_afpDistortFunc[DF_HADS4 ] = TComRdCost::xGetHADs;
|
|
262 m_afpDistortFunc[DF_HADS8 ] = TComRdCost::xGetHADs;
|
|
263 m_afpDistortFunc[DF_HADS16 ] = TComRdCost::xGetHADs;
|
|
264 m_afpDistortFunc[DF_HADS32 ] = TComRdCost::xGetHADs;
|
|
265 m_afpDistortFunc[DF_HADS64 ] = TComRdCost::xGetHADs;
|
|
266 m_afpDistortFunc[DF_HADS16N] = TComRdCost::xGetHADs;
|
|
267
|
|
268 m_costMode = COST_STANDARD_LOSSY;
|
|
269
|
|
270 #if RExt__HIGH_BIT_DEPTH_SUPPORT
|
|
271 m_dCost = 0;
|
|
272 #else
|
|
273 m_uiCost = 0;
|
|
274 #endif
|
|
275 m_iCostScale = 0;
|
|
276 }
|
|
277
|
|
278 UInt TComRdCost::xGetComponentBits( Int iVal )
|
|
279 {
|
|
280 UInt uiLength = 1;
|
|
281 UInt uiTemp = ( iVal <= 0) ? (-iVal<<1)+1: (iVal<<1);
|
|
282
|
|
283 assert ( uiTemp );
|
|
284
|
|
285 while ( 1 != uiTemp )
|
|
286 {
|
|
287 uiTemp >>= 1;
|
|
288 uiLength += 2;
|
|
289 }
|
|
290
|
|
291 return uiLength;
|
|
292 }
|
|
293
|
|
294 Void TComRdCost::setDistParam( UInt uiBlkWidth, UInt uiBlkHeight, DFunc eDFunc, DistParam& rcDistParam )
|
|
295 {
|
|
296 // set Block Width / Height
|
|
297 rcDistParam.iCols = uiBlkWidth;
|
|
298 rcDistParam.iRows = uiBlkHeight;
|
|
299 rcDistParam.DistFunc = m_afpDistortFunc[eDFunc + g_aucConvertToBit[ rcDistParam.iCols ] + 1 ];
|
|
300
|
|
301 // initialize
|
|
302 rcDistParam.iSubShift = 0;
|
|
303 }
|
|
304
|
|
305 // Setting the Distortion Parameter for Inter (ME)
|
|
306 Void TComRdCost::setDistParam( TComPattern* pcPatternKey, Pel* piRefY, Int iRefStride, DistParam& rcDistParam )
|
|
307 {
|
|
308 // set Original & Curr Pointer / Stride
|
|
309 rcDistParam.pOrg = pcPatternKey->getROIY();
|
|
310 rcDistParam.pCur = piRefY;
|
|
311
|
|
312 rcDistParam.iStrideOrg = pcPatternKey->getPatternLStride();
|
|
313 rcDistParam.iStrideCur = iRefStride;
|
|
314
|
|
315 // set Block Width / Height
|
|
316 rcDistParam.iCols = pcPatternKey->getROIYWidth();
|
|
317 rcDistParam.iRows = pcPatternKey->getROIYHeight();
|
|
318 rcDistParam.DistFunc = m_afpDistortFunc[DF_SAD + g_aucConvertToBit[ rcDistParam.iCols ] + 1 ];
|
|
319
|
|
320 #if AMP_SAD
|
|
321 if (rcDistParam.iCols == 12)
|
|
322 {
|
|
323 rcDistParam.DistFunc = m_afpDistortFunc[DF_SAD12];
|
|
324 }
|
|
325 else if (rcDistParam.iCols == 24)
|
|
326 {
|
|
327 rcDistParam.DistFunc = m_afpDistortFunc[DF_SAD24];
|
|
328 }
|
|
329 else if (rcDistParam.iCols == 48)
|
|
330 {
|
|
331 rcDistParam.DistFunc = m_afpDistortFunc[DF_SAD48];
|
|
332 }
|
|
333 #endif
|
|
334
|
|
335 // initialize
|
|
336 rcDistParam.iSubShift = 0;
|
|
337 }
|
|
338
|
|
339 // Setting the Distortion Parameter for Inter (subpel ME with step)
|
|
340 Void TComRdCost::setDistParam( TComPattern* pcPatternKey, Pel* piRefY, Int iRefStride, Int iStep, DistParam& rcDistParam, Bool bHADME )
|
|
341 {
|
|
342 // set Original & Curr Pointer / Stride
|
|
343 rcDistParam.pOrg = pcPatternKey->getROIY();
|
|
344 rcDistParam.pCur = piRefY;
|
|
345
|
|
346 rcDistParam.iStrideOrg = pcPatternKey->getPatternLStride();
|
|
347 rcDistParam.iStrideCur = iRefStride * iStep;
|
|
348
|
|
349 // set Step for interpolated buffer
|
|
350 rcDistParam.iStep = iStep;
|
|
351
|
|
352 // set Block Width / Height
|
|
353 rcDistParam.iCols = pcPatternKey->getROIYWidth();
|
|
354 rcDistParam.iRows = pcPatternKey->getROIYHeight();
|
|
355
|
|
356 // set distortion function
|
|
357 if ( !bHADME )
|
|
358 {
|
|
359 rcDistParam.DistFunc = m_afpDistortFunc[DF_SADS + g_aucConvertToBit[ rcDistParam.iCols ] + 1 ];
|
|
360 #if AMP_SAD
|
|
361 if (rcDistParam.iCols == 12)
|
|
362 {
|
|
363 rcDistParam.DistFunc = m_afpDistortFunc[DF_SADS12];
|
|
364 }
|
|
365 else if (rcDistParam.iCols == 24)
|
|
366 {
|
|
367 rcDistParam.DistFunc = m_afpDistortFunc[DF_SADS24];
|
|
368 }
|
|
369 else if (rcDistParam.iCols == 48)
|
|
370 {
|
|
371 rcDistParam.DistFunc = m_afpDistortFunc[DF_SADS48];
|
|
372 }
|
|
373 #endif
|
|
374 }
|
|
375 else
|
|
376 {
|
|
377 rcDistParam.DistFunc = m_afpDistortFunc[DF_HADS + g_aucConvertToBit[ rcDistParam.iCols ] + 1 ];
|
|
378 }
|
|
379
|
|
380 // initialize
|
|
381 rcDistParam.iSubShift = 0;
|
|
382 }
|
|
383
|
|
384 Void TComRdCost::setDistParam( DistParam& rcDP, Int bitDepth, Pel* p1, Int iStride1, Pel* p2, Int iStride2, Int iWidth, Int iHeight, Bool bHadamard )
|
|
385 {
|
|
386 rcDP.pOrg = p1;
|
|
387 rcDP.pCur = p2;
|
|
388 rcDP.iStrideOrg = iStride1;
|
|
389 rcDP.iStrideCur = iStride2;
|
|
390 rcDP.iCols = iWidth;
|
|
391 rcDP.iRows = iHeight;
|
|
392 rcDP.iStep = 1;
|
|
393 rcDP.iSubShift = 0;
|
|
394 rcDP.bitDepth = bitDepth;
|
|
395 rcDP.DistFunc = m_afpDistortFunc[ ( bHadamard ? DF_HADS : DF_SADS ) + g_aucConvertToBit[ iWidth ] + 1 ];
|
|
396 }
|
|
397
|
|
398 Distortion TComRdCost::calcHAD( Int bitDepth, Pel* pi0, Int iStride0, Pel* pi1, Int iStride1, Int iWidth, Int iHeight )
|
|
399 {
|
|
400 Distortion uiSum = 0;
|
|
401 Int x, y;
|
|
402
|
|
403 if ( ( (iWidth % 8) == 0 ) && ( (iHeight % 8) == 0 ) )
|
|
404 {
|
|
405 for ( y=0; y<iHeight; y+= 8 )
|
|
406 {
|
|
407 for ( x=0; x<iWidth; x+= 8 )
|
|
408 {
|
|
409 uiSum += xCalcHADs8x8( &pi0[x], &pi1[x], iStride0, iStride1, 1 );
|
|
410 }
|
|
411 pi0 += iStride0*8;
|
|
412 pi1 += iStride1*8;
|
|
413 }
|
|
414 }
|
|
415 else
|
|
416 {
|
|
417 assert ( ( (iWidth % 4) == 0 ) && ( (iHeight % 4) == 0 ) );
|
|
418
|
|
419 for ( y=0; y<iHeight; y+= 4 )
|
|
420 {
|
|
421 for ( x=0; x<iWidth; x+= 4 )
|
|
422 {
|
|
423 uiSum += xCalcHADs4x4( &pi0[x], &pi1[x], iStride0, iStride1, 1 );
|
|
424 }
|
|
425 pi0 += iStride0*4;
|
|
426 pi1 += iStride1*4;
|
|
427 }
|
|
428 }
|
|
429
|
|
430 return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(bitDepth-8) );
|
|
431 }
|
|
432
|
|
433 Distortion TComRdCost::getDistPart( Int bitDepth, Pel* piCur, Int iCurStride, Pel* piOrg, Int iOrgStride, UInt uiBlkWidth, UInt uiBlkHeight, const ComponentID compID, DFunc eDFunc )
|
|
434 {
|
|
435 DistParam cDtParam;
|
|
436 setDistParam( uiBlkWidth, uiBlkHeight, eDFunc, cDtParam );
|
|
437 cDtParam.pOrg = piOrg;
|
|
438 cDtParam.pCur = piCur;
|
|
439 cDtParam.iStrideOrg = iOrgStride;
|
|
440 cDtParam.iStrideCur = iCurStride;
|
|
441 cDtParam.iStep = 1;
|
|
442
|
|
443 cDtParam.bApplyWeight = false;
|
|
444 cDtParam.compIdx = MAX_NUM_COMPONENT; // just for assert: to be sure it was set before use
|
|
445 cDtParam.bitDepth = bitDepth;
|
|
446
|
|
447 if (isChroma(compID))
|
|
448 {
|
|
449 return ((Distortion) (m_distortionWeight[compID] * cDtParam.DistFunc( &cDtParam )));
|
|
450 }
|
|
451 else
|
|
452 {
|
|
453 return cDtParam.DistFunc( &cDtParam );
|
|
454 }
|
|
455 }
|
|
456
|
|
457 // ====================================================================================================================
|
|
458 // Distortion functions
|
|
459 // ====================================================================================================================
|
|
460
|
|
461 // --------------------------------------------------------------------------------------------------------------------
|
|
462 // SAD
|
|
463 // --------------------------------------------------------------------------------------------------------------------
|
|
464
|
|
465 Distortion TComRdCost::xGetSAD( DistParam* pcDtParam )
|
|
466 {
|
|
467 if ( pcDtParam->bApplyWeight )
|
|
468 {
|
|
469 return TComRdCostWeightPrediction::xGetSADw( pcDtParam );
|
|
470 }
|
|
471 const Pel* piOrg = pcDtParam->pOrg;
|
|
472 const Pel* piCur = pcDtParam->pCur;
|
|
473 Int iRows = pcDtParam->iRows;
|
|
474 Int iCols = pcDtParam->iCols;
|
|
475 Int iStrideCur = pcDtParam->iStrideCur;
|
|
476 Int iStrideOrg = pcDtParam->iStrideOrg;
|
|
477
|
|
478 Distortion uiSum = 0;
|
|
479
|
|
480 for( ; iRows != 0; iRows-- )
|
|
481 {
|
|
482 for (Int n = 0; n < iCols; n++ )
|
|
483 {
|
|
484 uiSum += abs( piOrg[n] - piCur[n] );
|
|
485 }
|
|
486 piOrg += iStrideOrg;
|
|
487 piCur += iStrideCur;
|
|
488 }
|
|
489
|
|
490 return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
|
|
491 }
|
|
492
|
|
493 Distortion TComRdCost::xGetSAD4( DistParam* pcDtParam )
|
|
494 {
|
|
495 if ( pcDtParam->bApplyWeight )
|
|
496 {
|
|
497 return TComRdCostWeightPrediction::xGetSADw( pcDtParam );
|
|
498 }
|
|
499 const Pel* piOrg = pcDtParam->pOrg;
|
|
500 const Pel* piCur = pcDtParam->pCur;
|
|
501 Int iRows = pcDtParam->iRows;
|
|
502 Int iSubShift = pcDtParam->iSubShift;
|
|
503 Int iSubStep = ( 1 << iSubShift );
|
|
504 Int iStrideCur = pcDtParam->iStrideCur*iSubStep;
|
|
505 Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep;
|
|
506
|
|
507 Distortion uiSum = 0;
|
|
508
|
|
509 for( ; iRows != 0; iRows-=iSubStep )
|
|
510 {
|
|
511 uiSum += abs( piOrg[0] - piCur[0] );
|
|
512 uiSum += abs( piOrg[1] - piCur[1] );
|
|
513 uiSum += abs( piOrg[2] - piCur[2] );
|
|
514 uiSum += abs( piOrg[3] - piCur[3] );
|
|
515
|
|
516 piOrg += iStrideOrg;
|
|
517 piCur += iStrideCur;
|
|
518 }
|
|
519
|
|
520 uiSum <<= iSubShift;
|
|
521 return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
|
|
522 }
|
|
523
|
|
524 Distortion TComRdCost::xGetSAD8( DistParam* pcDtParam )
|
|
525 {
|
|
526 if ( pcDtParam->bApplyWeight )
|
|
527 {
|
|
528 return TComRdCostWeightPrediction::xGetSADw( pcDtParam );
|
|
529 }
|
|
530 const Pel* piOrg = pcDtParam->pOrg;
|
|
531 const Pel* piCur = pcDtParam->pCur;
|
|
532 Int iRows = pcDtParam->iRows;
|
|
533 Int iSubShift = pcDtParam->iSubShift;
|
|
534 Int iSubStep = ( 1 << iSubShift );
|
|
535 Int iStrideCur = pcDtParam->iStrideCur*iSubStep;
|
|
536 Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep;
|
|
537
|
|
538 Distortion uiSum = 0;
|
|
539
|
|
540 for( ; iRows != 0; iRows-=iSubStep )
|
|
541 {
|
|
542 uiSum += abs( piOrg[0] - piCur[0] );
|
|
543 uiSum += abs( piOrg[1] - piCur[1] );
|
|
544 uiSum += abs( piOrg[2] - piCur[2] );
|
|
545 uiSum += abs( piOrg[3] - piCur[3] );
|
|
546 uiSum += abs( piOrg[4] - piCur[4] );
|
|
547 uiSum += abs( piOrg[5] - piCur[5] );
|
|
548 uiSum += abs( piOrg[6] - piCur[6] );
|
|
549 uiSum += abs( piOrg[7] - piCur[7] );
|
|
550
|
|
551 piOrg += iStrideOrg;
|
|
552 piCur += iStrideCur;
|
|
553 }
|
|
554
|
|
555 uiSum <<= iSubShift;
|
|
556 return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
|
|
557 }
|
|
558
|
|
559 Distortion TComRdCost::xGetSAD16( DistParam* pcDtParam )
|
|
560 {
|
|
561 if ( pcDtParam->bApplyWeight )
|
|
562 {
|
|
563 return TComRdCostWeightPrediction::xGetSADw( pcDtParam );
|
|
564 }
|
|
565 const Pel* piOrg = pcDtParam->pOrg;
|
|
566 const Pel* piCur = pcDtParam->pCur;
|
|
567 Int iRows = pcDtParam->iRows;
|
|
568 Int iSubShift = pcDtParam->iSubShift;
|
|
569 Int iSubStep = ( 1 << iSubShift );
|
|
570 Int iStrideCur = pcDtParam->iStrideCur*iSubStep;
|
|
571 Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep;
|
|
572
|
|
573 Distortion uiSum = 0;
|
|
574
|
|
575 for( ; iRows != 0; iRows-=iSubStep )
|
|
576 {
|
|
577 uiSum += abs( piOrg[0] - piCur[0] );
|
|
578 uiSum += abs( piOrg[1] - piCur[1] );
|
|
579 uiSum += abs( piOrg[2] - piCur[2] );
|
|
580 uiSum += abs( piOrg[3] - piCur[3] );
|
|
581 uiSum += abs( piOrg[4] - piCur[4] );
|
|
582 uiSum += abs( piOrg[5] - piCur[5] );
|
|
583 uiSum += abs( piOrg[6] - piCur[6] );
|
|
584 uiSum += abs( piOrg[7] - piCur[7] );
|
|
585 uiSum += abs( piOrg[8] - piCur[8] );
|
|
586 uiSum += abs( piOrg[9] - piCur[9] );
|
|
587 uiSum += abs( piOrg[10] - piCur[10] );
|
|
588 uiSum += abs( piOrg[11] - piCur[11] );
|
|
589 uiSum += abs( piOrg[12] - piCur[12] );
|
|
590 uiSum += abs( piOrg[13] - piCur[13] );
|
|
591 uiSum += abs( piOrg[14] - piCur[14] );
|
|
592 uiSum += abs( piOrg[15] - piCur[15] );
|
|
593
|
|
594 piOrg += iStrideOrg;
|
|
595 piCur += iStrideCur;
|
|
596 }
|
|
597
|
|
598 uiSum <<= iSubShift;
|
|
599 return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
|
|
600 }
|
|
601
|
|
602 #if AMP_SAD
|
|
603 Distortion TComRdCost::xGetSAD12( DistParam* pcDtParam )
|
|
604 {
|
|
605 if ( pcDtParam->bApplyWeight )
|
|
606 {
|
|
607 return TComRdCostWeightPrediction::xGetSADw( pcDtParam );
|
|
608 }
|
|
609 const Pel* piOrg = pcDtParam->pOrg;
|
|
610 const Pel* piCur = pcDtParam->pCur;
|
|
611 Int iRows = pcDtParam->iRows;
|
|
612 Int iSubShift = pcDtParam->iSubShift;
|
|
613 Int iSubStep = ( 1 << iSubShift );
|
|
614 Int iStrideCur = pcDtParam->iStrideCur*iSubStep;
|
|
615 Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep;
|
|
616
|
|
617 Distortion uiSum = 0;
|
|
618
|
|
619 for( ; iRows != 0; iRows-=iSubStep )
|
|
620 {
|
|
621 uiSum += abs( piOrg[0] - piCur[0] );
|
|
622 uiSum += abs( piOrg[1] - piCur[1] );
|
|
623 uiSum += abs( piOrg[2] - piCur[2] );
|
|
624 uiSum += abs( piOrg[3] - piCur[3] );
|
|
625 uiSum += abs( piOrg[4] - piCur[4] );
|
|
626 uiSum += abs( piOrg[5] - piCur[5] );
|
|
627 uiSum += abs( piOrg[6] - piCur[6] );
|
|
628 uiSum += abs( piOrg[7] - piCur[7] );
|
|
629 uiSum += abs( piOrg[8] - piCur[8] );
|
|
630 uiSum += abs( piOrg[9] - piCur[9] );
|
|
631 uiSum += abs( piOrg[10] - piCur[10] );
|
|
632 uiSum += abs( piOrg[11] - piCur[11] );
|
|
633
|
|
634 piOrg += iStrideOrg;
|
|
635 piCur += iStrideCur;
|
|
636 }
|
|
637
|
|
638 uiSum <<= iSubShift;
|
|
639 return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
|
|
640 }
|
|
641 #endif
|
|
642
|
|
643 Distortion TComRdCost::xGetSAD16N( DistParam* pcDtParam )
|
|
644 {
|
|
645 const Pel* piOrg = pcDtParam->pOrg;
|
|
646 const Pel* piCur = pcDtParam->pCur;
|
|
647 Int iRows = pcDtParam->iRows;
|
|
648 Int iCols = pcDtParam->iCols;
|
|
649 Int iSubShift = pcDtParam->iSubShift;
|
|
650 Int iSubStep = ( 1 << iSubShift );
|
|
651 Int iStrideCur = pcDtParam->iStrideCur*iSubStep;
|
|
652 Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep;
|
|
653
|
|
654 Distortion uiSum = 0;
|
|
655
|
|
656 for( ; iRows != 0; iRows-=iSubStep )
|
|
657 {
|
|
658 for (Int n = 0; n < iCols; n+=16 )
|
|
659 {
|
|
660 uiSum += abs( piOrg[n+ 0] - piCur[n+ 0] );
|
|
661 uiSum += abs( piOrg[n+ 1] - piCur[n+ 1] );
|
|
662 uiSum += abs( piOrg[n+ 2] - piCur[n+ 2] );
|
|
663 uiSum += abs( piOrg[n+ 3] - piCur[n+ 3] );
|
|
664 uiSum += abs( piOrg[n+ 4] - piCur[n+ 4] );
|
|
665 uiSum += abs( piOrg[n+ 5] - piCur[n+ 5] );
|
|
666 uiSum += abs( piOrg[n+ 6] - piCur[n+ 6] );
|
|
667 uiSum += abs( piOrg[n+ 7] - piCur[n+ 7] );
|
|
668 uiSum += abs( piOrg[n+ 8] - piCur[n+ 8] );
|
|
669 uiSum += abs( piOrg[n+ 9] - piCur[n+ 9] );
|
|
670 uiSum += abs( piOrg[n+10] - piCur[n+10] );
|
|
671 uiSum += abs( piOrg[n+11] - piCur[n+11] );
|
|
672 uiSum += abs( piOrg[n+12] - piCur[n+12] );
|
|
673 uiSum += abs( piOrg[n+13] - piCur[n+13] );
|
|
674 uiSum += abs( piOrg[n+14] - piCur[n+14] );
|
|
675 uiSum += abs( piOrg[n+15] - piCur[n+15] );
|
|
676 }
|
|
677 piOrg += iStrideOrg;
|
|
678 piCur += iStrideCur;
|
|
679 }
|
|
680
|
|
681 uiSum <<= iSubShift;
|
|
682 return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
|
|
683 }
|
|
684
|
|
685 Distortion TComRdCost::xGetSAD32( DistParam* pcDtParam )
|
|
686 {
|
|
687 if ( pcDtParam->bApplyWeight )
|
|
688 {
|
|
689 return TComRdCostWeightPrediction::xGetSADw( pcDtParam );
|
|
690 }
|
|
691 const Pel* piOrg = pcDtParam->pOrg;
|
|
692 const Pel* piCur = pcDtParam->pCur;
|
|
693 Int iRows = pcDtParam->iRows;
|
|
694 Int iSubShift = pcDtParam->iSubShift;
|
|
695 Int iSubStep = ( 1 << iSubShift );
|
|
696 Int iStrideCur = pcDtParam->iStrideCur*iSubStep;
|
|
697 Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep;
|
|
698
|
|
699 Distortion uiSum = 0;
|
|
700
|
|
701 for( ; iRows != 0; iRows-=iSubStep )
|
|
702 {
|
|
703 uiSum += abs( piOrg[0] - piCur[0] );
|
|
704 uiSum += abs( piOrg[1] - piCur[1] );
|
|
705 uiSum += abs( piOrg[2] - piCur[2] );
|
|
706 uiSum += abs( piOrg[3] - piCur[3] );
|
|
707 uiSum += abs( piOrg[4] - piCur[4] );
|
|
708 uiSum += abs( piOrg[5] - piCur[5] );
|
|
709 uiSum += abs( piOrg[6] - piCur[6] );
|
|
710 uiSum += abs( piOrg[7] - piCur[7] );
|
|
711 uiSum += abs( piOrg[8] - piCur[8] );
|
|
712 uiSum += abs( piOrg[9] - piCur[9] );
|
|
713 uiSum += abs( piOrg[10] - piCur[10] );
|
|
714 uiSum += abs( piOrg[11] - piCur[11] );
|
|
715 uiSum += abs( piOrg[12] - piCur[12] );
|
|
716 uiSum += abs( piOrg[13] - piCur[13] );
|
|
717 uiSum += abs( piOrg[14] - piCur[14] );
|
|
718 uiSum += abs( piOrg[15] - piCur[15] );
|
|
719 uiSum += abs( piOrg[16] - piCur[16] );
|
|
720 uiSum += abs( piOrg[17] - piCur[17] );
|
|
721 uiSum += abs( piOrg[18] - piCur[18] );
|
|
722 uiSum += abs( piOrg[19] - piCur[19] );
|
|
723 uiSum += abs( piOrg[20] - piCur[20] );
|
|
724 uiSum += abs( piOrg[21] - piCur[21] );
|
|
725 uiSum += abs( piOrg[22] - piCur[22] );
|
|
726 uiSum += abs( piOrg[23] - piCur[23] );
|
|
727 uiSum += abs( piOrg[24] - piCur[24] );
|
|
728 uiSum += abs( piOrg[25] - piCur[25] );
|
|
729 uiSum += abs( piOrg[26] - piCur[26] );
|
|
730 uiSum += abs( piOrg[27] - piCur[27] );
|
|
731 uiSum += abs( piOrg[28] - piCur[28] );
|
|
732 uiSum += abs( piOrg[29] - piCur[29] );
|
|
733 uiSum += abs( piOrg[30] - piCur[30] );
|
|
734 uiSum += abs( piOrg[31] - piCur[31] );
|
|
735
|
|
736 piOrg += iStrideOrg;
|
|
737 piCur += iStrideCur;
|
|
738 }
|
|
739
|
|
740 uiSum <<= iSubShift;
|
|
741 return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
|
|
742 }
|
|
743
|
|
744 #if AMP_SAD
|
|
745 Distortion TComRdCost::xGetSAD24( DistParam* pcDtParam )
|
|
746 {
|
|
747 if ( pcDtParam->bApplyWeight )
|
|
748 {
|
|
749 return TComRdCostWeightPrediction::xGetSADw( pcDtParam );
|
|
750 }
|
|
751 const Pel* piOrg = pcDtParam->pOrg;
|
|
752 const Pel* piCur = pcDtParam->pCur;
|
|
753 Int iRows = pcDtParam->iRows;
|
|
754 Int iSubShift = pcDtParam->iSubShift;
|
|
755 Int iSubStep = ( 1 << iSubShift );
|
|
756 Int iStrideCur = pcDtParam->iStrideCur*iSubStep;
|
|
757 Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep;
|
|
758
|
|
759 Distortion uiSum = 0;
|
|
760
|
|
761 for( ; iRows != 0; iRows-=iSubStep )
|
|
762 {
|
|
763 uiSum += abs( piOrg[0] - piCur[0] );
|
|
764 uiSum += abs( piOrg[1] - piCur[1] );
|
|
765 uiSum += abs( piOrg[2] - piCur[2] );
|
|
766 uiSum += abs( piOrg[3] - piCur[3] );
|
|
767 uiSum += abs( piOrg[4] - piCur[4] );
|
|
768 uiSum += abs( piOrg[5] - piCur[5] );
|
|
769 uiSum += abs( piOrg[6] - piCur[6] );
|
|
770 uiSum += abs( piOrg[7] - piCur[7] );
|
|
771 uiSum += abs( piOrg[8] - piCur[8] );
|
|
772 uiSum += abs( piOrg[9] - piCur[9] );
|
|
773 uiSum += abs( piOrg[10] - piCur[10] );
|
|
774 uiSum += abs( piOrg[11] - piCur[11] );
|
|
775 uiSum += abs( piOrg[12] - piCur[12] );
|
|
776 uiSum += abs( piOrg[13] - piCur[13] );
|
|
777 uiSum += abs( piOrg[14] - piCur[14] );
|
|
778 uiSum += abs( piOrg[15] - piCur[15] );
|
|
779 uiSum += abs( piOrg[16] - piCur[16] );
|
|
780 uiSum += abs( piOrg[17] - piCur[17] );
|
|
781 uiSum += abs( piOrg[18] - piCur[18] );
|
|
782 uiSum += abs( piOrg[19] - piCur[19] );
|
|
783 uiSum += abs( piOrg[20] - piCur[20] );
|
|
784 uiSum += abs( piOrg[21] - piCur[21] );
|
|
785 uiSum += abs( piOrg[22] - piCur[22] );
|
|
786 uiSum += abs( piOrg[23] - piCur[23] );
|
|
787
|
|
788 piOrg += iStrideOrg;
|
|
789 piCur += iStrideCur;
|
|
790 }
|
|
791
|
|
792 uiSum <<= iSubShift;
|
|
793 return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
|
|
794 }
|
|
795
|
|
796 #endif
|
|
797
|
|
798 Distortion TComRdCost::xGetSAD64( DistParam* pcDtParam )
|
|
799 {
|
|
800 if ( pcDtParam->bApplyWeight )
|
|
801 {
|
|
802 return TComRdCostWeightPrediction::xGetSADw( pcDtParam );
|
|
803 }
|
|
804 const Pel* piOrg = pcDtParam->pOrg;
|
|
805 const Pel* piCur = pcDtParam->pCur;
|
|
806 Int iRows = pcDtParam->iRows;
|
|
807 Int iSubShift = pcDtParam->iSubShift;
|
|
808 Int iSubStep = ( 1 << iSubShift );
|
|
809 Int iStrideCur = pcDtParam->iStrideCur*iSubStep;
|
|
810 Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep;
|
|
811
|
|
812 Distortion uiSum = 0;
|
|
813
|
|
814 for( ; iRows != 0; iRows-=iSubStep )
|
|
815 {
|
|
816 uiSum += abs( piOrg[0] - piCur[0] );
|
|
817 uiSum += abs( piOrg[1] - piCur[1] );
|
|
818 uiSum += abs( piOrg[2] - piCur[2] );
|
|
819 uiSum += abs( piOrg[3] - piCur[3] );
|
|
820 uiSum += abs( piOrg[4] - piCur[4] );
|
|
821 uiSum += abs( piOrg[5] - piCur[5] );
|
|
822 uiSum += abs( piOrg[6] - piCur[6] );
|
|
823 uiSum += abs( piOrg[7] - piCur[7] );
|
|
824 uiSum += abs( piOrg[8] - piCur[8] );
|
|
825 uiSum += abs( piOrg[9] - piCur[9] );
|
|
826 uiSum += abs( piOrg[10] - piCur[10] );
|
|
827 uiSum += abs( piOrg[11] - piCur[11] );
|
|
828 uiSum += abs( piOrg[12] - piCur[12] );
|
|
829 uiSum += abs( piOrg[13] - piCur[13] );
|
|
830 uiSum += abs( piOrg[14] - piCur[14] );
|
|
831 uiSum += abs( piOrg[15] - piCur[15] );
|
|
832 uiSum += abs( piOrg[16] - piCur[16] );
|
|
833 uiSum += abs( piOrg[17] - piCur[17] );
|
|
834 uiSum += abs( piOrg[18] - piCur[18] );
|
|
835 uiSum += abs( piOrg[19] - piCur[19] );
|
|
836 uiSum += abs( piOrg[20] - piCur[20] );
|
|
837 uiSum += abs( piOrg[21] - piCur[21] );
|
|
838 uiSum += abs( piOrg[22] - piCur[22] );
|
|
839 uiSum += abs( piOrg[23] - piCur[23] );
|
|
840 uiSum += abs( piOrg[24] - piCur[24] );
|
|
841 uiSum += abs( piOrg[25] - piCur[25] );
|
|
842 uiSum += abs( piOrg[26] - piCur[26] );
|
|
843 uiSum += abs( piOrg[27] - piCur[27] );
|
|
844 uiSum += abs( piOrg[28] - piCur[28] );
|
|
845 uiSum += abs( piOrg[29] - piCur[29] );
|
|
846 uiSum += abs( piOrg[30] - piCur[30] );
|
|
847 uiSum += abs( piOrg[31] - piCur[31] );
|
|
848 uiSum += abs( piOrg[32] - piCur[32] );
|
|
849 uiSum += abs( piOrg[33] - piCur[33] );
|
|
850 uiSum += abs( piOrg[34] - piCur[34] );
|
|
851 uiSum += abs( piOrg[35] - piCur[35] );
|
|
852 uiSum += abs( piOrg[36] - piCur[36] );
|
|
853 uiSum += abs( piOrg[37] - piCur[37] );
|
|
854 uiSum += abs( piOrg[38] - piCur[38] );
|
|
855 uiSum += abs( piOrg[39] - piCur[39] );
|
|
856 uiSum += abs( piOrg[40] - piCur[40] );
|
|
857 uiSum += abs( piOrg[41] - piCur[41] );
|
|
858 uiSum += abs( piOrg[42] - piCur[42] );
|
|
859 uiSum += abs( piOrg[43] - piCur[43] );
|
|
860 uiSum += abs( piOrg[44] - piCur[44] );
|
|
861 uiSum += abs( piOrg[45] - piCur[45] );
|
|
862 uiSum += abs( piOrg[46] - piCur[46] );
|
|
863 uiSum += abs( piOrg[47] - piCur[47] );
|
|
864 uiSum += abs( piOrg[48] - piCur[48] );
|
|
865 uiSum += abs( piOrg[49] - piCur[49] );
|
|
866 uiSum += abs( piOrg[50] - piCur[50] );
|
|
867 uiSum += abs( piOrg[51] - piCur[51] );
|
|
868 uiSum += abs( piOrg[52] - piCur[52] );
|
|
869 uiSum += abs( piOrg[53] - piCur[53] );
|
|
870 uiSum += abs( piOrg[54] - piCur[54] );
|
|
871 uiSum += abs( piOrg[55] - piCur[55] );
|
|
872 uiSum += abs( piOrg[56] - piCur[56] );
|
|
873 uiSum += abs( piOrg[57] - piCur[57] );
|
|
874 uiSum += abs( piOrg[58] - piCur[58] );
|
|
875 uiSum += abs( piOrg[59] - piCur[59] );
|
|
876 uiSum += abs( piOrg[60] - piCur[60] );
|
|
877 uiSum += abs( piOrg[61] - piCur[61] );
|
|
878 uiSum += abs( piOrg[62] - piCur[62] );
|
|
879 uiSum += abs( piOrg[63] - piCur[63] );
|
|
880
|
|
881 piOrg += iStrideOrg;
|
|
882 piCur += iStrideCur;
|
|
883 }
|
|
884
|
|
885 uiSum <<= iSubShift;
|
|
886 return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
|
|
887 }
|
|
888
|
|
889 #if AMP_SAD
|
|
890 Distortion TComRdCost::xGetSAD48( DistParam* pcDtParam )
|
|
891 {
|
|
892 if ( pcDtParam->bApplyWeight )
|
|
893 {
|
|
894 return TComRdCostWeightPrediction::xGetSADw( pcDtParam );
|
|
895 }
|
|
896 const Pel* piOrg = pcDtParam->pOrg;
|
|
897 const Pel* piCur = pcDtParam->pCur;
|
|
898 Int iRows = pcDtParam->iRows;
|
|
899 Int iSubShift = pcDtParam->iSubShift;
|
|
900 Int iSubStep = ( 1 << iSubShift );
|
|
901 Int iStrideCur = pcDtParam->iStrideCur*iSubStep;
|
|
902 Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep;
|
|
903
|
|
904 Distortion uiSum = 0;
|
|
905
|
|
906 for( ; iRows != 0; iRows-=iSubStep )
|
|
907 {
|
|
908 uiSum += abs( piOrg[0] - piCur[0] );
|
|
909 uiSum += abs( piOrg[1] - piCur[1] );
|
|
910 uiSum += abs( piOrg[2] - piCur[2] );
|
|
911 uiSum += abs( piOrg[3] - piCur[3] );
|
|
912 uiSum += abs( piOrg[4] - piCur[4] );
|
|
913 uiSum += abs( piOrg[5] - piCur[5] );
|
|
914 uiSum += abs( piOrg[6] - piCur[6] );
|
|
915 uiSum += abs( piOrg[7] - piCur[7] );
|
|
916 uiSum += abs( piOrg[8] - piCur[8] );
|
|
917 uiSum += abs( piOrg[9] - piCur[9] );
|
|
918 uiSum += abs( piOrg[10] - piCur[10] );
|
|
919 uiSum += abs( piOrg[11] - piCur[11] );
|
|
920 uiSum += abs( piOrg[12] - piCur[12] );
|
|
921 uiSum += abs( piOrg[13] - piCur[13] );
|
|
922 uiSum += abs( piOrg[14] - piCur[14] );
|
|
923 uiSum += abs( piOrg[15] - piCur[15] );
|
|
924 uiSum += abs( piOrg[16] - piCur[16] );
|
|
925 uiSum += abs( piOrg[17] - piCur[17] );
|
|
926 uiSum += abs( piOrg[18] - piCur[18] );
|
|
927 uiSum += abs( piOrg[19] - piCur[19] );
|
|
928 uiSum += abs( piOrg[20] - piCur[20] );
|
|
929 uiSum += abs( piOrg[21] - piCur[21] );
|
|
930 uiSum += abs( piOrg[22] - piCur[22] );
|
|
931 uiSum += abs( piOrg[23] - piCur[23] );
|
|
932 uiSum += abs( piOrg[24] - piCur[24] );
|
|
933 uiSum += abs( piOrg[25] - piCur[25] );
|
|
934 uiSum += abs( piOrg[26] - piCur[26] );
|
|
935 uiSum += abs( piOrg[27] - piCur[27] );
|
|
936 uiSum += abs( piOrg[28] - piCur[28] );
|
|
937 uiSum += abs( piOrg[29] - piCur[29] );
|
|
938 uiSum += abs( piOrg[30] - piCur[30] );
|
|
939 uiSum += abs( piOrg[31] - piCur[31] );
|
|
940 uiSum += abs( piOrg[32] - piCur[32] );
|
|
941 uiSum += abs( piOrg[33] - piCur[33] );
|
|
942 uiSum += abs( piOrg[34] - piCur[34] );
|
|
943 uiSum += abs( piOrg[35] - piCur[35] );
|
|
944 uiSum += abs( piOrg[36] - piCur[36] );
|
|
945 uiSum += abs( piOrg[37] - piCur[37] );
|
|
946 uiSum += abs( piOrg[38] - piCur[38] );
|
|
947 uiSum += abs( piOrg[39] - piCur[39] );
|
|
948 uiSum += abs( piOrg[40] - piCur[40] );
|
|
949 uiSum += abs( piOrg[41] - piCur[41] );
|
|
950 uiSum += abs( piOrg[42] - piCur[42] );
|
|
951 uiSum += abs( piOrg[43] - piCur[43] );
|
|
952 uiSum += abs( piOrg[44] - piCur[44] );
|
|
953 uiSum += abs( piOrg[45] - piCur[45] );
|
|
954 uiSum += abs( piOrg[46] - piCur[46] );
|
|
955 uiSum += abs( piOrg[47] - piCur[47] );
|
|
956
|
|
957 piOrg += iStrideOrg;
|
|
958 piCur += iStrideCur;
|
|
959 }
|
|
960
|
|
961 uiSum <<= iSubShift;
|
|
962 return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
|
|
963 }
|
|
964 #endif
|
|
965
|
|
966 // --------------------------------------------------------------------------------------------------------------------
|
|
967 // SSE
|
|
968 // --------------------------------------------------------------------------------------------------------------------
|
|
969
|
|
970 Distortion TComRdCost::xGetSSE( DistParam* pcDtParam )
|
|
971 {
|
|
972 if ( pcDtParam->bApplyWeight )
|
|
973 {
|
|
974 return TComRdCostWeightPrediction::xGetSSEw( pcDtParam );
|
|
975 }
|
|
976 const Pel* piOrg = pcDtParam->pOrg;
|
|
977 const Pel* piCur = pcDtParam->pCur;
|
|
978 Int iRows = pcDtParam->iRows;
|
|
979 Int iCols = pcDtParam->iCols;
|
|
980 Int iStrideOrg = pcDtParam->iStrideOrg;
|
|
981 Int iStrideCur = pcDtParam->iStrideCur;
|
|
982
|
|
983 Distortion uiSum = 0;
|
|
984 UInt uiShift = DISTORTION_PRECISION_ADJUSTMENT((pcDtParam->bitDepth-8) << 1);
|
|
985
|
|
986 Intermediate_Int iTemp;
|
|
987
|
|
988 for( ; iRows != 0; iRows-- )
|
|
989 {
|
|
990 for (Int n = 0; n < iCols; n++ )
|
|
991 {
|
|
992 iTemp = piOrg[n ] - piCur[n ];
|
|
993 uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
994 }
|
|
995 piOrg += iStrideOrg;
|
|
996 piCur += iStrideCur;
|
|
997 }
|
|
998
|
|
999 return ( uiSum );
|
|
1000 }
|
|
1001
|
|
1002 Distortion TComRdCost::xGetSSE4( DistParam* pcDtParam )
|
|
1003 {
|
|
1004 if ( pcDtParam->bApplyWeight )
|
|
1005 {
|
|
1006 assert( pcDtParam->iCols == 4 );
|
|
1007 return TComRdCostWeightPrediction::xGetSSEw( pcDtParam );
|
|
1008 }
|
|
1009 const Pel* piOrg = pcDtParam->pOrg;
|
|
1010 const Pel* piCur = pcDtParam->pCur;
|
|
1011 Int iRows = pcDtParam->iRows;
|
|
1012 Int iStrideOrg = pcDtParam->iStrideOrg;
|
|
1013 Int iStrideCur = pcDtParam->iStrideCur;
|
|
1014
|
|
1015 Distortion uiSum = 0;
|
|
1016 UInt uiShift = DISTORTION_PRECISION_ADJUSTMENT((pcDtParam->bitDepth-8) << 1);
|
|
1017
|
|
1018 Intermediate_Int iTemp;
|
|
1019
|
|
1020 for( ; iRows != 0; iRows-- )
|
|
1021 {
|
|
1022
|
|
1023 iTemp = piOrg[0] - piCur[0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1024 iTemp = piOrg[1] - piCur[1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1025 iTemp = piOrg[2] - piCur[2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1026 iTemp = piOrg[3] - piCur[3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1027
|
|
1028 piOrg += iStrideOrg;
|
|
1029 piCur += iStrideCur;
|
|
1030 }
|
|
1031
|
|
1032 return ( uiSum );
|
|
1033 }
|
|
1034
|
|
1035 Distortion TComRdCost::xGetSSE8( DistParam* pcDtParam )
|
|
1036 {
|
|
1037 if ( pcDtParam->bApplyWeight )
|
|
1038 {
|
|
1039 assert( pcDtParam->iCols == 8 );
|
|
1040 return TComRdCostWeightPrediction::xGetSSEw( pcDtParam );
|
|
1041 }
|
|
1042 const Pel* piOrg = pcDtParam->pOrg;
|
|
1043 const Pel* piCur = pcDtParam->pCur;
|
|
1044 Int iRows = pcDtParam->iRows;
|
|
1045 Int iStrideOrg = pcDtParam->iStrideOrg;
|
|
1046 Int iStrideCur = pcDtParam->iStrideCur;
|
|
1047
|
|
1048 Distortion uiSum = 0;
|
|
1049 UInt uiShift = DISTORTION_PRECISION_ADJUSTMENT((pcDtParam->bitDepth-8) << 1);
|
|
1050
|
|
1051 Intermediate_Int iTemp;
|
|
1052
|
|
1053 for( ; iRows != 0; iRows-- )
|
|
1054 {
|
|
1055 iTemp = piOrg[0] - piCur[0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1056 iTemp = piOrg[1] - piCur[1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1057 iTemp = piOrg[2] - piCur[2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1058 iTemp = piOrg[3] - piCur[3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1059 iTemp = piOrg[4] - piCur[4]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1060 iTemp = piOrg[5] - piCur[5]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1061 iTemp = piOrg[6] - piCur[6]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1062 iTemp = piOrg[7] - piCur[7]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1063
|
|
1064 piOrg += iStrideOrg;
|
|
1065 piCur += iStrideCur;
|
|
1066 }
|
|
1067
|
|
1068 return ( uiSum );
|
|
1069 }
|
|
1070
|
|
1071 Distortion TComRdCost::xGetSSE16( DistParam* pcDtParam )
|
|
1072 {
|
|
1073 if ( pcDtParam->bApplyWeight )
|
|
1074 {
|
|
1075 assert( pcDtParam->iCols == 16 );
|
|
1076 return TComRdCostWeightPrediction::xGetSSEw( pcDtParam );
|
|
1077 }
|
|
1078 const Pel* piOrg = pcDtParam->pOrg;
|
|
1079 const Pel* piCur = pcDtParam->pCur;
|
|
1080 Int iRows = pcDtParam->iRows;
|
|
1081 Int iStrideOrg = pcDtParam->iStrideOrg;
|
|
1082 Int iStrideCur = pcDtParam->iStrideCur;
|
|
1083
|
|
1084 Distortion uiSum = 0;
|
|
1085 UInt uiShift = DISTORTION_PRECISION_ADJUSTMENT((pcDtParam->bitDepth-8) << 1);
|
|
1086
|
|
1087 Intermediate_Int iTemp;
|
|
1088
|
|
1089 for( ; iRows != 0; iRows-- )
|
|
1090 {
|
|
1091
|
|
1092 iTemp = piOrg[ 0] - piCur[ 0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1093 iTemp = piOrg[ 1] - piCur[ 1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1094 iTemp = piOrg[ 2] - piCur[ 2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1095 iTemp = piOrg[ 3] - piCur[ 3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1096 iTemp = piOrg[ 4] - piCur[ 4]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1097 iTemp = piOrg[ 5] - piCur[ 5]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1098 iTemp = piOrg[ 6] - piCur[ 6]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1099 iTemp = piOrg[ 7] - piCur[ 7]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1100 iTemp = piOrg[ 8] - piCur[ 8]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1101 iTemp = piOrg[ 9] - piCur[ 9]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1102 iTemp = piOrg[10] - piCur[10]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1103 iTemp = piOrg[11] - piCur[11]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1104 iTemp = piOrg[12] - piCur[12]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1105 iTemp = piOrg[13] - piCur[13]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1106 iTemp = piOrg[14] - piCur[14]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1107 iTemp = piOrg[15] - piCur[15]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1108
|
|
1109 piOrg += iStrideOrg;
|
|
1110 piCur += iStrideCur;
|
|
1111 }
|
|
1112
|
|
1113 return ( uiSum );
|
|
1114 }
|
|
1115
|
|
1116 Distortion TComRdCost::xGetSSE16N( DistParam* pcDtParam )
|
|
1117 {
|
|
1118 if ( pcDtParam->bApplyWeight )
|
|
1119 {
|
|
1120 return TComRdCostWeightPrediction::xGetSSEw( pcDtParam );
|
|
1121 }
|
|
1122 const Pel* piOrg = pcDtParam->pOrg;
|
|
1123 const Pel* piCur = pcDtParam->pCur;
|
|
1124 Int iRows = pcDtParam->iRows;
|
|
1125 Int iCols = pcDtParam->iCols;
|
|
1126 Int iStrideOrg = pcDtParam->iStrideOrg;
|
|
1127 Int iStrideCur = pcDtParam->iStrideCur;
|
|
1128
|
|
1129 Distortion uiSum = 0;
|
|
1130 UInt uiShift = DISTORTION_PRECISION_ADJUSTMENT((pcDtParam->bitDepth-8) << 1);
|
|
1131
|
|
1132 Intermediate_Int iTemp;
|
|
1133
|
|
1134 for( ; iRows != 0; iRows-- )
|
|
1135 {
|
|
1136 for (Int n = 0; n < iCols; n+=16 )
|
|
1137 {
|
|
1138
|
|
1139 iTemp = piOrg[n+ 0] - piCur[n+ 0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1140 iTemp = piOrg[n+ 1] - piCur[n+ 1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1141 iTemp = piOrg[n+ 2] - piCur[n+ 2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1142 iTemp = piOrg[n+ 3] - piCur[n+ 3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1143 iTemp = piOrg[n+ 4] - piCur[n+ 4]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1144 iTemp = piOrg[n+ 5] - piCur[n+ 5]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1145 iTemp = piOrg[n+ 6] - piCur[n+ 6]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1146 iTemp = piOrg[n+ 7] - piCur[n+ 7]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1147 iTemp = piOrg[n+ 8] - piCur[n+ 8]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1148 iTemp = piOrg[n+ 9] - piCur[n+ 9]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1149 iTemp = piOrg[n+10] - piCur[n+10]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1150 iTemp = piOrg[n+11] - piCur[n+11]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1151 iTemp = piOrg[n+12] - piCur[n+12]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1152 iTemp = piOrg[n+13] - piCur[n+13]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1153 iTemp = piOrg[n+14] - piCur[n+14]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1154 iTemp = piOrg[n+15] - piCur[n+15]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1155
|
|
1156 }
|
|
1157 piOrg += iStrideOrg;
|
|
1158 piCur += iStrideCur;
|
|
1159 }
|
|
1160
|
|
1161 return ( uiSum );
|
|
1162 }
|
|
1163
|
|
1164 Distortion TComRdCost::xGetSSE32( DistParam* pcDtParam )
|
|
1165 {
|
|
1166 if ( pcDtParam->bApplyWeight )
|
|
1167 {
|
|
1168 assert( pcDtParam->iCols == 32 );
|
|
1169 return TComRdCostWeightPrediction::xGetSSEw( pcDtParam );
|
|
1170 }
|
|
1171 const Pel* piOrg = pcDtParam->pOrg;
|
|
1172 const Pel* piCur = pcDtParam->pCur;
|
|
1173 Int iRows = pcDtParam->iRows;
|
|
1174 Int iStrideOrg = pcDtParam->iStrideOrg;
|
|
1175 Int iStrideCur = pcDtParam->iStrideCur;
|
|
1176
|
|
1177 Distortion uiSum = 0;
|
|
1178 UInt uiShift = DISTORTION_PRECISION_ADJUSTMENT((pcDtParam->bitDepth-8) << 1);
|
|
1179
|
|
1180 Intermediate_Int iTemp;
|
|
1181
|
|
1182 for( ; iRows != 0; iRows-- )
|
|
1183 {
|
|
1184
|
|
1185 iTemp = piOrg[ 0] - piCur[ 0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1186 iTemp = piOrg[ 1] - piCur[ 1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1187 iTemp = piOrg[ 2] - piCur[ 2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1188 iTemp = piOrg[ 3] - piCur[ 3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1189 iTemp = piOrg[ 4] - piCur[ 4]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1190 iTemp = piOrg[ 5] - piCur[ 5]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1191 iTemp = piOrg[ 6] - piCur[ 6]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1192 iTemp = piOrg[ 7] - piCur[ 7]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1193 iTemp = piOrg[ 8] - piCur[ 8]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1194 iTemp = piOrg[ 9] - piCur[ 9]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1195 iTemp = piOrg[10] - piCur[10]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1196 iTemp = piOrg[11] - piCur[11]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1197 iTemp = piOrg[12] - piCur[12]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1198 iTemp = piOrg[13] - piCur[13]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1199 iTemp = piOrg[14] - piCur[14]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1200 iTemp = piOrg[15] - piCur[15]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1201 iTemp = piOrg[16] - piCur[16]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1202 iTemp = piOrg[17] - piCur[17]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1203 iTemp = piOrg[18] - piCur[18]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1204 iTemp = piOrg[19] - piCur[19]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1205 iTemp = piOrg[20] - piCur[20]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1206 iTemp = piOrg[21] - piCur[21]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1207 iTemp = piOrg[22] - piCur[22]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1208 iTemp = piOrg[23] - piCur[23]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1209 iTemp = piOrg[24] - piCur[24]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1210 iTemp = piOrg[25] - piCur[25]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1211 iTemp = piOrg[26] - piCur[26]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1212 iTemp = piOrg[27] - piCur[27]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1213 iTemp = piOrg[28] - piCur[28]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1214 iTemp = piOrg[29] - piCur[29]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1215 iTemp = piOrg[30] - piCur[30]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1216 iTemp = piOrg[31] - piCur[31]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1217
|
|
1218 piOrg += iStrideOrg;
|
|
1219 piCur += iStrideCur;
|
|
1220 }
|
|
1221
|
|
1222 return ( uiSum );
|
|
1223 }
|
|
1224
|
|
1225 Distortion TComRdCost::xGetSSE64( DistParam* pcDtParam )
|
|
1226 {
|
|
1227 if ( pcDtParam->bApplyWeight )
|
|
1228 {
|
|
1229 assert( pcDtParam->iCols == 64 );
|
|
1230 return TComRdCostWeightPrediction::xGetSSEw( pcDtParam );
|
|
1231 }
|
|
1232 const Pel* piOrg = pcDtParam->pOrg;
|
|
1233 const Pel* piCur = pcDtParam->pCur;
|
|
1234 Int iRows = pcDtParam->iRows;
|
|
1235 Int iStrideOrg = pcDtParam->iStrideOrg;
|
|
1236 Int iStrideCur = pcDtParam->iStrideCur;
|
|
1237
|
|
1238 Distortion uiSum = 0;
|
|
1239 UInt uiShift = DISTORTION_PRECISION_ADJUSTMENT((pcDtParam->bitDepth-8) << 1);
|
|
1240
|
|
1241 Intermediate_Int iTemp;
|
|
1242
|
|
1243 for( ; iRows != 0; iRows-- )
|
|
1244 {
|
|
1245 iTemp = piOrg[ 0] - piCur[ 0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1246 iTemp = piOrg[ 1] - piCur[ 1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1247 iTemp = piOrg[ 2] - piCur[ 2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1248 iTemp = piOrg[ 3] - piCur[ 3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1249 iTemp = piOrg[ 4] - piCur[ 4]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1250 iTemp = piOrg[ 5] - piCur[ 5]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1251 iTemp = piOrg[ 6] - piCur[ 6]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1252 iTemp = piOrg[ 7] - piCur[ 7]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1253 iTemp = piOrg[ 8] - piCur[ 8]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1254 iTemp = piOrg[ 9] - piCur[ 9]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1255 iTemp = piOrg[10] - piCur[10]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1256 iTemp = piOrg[11] - piCur[11]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1257 iTemp = piOrg[12] - piCur[12]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1258 iTemp = piOrg[13] - piCur[13]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1259 iTemp = piOrg[14] - piCur[14]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1260 iTemp = piOrg[15] - piCur[15]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1261 iTemp = piOrg[16] - piCur[16]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1262 iTemp = piOrg[17] - piCur[17]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1263 iTemp = piOrg[18] - piCur[18]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1264 iTemp = piOrg[19] - piCur[19]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1265 iTemp = piOrg[20] - piCur[20]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1266 iTemp = piOrg[21] - piCur[21]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1267 iTemp = piOrg[22] - piCur[22]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1268 iTemp = piOrg[23] - piCur[23]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1269 iTemp = piOrg[24] - piCur[24]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1270 iTemp = piOrg[25] - piCur[25]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1271 iTemp = piOrg[26] - piCur[26]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1272 iTemp = piOrg[27] - piCur[27]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1273 iTemp = piOrg[28] - piCur[28]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1274 iTemp = piOrg[29] - piCur[29]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1275 iTemp = piOrg[30] - piCur[30]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1276 iTemp = piOrg[31] - piCur[31]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1277 iTemp = piOrg[32] - piCur[32]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1278 iTemp = piOrg[33] - piCur[33]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1279 iTemp = piOrg[34] - piCur[34]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1280 iTemp = piOrg[35] - piCur[35]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1281 iTemp = piOrg[36] - piCur[36]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1282 iTemp = piOrg[37] - piCur[37]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1283 iTemp = piOrg[38] - piCur[38]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1284 iTemp = piOrg[39] - piCur[39]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1285 iTemp = piOrg[40] - piCur[40]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1286 iTemp = piOrg[41] - piCur[41]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1287 iTemp = piOrg[42] - piCur[42]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1288 iTemp = piOrg[43] - piCur[43]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1289 iTemp = piOrg[44] - piCur[44]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1290 iTemp = piOrg[45] - piCur[45]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1291 iTemp = piOrg[46] - piCur[46]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1292 iTemp = piOrg[47] - piCur[47]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1293 iTemp = piOrg[48] - piCur[48]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1294 iTemp = piOrg[49] - piCur[49]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1295 iTemp = piOrg[50] - piCur[50]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1296 iTemp = piOrg[51] - piCur[51]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1297 iTemp = piOrg[52] - piCur[52]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1298 iTemp = piOrg[53] - piCur[53]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1299 iTemp = piOrg[54] - piCur[54]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1300 iTemp = piOrg[55] - piCur[55]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1301 iTemp = piOrg[56] - piCur[56]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1302 iTemp = piOrg[57] - piCur[57]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1303 iTemp = piOrg[58] - piCur[58]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1304 iTemp = piOrg[59] - piCur[59]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1305 iTemp = piOrg[60] - piCur[60]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1306 iTemp = piOrg[61] - piCur[61]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1307 iTemp = piOrg[62] - piCur[62]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1308 iTemp = piOrg[63] - piCur[63]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
|
|
1309
|
|
1310 piOrg += iStrideOrg;
|
|
1311 piCur += iStrideCur;
|
|
1312 }
|
|
1313
|
|
1314 return ( uiSum );
|
|
1315 }
|
|
1316
|
|
1317 // --------------------------------------------------------------------------------------------------------------------
|
|
1318 // HADAMARD with step (used in fractional search)
|
|
1319 // --------------------------------------------------------------------------------------------------------------------
|
|
1320
|
|
1321 Distortion TComRdCost::xCalcHADs2x2( Pel *piOrg, Pel *piCur, Int iStrideOrg, Int iStrideCur, Int iStep )
|
|
1322 {
|
|
1323 Distortion satd = 0;
|
|
1324 TCoeff diff[4], m[4];
|
|
1325 assert( iStep == 1 );
|
|
1326 diff[0] = piOrg[0 ] - piCur[0];
|
|
1327 diff[1] = piOrg[1 ] - piCur[1];
|
|
1328 diff[2] = piOrg[iStrideOrg ] - piCur[0 + iStrideCur];
|
|
1329 diff[3] = piOrg[iStrideOrg + 1] - piCur[1 + iStrideCur];
|
|
1330 m[0] = diff[0] + diff[2];
|
|
1331 m[1] = diff[1] + diff[3];
|
|
1332 m[2] = diff[0] - diff[2];
|
|
1333 m[3] = diff[1] - diff[3];
|
|
1334
|
|
1335 satd += abs(m[0] + m[1]);
|
|
1336 satd += abs(m[0] - m[1]);
|
|
1337 satd += abs(m[2] + m[3]);
|
|
1338 satd += abs(m[2] - m[3]);
|
|
1339
|
|
1340 return satd;
|
|
1341 }
|
|
1342
|
|
1343 Distortion TComRdCost::xCalcHADs4x4( Pel *piOrg, Pel *piCur, Int iStrideOrg, Int iStrideCur, Int iStep )
|
|
1344 {
|
|
1345 Int k;
|
|
1346 Distortion satd = 0;
|
|
1347 TCoeff diff[16], m[16], d[16];
|
|
1348
|
|
1349 assert( iStep == 1 );
|
|
1350 for( k = 0; k < 16; k+=4 )
|
|
1351 {
|
|
1352 diff[k+0] = piOrg[0] - piCur[0];
|
|
1353 diff[k+1] = piOrg[1] - piCur[1];
|
|
1354 diff[k+2] = piOrg[2] - piCur[2];
|
|
1355 diff[k+3] = piOrg[3] - piCur[3];
|
|
1356
|
|
1357 piCur += iStrideCur;
|
|
1358 piOrg += iStrideOrg;
|
|
1359 }
|
|
1360
|
|
1361 /*===== hadamard transform =====*/
|
|
1362 m[ 0] = diff[ 0] + diff[12];
|
|
1363 m[ 1] = diff[ 1] + diff[13];
|
|
1364 m[ 2] = diff[ 2] + diff[14];
|
|
1365 m[ 3] = diff[ 3] + diff[15];
|
|
1366 m[ 4] = diff[ 4] + diff[ 8];
|
|
1367 m[ 5] = diff[ 5] + diff[ 9];
|
|
1368 m[ 6] = diff[ 6] + diff[10];
|
|
1369 m[ 7] = diff[ 7] + diff[11];
|
|
1370 m[ 8] = diff[ 4] - diff[ 8];
|
|
1371 m[ 9] = diff[ 5] - diff[ 9];
|
|
1372 m[10] = diff[ 6] - diff[10];
|
|
1373 m[11] = diff[ 7] - diff[11];
|
|
1374 m[12] = diff[ 0] - diff[12];
|
|
1375 m[13] = diff[ 1] - diff[13];
|
|
1376 m[14] = diff[ 2] - diff[14];
|
|
1377 m[15] = diff[ 3] - diff[15];
|
|
1378
|
|
1379 d[ 0] = m[ 0] + m[ 4];
|
|
1380 d[ 1] = m[ 1] + m[ 5];
|
|
1381 d[ 2] = m[ 2] + m[ 6];
|
|
1382 d[ 3] = m[ 3] + m[ 7];
|
|
1383 d[ 4] = m[ 8] + m[12];
|
|
1384 d[ 5] = m[ 9] + m[13];
|
|
1385 d[ 6] = m[10] + m[14];
|
|
1386 d[ 7] = m[11] + m[15];
|
|
1387 d[ 8] = m[ 0] - m[ 4];
|
|
1388 d[ 9] = m[ 1] - m[ 5];
|
|
1389 d[10] = m[ 2] - m[ 6];
|
|
1390 d[11] = m[ 3] - m[ 7];
|
|
1391 d[12] = m[12] - m[ 8];
|
|
1392 d[13] = m[13] - m[ 9];
|
|
1393 d[14] = m[14] - m[10];
|
|
1394 d[15] = m[15] - m[11];
|
|
1395
|
|
1396 m[ 0] = d[ 0] + d[ 3];
|
|
1397 m[ 1] = d[ 1] + d[ 2];
|
|
1398 m[ 2] = d[ 1] - d[ 2];
|
|
1399 m[ 3] = d[ 0] - d[ 3];
|
|
1400 m[ 4] = d[ 4] + d[ 7];
|
|
1401 m[ 5] = d[ 5] + d[ 6];
|
|
1402 m[ 6] = d[ 5] - d[ 6];
|
|
1403 m[ 7] = d[ 4] - d[ 7];
|
|
1404 m[ 8] = d[ 8] + d[11];
|
|
1405 m[ 9] = d[ 9] + d[10];
|
|
1406 m[10] = d[ 9] - d[10];
|
|
1407 m[11] = d[ 8] - d[11];
|
|
1408 m[12] = d[12] + d[15];
|
|
1409 m[13] = d[13] + d[14];
|
|
1410 m[14] = d[13] - d[14];
|
|
1411 m[15] = d[12] - d[15];
|
|
1412
|
|
1413 d[ 0] = m[ 0] + m[ 1];
|
|
1414 d[ 1] = m[ 0] - m[ 1];
|
|
1415 d[ 2] = m[ 2] + m[ 3];
|
|
1416 d[ 3] = m[ 3] - m[ 2];
|
|
1417 d[ 4] = m[ 4] + m[ 5];
|
|
1418 d[ 5] = m[ 4] - m[ 5];
|
|
1419 d[ 6] = m[ 6] + m[ 7];
|
|
1420 d[ 7] = m[ 7] - m[ 6];
|
|
1421 d[ 8] = m[ 8] + m[ 9];
|
|
1422 d[ 9] = m[ 8] - m[ 9];
|
|
1423 d[10] = m[10] + m[11];
|
|
1424 d[11] = m[11] - m[10];
|
|
1425 d[12] = m[12] + m[13];
|
|
1426 d[13] = m[12] - m[13];
|
|
1427 d[14] = m[14] + m[15];
|
|
1428 d[15] = m[15] - m[14];
|
|
1429
|
|
1430 for (k=0; k<16; ++k)
|
|
1431 {
|
|
1432 satd += abs(d[k]);
|
|
1433 }
|
|
1434 satd = ((satd+1)>>1);
|
|
1435
|
|
1436 return satd;
|
|
1437 }
|
|
1438
|
|
1439 Distortion TComRdCost::xCalcHADs8x8( Pel *piOrg, Pel *piCur, Int iStrideOrg, Int iStrideCur, Int iStep )
|
|
1440 {
|
|
1441 Int k, i, j, jj;
|
|
1442 Distortion sad = 0;
|
|
1443 TCoeff diff[64], m1[8][8], m2[8][8], m3[8][8];
|
|
1444 assert( iStep == 1 );
|
|
1445 for( k = 0; k < 64; k += 8 )
|
|
1446 {
|
|
1447 diff[k+0] = piOrg[0] - piCur[0];
|
|
1448 diff[k+1] = piOrg[1] - piCur[1];
|
|
1449 diff[k+2] = piOrg[2] - piCur[2];
|
|
1450 diff[k+3] = piOrg[3] - piCur[3];
|
|
1451 diff[k+4] = piOrg[4] - piCur[4];
|
|
1452 diff[k+5] = piOrg[5] - piCur[5];
|
|
1453 diff[k+6] = piOrg[6] - piCur[6];
|
|
1454 diff[k+7] = piOrg[7] - piCur[7];
|
|
1455
|
|
1456 piCur += iStrideCur;
|
|
1457 piOrg += iStrideOrg;
|
|
1458 }
|
|
1459
|
|
1460 //horizontal
|
|
1461 for (j=0; j < 8; j++)
|
|
1462 {
|
|
1463 jj = j << 3;
|
|
1464 m2[j][0] = diff[jj ] + diff[jj+4];
|
|
1465 m2[j][1] = diff[jj+1] + diff[jj+5];
|
|
1466 m2[j][2] = diff[jj+2] + diff[jj+6];
|
|
1467 m2[j][3] = diff[jj+3] + diff[jj+7];
|
|
1468 m2[j][4] = diff[jj ] - diff[jj+4];
|
|
1469 m2[j][5] = diff[jj+1] - diff[jj+5];
|
|
1470 m2[j][6] = diff[jj+2] - diff[jj+6];
|
|
1471 m2[j][7] = diff[jj+3] - diff[jj+7];
|
|
1472
|
|
1473 m1[j][0] = m2[j][0] + m2[j][2];
|
|
1474 m1[j][1] = m2[j][1] + m2[j][3];
|
|
1475 m1[j][2] = m2[j][0] - m2[j][2];
|
|
1476 m1[j][3] = m2[j][1] - m2[j][3];
|
|
1477 m1[j][4] = m2[j][4] + m2[j][6];
|
|
1478 m1[j][5] = m2[j][5] + m2[j][7];
|
|
1479 m1[j][6] = m2[j][4] - m2[j][6];
|
|
1480 m1[j][7] = m2[j][5] - m2[j][7];
|
|
1481
|
|
1482 m2[j][0] = m1[j][0] + m1[j][1];
|
|
1483 m2[j][1] = m1[j][0] - m1[j][1];
|
|
1484 m2[j][2] = m1[j][2] + m1[j][3];
|
|
1485 m2[j][3] = m1[j][2] - m1[j][3];
|
|
1486 m2[j][4] = m1[j][4] + m1[j][5];
|
|
1487 m2[j][5] = m1[j][4] - m1[j][5];
|
|
1488 m2[j][6] = m1[j][6] + m1[j][7];
|
|
1489 m2[j][7] = m1[j][6] - m1[j][7];
|
|
1490 }
|
|
1491
|
|
1492 //vertical
|
|
1493 for (i=0; i < 8; i++)
|
|
1494 {
|
|
1495 m3[0][i] = m2[0][i] + m2[4][i];
|
|
1496 m3[1][i] = m2[1][i] + m2[5][i];
|
|
1497 m3[2][i] = m2[2][i] + m2[6][i];
|
|
1498 m3[3][i] = m2[3][i] + m2[7][i];
|
|
1499 m3[4][i] = m2[0][i] - m2[4][i];
|
|
1500 m3[5][i] = m2[1][i] - m2[5][i];
|
|
1501 m3[6][i] = m2[2][i] - m2[6][i];
|
|
1502 m3[7][i] = m2[3][i] - m2[7][i];
|
|
1503
|
|
1504 m1[0][i] = m3[0][i] + m3[2][i];
|
|
1505 m1[1][i] = m3[1][i] + m3[3][i];
|
|
1506 m1[2][i] = m3[0][i] - m3[2][i];
|
|
1507 m1[3][i] = m3[1][i] - m3[3][i];
|
|
1508 m1[4][i] = m3[4][i] + m3[6][i];
|
|
1509 m1[5][i] = m3[5][i] + m3[7][i];
|
|
1510 m1[6][i] = m3[4][i] - m3[6][i];
|
|
1511 m1[7][i] = m3[5][i] - m3[7][i];
|
|
1512
|
|
1513 m2[0][i] = m1[0][i] + m1[1][i];
|
|
1514 m2[1][i] = m1[0][i] - m1[1][i];
|
|
1515 m2[2][i] = m1[2][i] + m1[3][i];
|
|
1516 m2[3][i] = m1[2][i] - m1[3][i];
|
|
1517 m2[4][i] = m1[4][i] + m1[5][i];
|
|
1518 m2[5][i] = m1[4][i] - m1[5][i];
|
|
1519 m2[6][i] = m1[6][i] + m1[7][i];
|
|
1520 m2[7][i] = m1[6][i] - m1[7][i];
|
|
1521 }
|
|
1522
|
|
1523 for (i = 0; i < 8; i++)
|
|
1524 {
|
|
1525 for (j = 0; j < 8; j++)
|
|
1526 {
|
|
1527 sad += abs(m2[i][j]);
|
|
1528 }
|
|
1529 }
|
|
1530
|
|
1531 sad=((sad+2)>>2);
|
|
1532
|
|
1533 return sad;
|
|
1534 }
|
|
1535
|
|
1536
|
|
1537 Distortion TComRdCost::xGetHADs( DistParam* pcDtParam )
|
|
1538 {
|
|
1539 if ( pcDtParam->bApplyWeight )
|
|
1540 {
|
|
1541 return TComRdCostWeightPrediction::xGetHADsw( pcDtParam );
|
|
1542 }
|
|
1543 Pel* piOrg = pcDtParam->pOrg;
|
|
1544 Pel* piCur = pcDtParam->pCur;
|
|
1545 Int iRows = pcDtParam->iRows;
|
|
1546 Int iCols = pcDtParam->iCols;
|
|
1547 Int iStrideCur = pcDtParam->iStrideCur;
|
|
1548 Int iStrideOrg = pcDtParam->iStrideOrg;
|
|
1549 Int iStep = pcDtParam->iStep;
|
|
1550
|
|
1551 Int x, y;
|
|
1552
|
|
1553 Distortion uiSum = 0;
|
|
1554
|
|
1555 if( ( iRows % 8 == 0) && (iCols % 8 == 0) )
|
|
1556 {
|
|
1557 Int iOffsetOrg = iStrideOrg<<3;
|
|
1558 Int iOffsetCur = iStrideCur<<3;
|
|
1559 for ( y=0; y<iRows; y+= 8 )
|
|
1560 {
|
|
1561 for ( x=0; x<iCols; x+= 8 )
|
|
1562 {
|
|
1563 uiSum += xCalcHADs8x8( &piOrg[x], &piCur[x*iStep], iStrideOrg, iStrideCur, iStep );
|
|
1564 }
|
|
1565 piOrg += iOffsetOrg;
|
|
1566 piCur += iOffsetCur;
|
|
1567 }
|
|
1568 }
|
|
1569 else if( ( iRows % 4 == 0) && (iCols % 4 == 0) )
|
|
1570 {
|
|
1571 Int iOffsetOrg = iStrideOrg<<2;
|
|
1572 Int iOffsetCur = iStrideCur<<2;
|
|
1573
|
|
1574 for ( y=0; y<iRows; y+= 4 )
|
|
1575 {
|
|
1576 for ( x=0; x<iCols; x+= 4 )
|
|
1577 {
|
|
1578 uiSum += xCalcHADs4x4( &piOrg[x], &piCur[x*iStep], iStrideOrg, iStrideCur, iStep );
|
|
1579 }
|
|
1580 piOrg += iOffsetOrg;
|
|
1581 piCur += iOffsetCur;
|
|
1582 }
|
|
1583 }
|
|
1584 else if( ( iRows % 2 == 0) && (iCols % 2 == 0) )
|
|
1585 {
|
|
1586 Int iOffsetOrg = iStrideOrg<<1;
|
|
1587 Int iOffsetCur = iStrideCur<<1;
|
|
1588 for ( y=0; y<iRows; y+=2 )
|
|
1589 {
|
|
1590 for ( x=0; x<iCols; x+=2 )
|
|
1591 {
|
|
1592 uiSum += xCalcHADs2x2( &piOrg[x], &piCur[x*iStep], iStrideOrg, iStrideCur, iStep );
|
|
1593 }
|
|
1594 piOrg += iOffsetOrg;
|
|
1595 piCur += iOffsetCur;
|
|
1596 }
|
|
1597 }
|
|
1598 else
|
|
1599 {
|
|
1600 assert(false);
|
|
1601 }
|
|
1602
|
|
1603 return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
|
|
1604 }
|
|
1605
|
|
1606 //! \}
|