comparison jctvc/TLibCommon/TComRdCost.cpp @ 0:772086c29cc7

Initial import.
author Matti Hamalainen <ccr@tnsp.org>
date Wed, 16 Nov 2016 11:16:33 +0200
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:772086c29cc7
1 /* The copyright in this software is being made available under the BSD
2 * License, included below. This software may be subject to other third party
3 * and contributor rights, including patent rights, and no such rights are
4 * granted under this license.
5 *
6 * Copyright (c) 2010-2014, ITU/ISO/IEC
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions are met:
11 *
12 * * Redistributions of source code must retain the above copyright notice,
13 * this list of conditions and the following disclaimer.
14 * * Redistributions in binary form must reproduce the above copyright notice,
15 * this list of conditions and the following disclaimer in the documentation
16 * and/or other materials provided with the distribution.
17 * * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
18 * be used to endorse or promote products derived from this software without
19 * specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
31 * THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34 /** \file TComRdCost.cpp
35 \brief RD cost computation class
36 */
37
38 #include <math.h>
39 #include <assert.h>
40 #include "TComRom.h"
41 #include "TComRdCost.h"
42
43 //! \ingroup TLibCommon
44 //! \{
45
46 TComRdCost::TComRdCost()
47 {
48 init();
49 }
50
51 TComRdCost::~TComRdCost()
52 {
53 }
54
55 // Calculate RD functions
56 Double TComRdCost::calcRdCost( UInt uiBits, Distortion uiDistortion, Bool bFlag, DFunc eDFunc )
57 {
58 Double dRdCost = 0.0;
59 Double dLambda = 0.0;
60
61 switch ( eDFunc )
62 {
63 case DF_SSE:
64 assert(0);
65 break;
66 case DF_SAD:
67 #if RExt__HIGH_BIT_DEPTH_SUPPORT
68 dLambda = m_dLambdaMotionSAD[0]; // 0 is valid, because for lossless blocks, the cost equation is modified to compensate.
69 #else
70 dLambda = (Double)m_uiLambdaMotionSAD[0]; // 0 is valid, because for lossless blocks, the cost equation is modified to compensate.
71 #endif
72 break;
73 case DF_DEFAULT:
74 dLambda = m_dLambda;
75 break;
76 case DF_SSE_FRAME:
77 dLambda = m_dFrameLambda;
78 break;
79 default:
80 assert (0);
81 break;
82 }
83
84 if (bFlag) //NOTE: this "bFlag" is never true
85 {
86 // Intra8x8, Intra4x4 Block only...
87 if (m_costMode != COST_STANDARD_LOSSY)
88 {
89 dRdCost = (Double(uiDistortion) / dLambda) + Double(uiBits); // all lossless costs would have uiDistortion=0, and therefore this cost function can be used.
90 }
91 else
92 {
93 dRdCost = (((Double)uiDistortion) + ((Double)uiBits * dLambda));
94 }
95 }
96 else
97 {
98 if (eDFunc == DF_SAD)
99 {
100 if (m_costMode != COST_STANDARD_LOSSY)
101 {
102 dRdCost = ((Double(uiDistortion) * 65536) / dLambda) + Double(uiBits); // all lossless costs would have uiDistortion=0, and therefore this cost function can be used.
103 }
104 else
105 {
106 dRdCost = floor(Double(uiDistortion) + (floor((Double(uiBits) * dLambda) + 0.5) / 65536.0));
107 }
108 }
109 else
110 {
111 if (m_costMode != COST_STANDARD_LOSSY)
112 {
113 dRdCost = (Double(uiDistortion) / dLambda) + Double(uiBits); // all lossless costs would have uiDistortion=0, and therefore this cost function can be used.
114 }
115 else
116 {
117 dRdCost = floor(Double(uiDistortion) + (Double(uiBits) * dLambda) + 0.5);
118 }
119 }
120 }
121
122 return dRdCost;
123 }
124
125 Double TComRdCost::calcRdCost64( UInt64 uiBits, UInt64 uiDistortion, Bool bFlag, DFunc eDFunc )
126 {
127 Double dRdCost = 0.0;
128 Double dLambda = 0.0;
129
130 switch ( eDFunc )
131 {
132 case DF_SSE:
133 assert(0);
134 break;
135 case DF_SAD:
136 #if RExt__HIGH_BIT_DEPTH_SUPPORT
137 dLambda = m_dLambdaMotionSAD[0]; // 0 is valid, because for lossless blocks, the cost equation is modified to compensate.
138 #else
139 dLambda = (Double)m_uiLambdaMotionSAD[0]; // 0 is valid, because for lossless blocks, the cost equation is modified to compensate.
140 #endif
141 break;
142 case DF_DEFAULT:
143 dLambda = m_dLambda;
144 break;
145 case DF_SSE_FRAME:
146 dLambda = m_dFrameLambda;
147 break;
148 default:
149 assert (0);
150 break;
151 }
152
153 if (bFlag) //NOTE: this "bFlag" is never true
154 {
155 // Intra8x8, Intra4x4 Block only...
156 if (m_costMode != COST_STANDARD_LOSSY)
157 {
158 dRdCost = (Double(uiDistortion) / dLambda) + Double(uiBits); // all lossless costs would have uiDistortion=0, and therefore this cost function can be used.
159 }
160 else
161 {
162 dRdCost = (((Double)(Int64)uiDistortion) + ((Double)(Int64)uiBits * dLambda));
163 }
164 }
165 else
166 {
167 if (eDFunc == DF_SAD)
168 {
169 if (m_costMode != COST_STANDARD_LOSSY)
170 {
171 dRdCost = ((Double(uiDistortion) * 65536) / dLambda) + Double(uiBits); // all lossless costs would have uiDistortion=0, and therefore this cost function can be used.
172 }
173 else
174 {
175 dRdCost = floor(Double(uiDistortion) + (floor((Double(uiBits) * dLambda) + 0.5) / 65536.0));
176 }
177 }
178 else
179 {
180 if (m_costMode != COST_STANDARD_LOSSY)
181 {
182 dRdCost = (Double(uiDistortion) / dLambda) + Double(uiBits); // all lossless costs would have uiDistortion=0, and therefore this cost function can be used.
183 }
184 else
185 {
186 dRdCost = floor(Double(uiDistortion) + (Double(uiBits) * dLambda) + 0.5);
187 }
188 }
189 }
190
191 return dRdCost;
192 }
193
194 Void TComRdCost::setLambda( Double dLambda )
195 {
196 m_dLambda = dLambda;
197 m_sqrtLambda = sqrt(m_dLambda);
198 #if RExt__HIGH_BIT_DEPTH_SUPPORT
199 m_dLambdaMotionSAD[0] = 65536.0 * m_sqrtLambda;
200 m_dLambdaMotionSSE[0] = 65536.0 * m_dLambda;
201 #if FULL_NBIT
202 dLambda = 0.57 * pow(2.0, ((LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP_PRIME - 12) / 3.0));
203 #else
204 dLambda = 0.57 * pow(2.0, ((LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP_PRIME - 12 - 6 * (g_bitDepth[CHANNEL_TYPE_LUMA] - 8)) / 3.0));
205 #endif
206 m_dLambdaMotionSAD[1] = 65536.0 * sqrt(dLambda);
207 m_dLambdaMotionSSE[1] = 65536.0 * dLambda;
208 #else
209 m_uiLambdaMotionSAD[0] = (UInt)floor(65536.0 * m_sqrtLambda);
210 m_uiLambdaMotionSSE[0] = (UInt)floor(65536.0 * m_dLambda );
211 #if FULL_NBIT
212 dLambda = 0.57 * pow(2.0, ((LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP_PRIME - 12) / 3.0));
213 #else
214 dLambda = 0.57 * pow(2.0, ((LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP_PRIME - 12 - 6 * (g_bitDepth[CHANNEL_TYPE_LUMA] - 8)) / 3.0));
215 #endif
216 m_uiLambdaMotionSAD[1] = (UInt)floor(65536.0 * sqrt(dLambda));
217 m_uiLambdaMotionSSE[1] = (UInt)floor(65536.0 * dLambda );
218 #endif
219 }
220
221
222 // Initialize the distortion function pointer table, indexed by [eDFunc]
223 Void TComRdCost::init()
224 {
225 m_afpDistortFunc[DF_DEFAULT] = NULL; // for DF_DEFAULT
226
227 m_afpDistortFunc[DF_SSE ] = TComRdCost::xGetSSE;
228 m_afpDistortFunc[DF_SSE4 ] = TComRdCost::xGetSSE4;
229 m_afpDistortFunc[DF_SSE8 ] = TComRdCost::xGetSSE8;
230 m_afpDistortFunc[DF_SSE16 ] = TComRdCost::xGetSSE16;
231 m_afpDistortFunc[DF_SSE32 ] = TComRdCost::xGetSSE32;
232 m_afpDistortFunc[DF_SSE64 ] = TComRdCost::xGetSSE64;
233 m_afpDistortFunc[DF_SSE16N ] = TComRdCost::xGetSSE16N;
234
235 m_afpDistortFunc[DF_SAD ] = TComRdCost::xGetSAD;
236 m_afpDistortFunc[DF_SAD4 ] = TComRdCost::xGetSAD4;
237 m_afpDistortFunc[DF_SAD8 ] = TComRdCost::xGetSAD8;
238 m_afpDistortFunc[DF_SAD16 ] = TComRdCost::xGetSAD16;
239 m_afpDistortFunc[DF_SAD32 ] = TComRdCost::xGetSAD32;
240 m_afpDistortFunc[DF_SAD64 ] = TComRdCost::xGetSAD64;
241 m_afpDistortFunc[DF_SAD16N ] = TComRdCost::xGetSAD16N;
242
243 m_afpDistortFunc[DF_SADS ] = TComRdCost::xGetSAD;
244 m_afpDistortFunc[DF_SADS4 ] = TComRdCost::xGetSAD4;
245 m_afpDistortFunc[DF_SADS8 ] = TComRdCost::xGetSAD8;
246 m_afpDistortFunc[DF_SADS16 ] = TComRdCost::xGetSAD16;
247 m_afpDistortFunc[DF_SADS32 ] = TComRdCost::xGetSAD32;
248 m_afpDistortFunc[DF_SADS64 ] = TComRdCost::xGetSAD64;
249 m_afpDistortFunc[DF_SADS16N] = TComRdCost::xGetSAD16N;
250
251 #if AMP_SAD
252 m_afpDistortFunc[DF_SAD12 ] = TComRdCost::xGetSAD12;
253 m_afpDistortFunc[DF_SAD24 ] = TComRdCost::xGetSAD24;
254 m_afpDistortFunc[DF_SAD48 ] = TComRdCost::xGetSAD48;
255
256 m_afpDistortFunc[DF_SADS12 ] = TComRdCost::xGetSAD12;
257 m_afpDistortFunc[DF_SADS24 ] = TComRdCost::xGetSAD24;
258 m_afpDistortFunc[DF_SADS48 ] = TComRdCost::xGetSAD48;
259 #endif
260 m_afpDistortFunc[DF_HADS ] = TComRdCost::xGetHADs;
261 m_afpDistortFunc[DF_HADS4 ] = TComRdCost::xGetHADs;
262 m_afpDistortFunc[DF_HADS8 ] = TComRdCost::xGetHADs;
263 m_afpDistortFunc[DF_HADS16 ] = TComRdCost::xGetHADs;
264 m_afpDistortFunc[DF_HADS32 ] = TComRdCost::xGetHADs;
265 m_afpDistortFunc[DF_HADS64 ] = TComRdCost::xGetHADs;
266 m_afpDistortFunc[DF_HADS16N] = TComRdCost::xGetHADs;
267
268 m_costMode = COST_STANDARD_LOSSY;
269
270 #if RExt__HIGH_BIT_DEPTH_SUPPORT
271 m_dCost = 0;
272 #else
273 m_uiCost = 0;
274 #endif
275 m_iCostScale = 0;
276 }
277
278 UInt TComRdCost::xGetComponentBits( Int iVal )
279 {
280 UInt uiLength = 1;
281 UInt uiTemp = ( iVal <= 0) ? (-iVal<<1)+1: (iVal<<1);
282
283 assert ( uiTemp );
284
285 while ( 1 != uiTemp )
286 {
287 uiTemp >>= 1;
288 uiLength += 2;
289 }
290
291 return uiLength;
292 }
293
294 Void TComRdCost::setDistParam( UInt uiBlkWidth, UInt uiBlkHeight, DFunc eDFunc, DistParam& rcDistParam )
295 {
296 // set Block Width / Height
297 rcDistParam.iCols = uiBlkWidth;
298 rcDistParam.iRows = uiBlkHeight;
299 rcDistParam.DistFunc = m_afpDistortFunc[eDFunc + g_aucConvertToBit[ rcDistParam.iCols ] + 1 ];
300
301 // initialize
302 rcDistParam.iSubShift = 0;
303 }
304
305 // Setting the Distortion Parameter for Inter (ME)
306 Void TComRdCost::setDistParam( TComPattern* pcPatternKey, Pel* piRefY, Int iRefStride, DistParam& rcDistParam )
307 {
308 // set Original & Curr Pointer / Stride
309 rcDistParam.pOrg = pcPatternKey->getROIY();
310 rcDistParam.pCur = piRefY;
311
312 rcDistParam.iStrideOrg = pcPatternKey->getPatternLStride();
313 rcDistParam.iStrideCur = iRefStride;
314
315 // set Block Width / Height
316 rcDistParam.iCols = pcPatternKey->getROIYWidth();
317 rcDistParam.iRows = pcPatternKey->getROIYHeight();
318 rcDistParam.DistFunc = m_afpDistortFunc[DF_SAD + g_aucConvertToBit[ rcDistParam.iCols ] + 1 ];
319
320 #if AMP_SAD
321 if (rcDistParam.iCols == 12)
322 {
323 rcDistParam.DistFunc = m_afpDistortFunc[DF_SAD12];
324 }
325 else if (rcDistParam.iCols == 24)
326 {
327 rcDistParam.DistFunc = m_afpDistortFunc[DF_SAD24];
328 }
329 else if (rcDistParam.iCols == 48)
330 {
331 rcDistParam.DistFunc = m_afpDistortFunc[DF_SAD48];
332 }
333 #endif
334
335 // initialize
336 rcDistParam.iSubShift = 0;
337 }
338
339 // Setting the Distortion Parameter for Inter (subpel ME with step)
340 Void TComRdCost::setDistParam( TComPattern* pcPatternKey, Pel* piRefY, Int iRefStride, Int iStep, DistParam& rcDistParam, Bool bHADME )
341 {
342 // set Original & Curr Pointer / Stride
343 rcDistParam.pOrg = pcPatternKey->getROIY();
344 rcDistParam.pCur = piRefY;
345
346 rcDistParam.iStrideOrg = pcPatternKey->getPatternLStride();
347 rcDistParam.iStrideCur = iRefStride * iStep;
348
349 // set Step for interpolated buffer
350 rcDistParam.iStep = iStep;
351
352 // set Block Width / Height
353 rcDistParam.iCols = pcPatternKey->getROIYWidth();
354 rcDistParam.iRows = pcPatternKey->getROIYHeight();
355
356 // set distortion function
357 if ( !bHADME )
358 {
359 rcDistParam.DistFunc = m_afpDistortFunc[DF_SADS + g_aucConvertToBit[ rcDistParam.iCols ] + 1 ];
360 #if AMP_SAD
361 if (rcDistParam.iCols == 12)
362 {
363 rcDistParam.DistFunc = m_afpDistortFunc[DF_SADS12];
364 }
365 else if (rcDistParam.iCols == 24)
366 {
367 rcDistParam.DistFunc = m_afpDistortFunc[DF_SADS24];
368 }
369 else if (rcDistParam.iCols == 48)
370 {
371 rcDistParam.DistFunc = m_afpDistortFunc[DF_SADS48];
372 }
373 #endif
374 }
375 else
376 {
377 rcDistParam.DistFunc = m_afpDistortFunc[DF_HADS + g_aucConvertToBit[ rcDistParam.iCols ] + 1 ];
378 }
379
380 // initialize
381 rcDistParam.iSubShift = 0;
382 }
383
384 Void TComRdCost::setDistParam( DistParam& rcDP, Int bitDepth, Pel* p1, Int iStride1, Pel* p2, Int iStride2, Int iWidth, Int iHeight, Bool bHadamard )
385 {
386 rcDP.pOrg = p1;
387 rcDP.pCur = p2;
388 rcDP.iStrideOrg = iStride1;
389 rcDP.iStrideCur = iStride2;
390 rcDP.iCols = iWidth;
391 rcDP.iRows = iHeight;
392 rcDP.iStep = 1;
393 rcDP.iSubShift = 0;
394 rcDP.bitDepth = bitDepth;
395 rcDP.DistFunc = m_afpDistortFunc[ ( bHadamard ? DF_HADS : DF_SADS ) + g_aucConvertToBit[ iWidth ] + 1 ];
396 }
397
398 Distortion TComRdCost::calcHAD( Int bitDepth, Pel* pi0, Int iStride0, Pel* pi1, Int iStride1, Int iWidth, Int iHeight )
399 {
400 Distortion uiSum = 0;
401 Int x, y;
402
403 if ( ( (iWidth % 8) == 0 ) && ( (iHeight % 8) == 0 ) )
404 {
405 for ( y=0; y<iHeight; y+= 8 )
406 {
407 for ( x=0; x<iWidth; x+= 8 )
408 {
409 uiSum += xCalcHADs8x8( &pi0[x], &pi1[x], iStride0, iStride1, 1 );
410 }
411 pi0 += iStride0*8;
412 pi1 += iStride1*8;
413 }
414 }
415 else
416 {
417 assert ( ( (iWidth % 4) == 0 ) && ( (iHeight % 4) == 0 ) );
418
419 for ( y=0; y<iHeight; y+= 4 )
420 {
421 for ( x=0; x<iWidth; x+= 4 )
422 {
423 uiSum += xCalcHADs4x4( &pi0[x], &pi1[x], iStride0, iStride1, 1 );
424 }
425 pi0 += iStride0*4;
426 pi1 += iStride1*4;
427 }
428 }
429
430 return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(bitDepth-8) );
431 }
432
433 Distortion TComRdCost::getDistPart( Int bitDepth, Pel* piCur, Int iCurStride, Pel* piOrg, Int iOrgStride, UInt uiBlkWidth, UInt uiBlkHeight, const ComponentID compID, DFunc eDFunc )
434 {
435 DistParam cDtParam;
436 setDistParam( uiBlkWidth, uiBlkHeight, eDFunc, cDtParam );
437 cDtParam.pOrg = piOrg;
438 cDtParam.pCur = piCur;
439 cDtParam.iStrideOrg = iOrgStride;
440 cDtParam.iStrideCur = iCurStride;
441 cDtParam.iStep = 1;
442
443 cDtParam.bApplyWeight = false;
444 cDtParam.compIdx = MAX_NUM_COMPONENT; // just for assert: to be sure it was set before use
445 cDtParam.bitDepth = bitDepth;
446
447 if (isChroma(compID))
448 {
449 return ((Distortion) (m_distortionWeight[compID] * cDtParam.DistFunc( &cDtParam )));
450 }
451 else
452 {
453 return cDtParam.DistFunc( &cDtParam );
454 }
455 }
456
457 // ====================================================================================================================
458 // Distortion functions
459 // ====================================================================================================================
460
461 // --------------------------------------------------------------------------------------------------------------------
462 // SAD
463 // --------------------------------------------------------------------------------------------------------------------
464
465 Distortion TComRdCost::xGetSAD( DistParam* pcDtParam )
466 {
467 if ( pcDtParam->bApplyWeight )
468 {
469 return TComRdCostWeightPrediction::xGetSADw( pcDtParam );
470 }
471 const Pel* piOrg = pcDtParam->pOrg;
472 const Pel* piCur = pcDtParam->pCur;
473 Int iRows = pcDtParam->iRows;
474 Int iCols = pcDtParam->iCols;
475 Int iStrideCur = pcDtParam->iStrideCur;
476 Int iStrideOrg = pcDtParam->iStrideOrg;
477
478 Distortion uiSum = 0;
479
480 for( ; iRows != 0; iRows-- )
481 {
482 for (Int n = 0; n < iCols; n++ )
483 {
484 uiSum += abs( piOrg[n] - piCur[n] );
485 }
486 piOrg += iStrideOrg;
487 piCur += iStrideCur;
488 }
489
490 return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
491 }
492
493 Distortion TComRdCost::xGetSAD4( DistParam* pcDtParam )
494 {
495 if ( pcDtParam->bApplyWeight )
496 {
497 return TComRdCostWeightPrediction::xGetSADw( pcDtParam );
498 }
499 const Pel* piOrg = pcDtParam->pOrg;
500 const Pel* piCur = pcDtParam->pCur;
501 Int iRows = pcDtParam->iRows;
502 Int iSubShift = pcDtParam->iSubShift;
503 Int iSubStep = ( 1 << iSubShift );
504 Int iStrideCur = pcDtParam->iStrideCur*iSubStep;
505 Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep;
506
507 Distortion uiSum = 0;
508
509 for( ; iRows != 0; iRows-=iSubStep )
510 {
511 uiSum += abs( piOrg[0] - piCur[0] );
512 uiSum += abs( piOrg[1] - piCur[1] );
513 uiSum += abs( piOrg[2] - piCur[2] );
514 uiSum += abs( piOrg[3] - piCur[3] );
515
516 piOrg += iStrideOrg;
517 piCur += iStrideCur;
518 }
519
520 uiSum <<= iSubShift;
521 return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
522 }
523
524 Distortion TComRdCost::xGetSAD8( DistParam* pcDtParam )
525 {
526 if ( pcDtParam->bApplyWeight )
527 {
528 return TComRdCostWeightPrediction::xGetSADw( pcDtParam );
529 }
530 const Pel* piOrg = pcDtParam->pOrg;
531 const Pel* piCur = pcDtParam->pCur;
532 Int iRows = pcDtParam->iRows;
533 Int iSubShift = pcDtParam->iSubShift;
534 Int iSubStep = ( 1 << iSubShift );
535 Int iStrideCur = pcDtParam->iStrideCur*iSubStep;
536 Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep;
537
538 Distortion uiSum = 0;
539
540 for( ; iRows != 0; iRows-=iSubStep )
541 {
542 uiSum += abs( piOrg[0] - piCur[0] );
543 uiSum += abs( piOrg[1] - piCur[1] );
544 uiSum += abs( piOrg[2] - piCur[2] );
545 uiSum += abs( piOrg[3] - piCur[3] );
546 uiSum += abs( piOrg[4] - piCur[4] );
547 uiSum += abs( piOrg[5] - piCur[5] );
548 uiSum += abs( piOrg[6] - piCur[6] );
549 uiSum += abs( piOrg[7] - piCur[7] );
550
551 piOrg += iStrideOrg;
552 piCur += iStrideCur;
553 }
554
555 uiSum <<= iSubShift;
556 return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
557 }
558
559 Distortion TComRdCost::xGetSAD16( DistParam* pcDtParam )
560 {
561 if ( pcDtParam->bApplyWeight )
562 {
563 return TComRdCostWeightPrediction::xGetSADw( pcDtParam );
564 }
565 const Pel* piOrg = pcDtParam->pOrg;
566 const Pel* piCur = pcDtParam->pCur;
567 Int iRows = pcDtParam->iRows;
568 Int iSubShift = pcDtParam->iSubShift;
569 Int iSubStep = ( 1 << iSubShift );
570 Int iStrideCur = pcDtParam->iStrideCur*iSubStep;
571 Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep;
572
573 Distortion uiSum = 0;
574
575 for( ; iRows != 0; iRows-=iSubStep )
576 {
577 uiSum += abs( piOrg[0] - piCur[0] );
578 uiSum += abs( piOrg[1] - piCur[1] );
579 uiSum += abs( piOrg[2] - piCur[2] );
580 uiSum += abs( piOrg[3] - piCur[3] );
581 uiSum += abs( piOrg[4] - piCur[4] );
582 uiSum += abs( piOrg[5] - piCur[5] );
583 uiSum += abs( piOrg[6] - piCur[6] );
584 uiSum += abs( piOrg[7] - piCur[7] );
585 uiSum += abs( piOrg[8] - piCur[8] );
586 uiSum += abs( piOrg[9] - piCur[9] );
587 uiSum += abs( piOrg[10] - piCur[10] );
588 uiSum += abs( piOrg[11] - piCur[11] );
589 uiSum += abs( piOrg[12] - piCur[12] );
590 uiSum += abs( piOrg[13] - piCur[13] );
591 uiSum += abs( piOrg[14] - piCur[14] );
592 uiSum += abs( piOrg[15] - piCur[15] );
593
594 piOrg += iStrideOrg;
595 piCur += iStrideCur;
596 }
597
598 uiSum <<= iSubShift;
599 return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
600 }
601
602 #if AMP_SAD
603 Distortion TComRdCost::xGetSAD12( DistParam* pcDtParam )
604 {
605 if ( pcDtParam->bApplyWeight )
606 {
607 return TComRdCostWeightPrediction::xGetSADw( pcDtParam );
608 }
609 const Pel* piOrg = pcDtParam->pOrg;
610 const Pel* piCur = pcDtParam->pCur;
611 Int iRows = pcDtParam->iRows;
612 Int iSubShift = pcDtParam->iSubShift;
613 Int iSubStep = ( 1 << iSubShift );
614 Int iStrideCur = pcDtParam->iStrideCur*iSubStep;
615 Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep;
616
617 Distortion uiSum = 0;
618
619 for( ; iRows != 0; iRows-=iSubStep )
620 {
621 uiSum += abs( piOrg[0] - piCur[0] );
622 uiSum += abs( piOrg[1] - piCur[1] );
623 uiSum += abs( piOrg[2] - piCur[2] );
624 uiSum += abs( piOrg[3] - piCur[3] );
625 uiSum += abs( piOrg[4] - piCur[4] );
626 uiSum += abs( piOrg[5] - piCur[5] );
627 uiSum += abs( piOrg[6] - piCur[6] );
628 uiSum += abs( piOrg[7] - piCur[7] );
629 uiSum += abs( piOrg[8] - piCur[8] );
630 uiSum += abs( piOrg[9] - piCur[9] );
631 uiSum += abs( piOrg[10] - piCur[10] );
632 uiSum += abs( piOrg[11] - piCur[11] );
633
634 piOrg += iStrideOrg;
635 piCur += iStrideCur;
636 }
637
638 uiSum <<= iSubShift;
639 return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
640 }
641 #endif
642
643 Distortion TComRdCost::xGetSAD16N( DistParam* pcDtParam )
644 {
645 const Pel* piOrg = pcDtParam->pOrg;
646 const Pel* piCur = pcDtParam->pCur;
647 Int iRows = pcDtParam->iRows;
648 Int iCols = pcDtParam->iCols;
649 Int iSubShift = pcDtParam->iSubShift;
650 Int iSubStep = ( 1 << iSubShift );
651 Int iStrideCur = pcDtParam->iStrideCur*iSubStep;
652 Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep;
653
654 Distortion uiSum = 0;
655
656 for( ; iRows != 0; iRows-=iSubStep )
657 {
658 for (Int n = 0; n < iCols; n+=16 )
659 {
660 uiSum += abs( piOrg[n+ 0] - piCur[n+ 0] );
661 uiSum += abs( piOrg[n+ 1] - piCur[n+ 1] );
662 uiSum += abs( piOrg[n+ 2] - piCur[n+ 2] );
663 uiSum += abs( piOrg[n+ 3] - piCur[n+ 3] );
664 uiSum += abs( piOrg[n+ 4] - piCur[n+ 4] );
665 uiSum += abs( piOrg[n+ 5] - piCur[n+ 5] );
666 uiSum += abs( piOrg[n+ 6] - piCur[n+ 6] );
667 uiSum += abs( piOrg[n+ 7] - piCur[n+ 7] );
668 uiSum += abs( piOrg[n+ 8] - piCur[n+ 8] );
669 uiSum += abs( piOrg[n+ 9] - piCur[n+ 9] );
670 uiSum += abs( piOrg[n+10] - piCur[n+10] );
671 uiSum += abs( piOrg[n+11] - piCur[n+11] );
672 uiSum += abs( piOrg[n+12] - piCur[n+12] );
673 uiSum += abs( piOrg[n+13] - piCur[n+13] );
674 uiSum += abs( piOrg[n+14] - piCur[n+14] );
675 uiSum += abs( piOrg[n+15] - piCur[n+15] );
676 }
677 piOrg += iStrideOrg;
678 piCur += iStrideCur;
679 }
680
681 uiSum <<= iSubShift;
682 return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
683 }
684
685 Distortion TComRdCost::xGetSAD32( DistParam* pcDtParam )
686 {
687 if ( pcDtParam->bApplyWeight )
688 {
689 return TComRdCostWeightPrediction::xGetSADw( pcDtParam );
690 }
691 const Pel* piOrg = pcDtParam->pOrg;
692 const Pel* piCur = pcDtParam->pCur;
693 Int iRows = pcDtParam->iRows;
694 Int iSubShift = pcDtParam->iSubShift;
695 Int iSubStep = ( 1 << iSubShift );
696 Int iStrideCur = pcDtParam->iStrideCur*iSubStep;
697 Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep;
698
699 Distortion uiSum = 0;
700
701 for( ; iRows != 0; iRows-=iSubStep )
702 {
703 uiSum += abs( piOrg[0] - piCur[0] );
704 uiSum += abs( piOrg[1] - piCur[1] );
705 uiSum += abs( piOrg[2] - piCur[2] );
706 uiSum += abs( piOrg[3] - piCur[3] );
707 uiSum += abs( piOrg[4] - piCur[4] );
708 uiSum += abs( piOrg[5] - piCur[5] );
709 uiSum += abs( piOrg[6] - piCur[6] );
710 uiSum += abs( piOrg[7] - piCur[7] );
711 uiSum += abs( piOrg[8] - piCur[8] );
712 uiSum += abs( piOrg[9] - piCur[9] );
713 uiSum += abs( piOrg[10] - piCur[10] );
714 uiSum += abs( piOrg[11] - piCur[11] );
715 uiSum += abs( piOrg[12] - piCur[12] );
716 uiSum += abs( piOrg[13] - piCur[13] );
717 uiSum += abs( piOrg[14] - piCur[14] );
718 uiSum += abs( piOrg[15] - piCur[15] );
719 uiSum += abs( piOrg[16] - piCur[16] );
720 uiSum += abs( piOrg[17] - piCur[17] );
721 uiSum += abs( piOrg[18] - piCur[18] );
722 uiSum += abs( piOrg[19] - piCur[19] );
723 uiSum += abs( piOrg[20] - piCur[20] );
724 uiSum += abs( piOrg[21] - piCur[21] );
725 uiSum += abs( piOrg[22] - piCur[22] );
726 uiSum += abs( piOrg[23] - piCur[23] );
727 uiSum += abs( piOrg[24] - piCur[24] );
728 uiSum += abs( piOrg[25] - piCur[25] );
729 uiSum += abs( piOrg[26] - piCur[26] );
730 uiSum += abs( piOrg[27] - piCur[27] );
731 uiSum += abs( piOrg[28] - piCur[28] );
732 uiSum += abs( piOrg[29] - piCur[29] );
733 uiSum += abs( piOrg[30] - piCur[30] );
734 uiSum += abs( piOrg[31] - piCur[31] );
735
736 piOrg += iStrideOrg;
737 piCur += iStrideCur;
738 }
739
740 uiSum <<= iSubShift;
741 return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
742 }
743
744 #if AMP_SAD
745 Distortion TComRdCost::xGetSAD24( DistParam* pcDtParam )
746 {
747 if ( pcDtParam->bApplyWeight )
748 {
749 return TComRdCostWeightPrediction::xGetSADw( pcDtParam );
750 }
751 const Pel* piOrg = pcDtParam->pOrg;
752 const Pel* piCur = pcDtParam->pCur;
753 Int iRows = pcDtParam->iRows;
754 Int iSubShift = pcDtParam->iSubShift;
755 Int iSubStep = ( 1 << iSubShift );
756 Int iStrideCur = pcDtParam->iStrideCur*iSubStep;
757 Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep;
758
759 Distortion uiSum = 0;
760
761 for( ; iRows != 0; iRows-=iSubStep )
762 {
763 uiSum += abs( piOrg[0] - piCur[0] );
764 uiSum += abs( piOrg[1] - piCur[1] );
765 uiSum += abs( piOrg[2] - piCur[2] );
766 uiSum += abs( piOrg[3] - piCur[3] );
767 uiSum += abs( piOrg[4] - piCur[4] );
768 uiSum += abs( piOrg[5] - piCur[5] );
769 uiSum += abs( piOrg[6] - piCur[6] );
770 uiSum += abs( piOrg[7] - piCur[7] );
771 uiSum += abs( piOrg[8] - piCur[8] );
772 uiSum += abs( piOrg[9] - piCur[9] );
773 uiSum += abs( piOrg[10] - piCur[10] );
774 uiSum += abs( piOrg[11] - piCur[11] );
775 uiSum += abs( piOrg[12] - piCur[12] );
776 uiSum += abs( piOrg[13] - piCur[13] );
777 uiSum += abs( piOrg[14] - piCur[14] );
778 uiSum += abs( piOrg[15] - piCur[15] );
779 uiSum += abs( piOrg[16] - piCur[16] );
780 uiSum += abs( piOrg[17] - piCur[17] );
781 uiSum += abs( piOrg[18] - piCur[18] );
782 uiSum += abs( piOrg[19] - piCur[19] );
783 uiSum += abs( piOrg[20] - piCur[20] );
784 uiSum += abs( piOrg[21] - piCur[21] );
785 uiSum += abs( piOrg[22] - piCur[22] );
786 uiSum += abs( piOrg[23] - piCur[23] );
787
788 piOrg += iStrideOrg;
789 piCur += iStrideCur;
790 }
791
792 uiSum <<= iSubShift;
793 return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
794 }
795
796 #endif
797
798 Distortion TComRdCost::xGetSAD64( DistParam* pcDtParam )
799 {
800 if ( pcDtParam->bApplyWeight )
801 {
802 return TComRdCostWeightPrediction::xGetSADw( pcDtParam );
803 }
804 const Pel* piOrg = pcDtParam->pOrg;
805 const Pel* piCur = pcDtParam->pCur;
806 Int iRows = pcDtParam->iRows;
807 Int iSubShift = pcDtParam->iSubShift;
808 Int iSubStep = ( 1 << iSubShift );
809 Int iStrideCur = pcDtParam->iStrideCur*iSubStep;
810 Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep;
811
812 Distortion uiSum = 0;
813
814 for( ; iRows != 0; iRows-=iSubStep )
815 {
816 uiSum += abs( piOrg[0] - piCur[0] );
817 uiSum += abs( piOrg[1] - piCur[1] );
818 uiSum += abs( piOrg[2] - piCur[2] );
819 uiSum += abs( piOrg[3] - piCur[3] );
820 uiSum += abs( piOrg[4] - piCur[4] );
821 uiSum += abs( piOrg[5] - piCur[5] );
822 uiSum += abs( piOrg[6] - piCur[6] );
823 uiSum += abs( piOrg[7] - piCur[7] );
824 uiSum += abs( piOrg[8] - piCur[8] );
825 uiSum += abs( piOrg[9] - piCur[9] );
826 uiSum += abs( piOrg[10] - piCur[10] );
827 uiSum += abs( piOrg[11] - piCur[11] );
828 uiSum += abs( piOrg[12] - piCur[12] );
829 uiSum += abs( piOrg[13] - piCur[13] );
830 uiSum += abs( piOrg[14] - piCur[14] );
831 uiSum += abs( piOrg[15] - piCur[15] );
832 uiSum += abs( piOrg[16] - piCur[16] );
833 uiSum += abs( piOrg[17] - piCur[17] );
834 uiSum += abs( piOrg[18] - piCur[18] );
835 uiSum += abs( piOrg[19] - piCur[19] );
836 uiSum += abs( piOrg[20] - piCur[20] );
837 uiSum += abs( piOrg[21] - piCur[21] );
838 uiSum += abs( piOrg[22] - piCur[22] );
839 uiSum += abs( piOrg[23] - piCur[23] );
840 uiSum += abs( piOrg[24] - piCur[24] );
841 uiSum += abs( piOrg[25] - piCur[25] );
842 uiSum += abs( piOrg[26] - piCur[26] );
843 uiSum += abs( piOrg[27] - piCur[27] );
844 uiSum += abs( piOrg[28] - piCur[28] );
845 uiSum += abs( piOrg[29] - piCur[29] );
846 uiSum += abs( piOrg[30] - piCur[30] );
847 uiSum += abs( piOrg[31] - piCur[31] );
848 uiSum += abs( piOrg[32] - piCur[32] );
849 uiSum += abs( piOrg[33] - piCur[33] );
850 uiSum += abs( piOrg[34] - piCur[34] );
851 uiSum += abs( piOrg[35] - piCur[35] );
852 uiSum += abs( piOrg[36] - piCur[36] );
853 uiSum += abs( piOrg[37] - piCur[37] );
854 uiSum += abs( piOrg[38] - piCur[38] );
855 uiSum += abs( piOrg[39] - piCur[39] );
856 uiSum += abs( piOrg[40] - piCur[40] );
857 uiSum += abs( piOrg[41] - piCur[41] );
858 uiSum += abs( piOrg[42] - piCur[42] );
859 uiSum += abs( piOrg[43] - piCur[43] );
860 uiSum += abs( piOrg[44] - piCur[44] );
861 uiSum += abs( piOrg[45] - piCur[45] );
862 uiSum += abs( piOrg[46] - piCur[46] );
863 uiSum += abs( piOrg[47] - piCur[47] );
864 uiSum += abs( piOrg[48] - piCur[48] );
865 uiSum += abs( piOrg[49] - piCur[49] );
866 uiSum += abs( piOrg[50] - piCur[50] );
867 uiSum += abs( piOrg[51] - piCur[51] );
868 uiSum += abs( piOrg[52] - piCur[52] );
869 uiSum += abs( piOrg[53] - piCur[53] );
870 uiSum += abs( piOrg[54] - piCur[54] );
871 uiSum += abs( piOrg[55] - piCur[55] );
872 uiSum += abs( piOrg[56] - piCur[56] );
873 uiSum += abs( piOrg[57] - piCur[57] );
874 uiSum += abs( piOrg[58] - piCur[58] );
875 uiSum += abs( piOrg[59] - piCur[59] );
876 uiSum += abs( piOrg[60] - piCur[60] );
877 uiSum += abs( piOrg[61] - piCur[61] );
878 uiSum += abs( piOrg[62] - piCur[62] );
879 uiSum += abs( piOrg[63] - piCur[63] );
880
881 piOrg += iStrideOrg;
882 piCur += iStrideCur;
883 }
884
885 uiSum <<= iSubShift;
886 return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
887 }
888
889 #if AMP_SAD
890 Distortion TComRdCost::xGetSAD48( DistParam* pcDtParam )
891 {
892 if ( pcDtParam->bApplyWeight )
893 {
894 return TComRdCostWeightPrediction::xGetSADw( pcDtParam );
895 }
896 const Pel* piOrg = pcDtParam->pOrg;
897 const Pel* piCur = pcDtParam->pCur;
898 Int iRows = pcDtParam->iRows;
899 Int iSubShift = pcDtParam->iSubShift;
900 Int iSubStep = ( 1 << iSubShift );
901 Int iStrideCur = pcDtParam->iStrideCur*iSubStep;
902 Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep;
903
904 Distortion uiSum = 0;
905
906 for( ; iRows != 0; iRows-=iSubStep )
907 {
908 uiSum += abs( piOrg[0] - piCur[0] );
909 uiSum += abs( piOrg[1] - piCur[1] );
910 uiSum += abs( piOrg[2] - piCur[2] );
911 uiSum += abs( piOrg[3] - piCur[3] );
912 uiSum += abs( piOrg[4] - piCur[4] );
913 uiSum += abs( piOrg[5] - piCur[5] );
914 uiSum += abs( piOrg[6] - piCur[6] );
915 uiSum += abs( piOrg[7] - piCur[7] );
916 uiSum += abs( piOrg[8] - piCur[8] );
917 uiSum += abs( piOrg[9] - piCur[9] );
918 uiSum += abs( piOrg[10] - piCur[10] );
919 uiSum += abs( piOrg[11] - piCur[11] );
920 uiSum += abs( piOrg[12] - piCur[12] );
921 uiSum += abs( piOrg[13] - piCur[13] );
922 uiSum += abs( piOrg[14] - piCur[14] );
923 uiSum += abs( piOrg[15] - piCur[15] );
924 uiSum += abs( piOrg[16] - piCur[16] );
925 uiSum += abs( piOrg[17] - piCur[17] );
926 uiSum += abs( piOrg[18] - piCur[18] );
927 uiSum += abs( piOrg[19] - piCur[19] );
928 uiSum += abs( piOrg[20] - piCur[20] );
929 uiSum += abs( piOrg[21] - piCur[21] );
930 uiSum += abs( piOrg[22] - piCur[22] );
931 uiSum += abs( piOrg[23] - piCur[23] );
932 uiSum += abs( piOrg[24] - piCur[24] );
933 uiSum += abs( piOrg[25] - piCur[25] );
934 uiSum += abs( piOrg[26] - piCur[26] );
935 uiSum += abs( piOrg[27] - piCur[27] );
936 uiSum += abs( piOrg[28] - piCur[28] );
937 uiSum += abs( piOrg[29] - piCur[29] );
938 uiSum += abs( piOrg[30] - piCur[30] );
939 uiSum += abs( piOrg[31] - piCur[31] );
940 uiSum += abs( piOrg[32] - piCur[32] );
941 uiSum += abs( piOrg[33] - piCur[33] );
942 uiSum += abs( piOrg[34] - piCur[34] );
943 uiSum += abs( piOrg[35] - piCur[35] );
944 uiSum += abs( piOrg[36] - piCur[36] );
945 uiSum += abs( piOrg[37] - piCur[37] );
946 uiSum += abs( piOrg[38] - piCur[38] );
947 uiSum += abs( piOrg[39] - piCur[39] );
948 uiSum += abs( piOrg[40] - piCur[40] );
949 uiSum += abs( piOrg[41] - piCur[41] );
950 uiSum += abs( piOrg[42] - piCur[42] );
951 uiSum += abs( piOrg[43] - piCur[43] );
952 uiSum += abs( piOrg[44] - piCur[44] );
953 uiSum += abs( piOrg[45] - piCur[45] );
954 uiSum += abs( piOrg[46] - piCur[46] );
955 uiSum += abs( piOrg[47] - piCur[47] );
956
957 piOrg += iStrideOrg;
958 piCur += iStrideCur;
959 }
960
961 uiSum <<= iSubShift;
962 return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
963 }
964 #endif
965
966 // --------------------------------------------------------------------------------------------------------------------
967 // SSE
968 // --------------------------------------------------------------------------------------------------------------------
969
970 Distortion TComRdCost::xGetSSE( DistParam* pcDtParam )
971 {
972 if ( pcDtParam->bApplyWeight )
973 {
974 return TComRdCostWeightPrediction::xGetSSEw( pcDtParam );
975 }
976 const Pel* piOrg = pcDtParam->pOrg;
977 const Pel* piCur = pcDtParam->pCur;
978 Int iRows = pcDtParam->iRows;
979 Int iCols = pcDtParam->iCols;
980 Int iStrideOrg = pcDtParam->iStrideOrg;
981 Int iStrideCur = pcDtParam->iStrideCur;
982
983 Distortion uiSum = 0;
984 UInt uiShift = DISTORTION_PRECISION_ADJUSTMENT((pcDtParam->bitDepth-8) << 1);
985
986 Intermediate_Int iTemp;
987
988 for( ; iRows != 0; iRows-- )
989 {
990 for (Int n = 0; n < iCols; n++ )
991 {
992 iTemp = piOrg[n ] - piCur[n ];
993 uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
994 }
995 piOrg += iStrideOrg;
996 piCur += iStrideCur;
997 }
998
999 return ( uiSum );
1000 }
1001
1002 Distortion TComRdCost::xGetSSE4( DistParam* pcDtParam )
1003 {
1004 if ( pcDtParam->bApplyWeight )
1005 {
1006 assert( pcDtParam->iCols == 4 );
1007 return TComRdCostWeightPrediction::xGetSSEw( pcDtParam );
1008 }
1009 const Pel* piOrg = pcDtParam->pOrg;
1010 const Pel* piCur = pcDtParam->pCur;
1011 Int iRows = pcDtParam->iRows;
1012 Int iStrideOrg = pcDtParam->iStrideOrg;
1013 Int iStrideCur = pcDtParam->iStrideCur;
1014
1015 Distortion uiSum = 0;
1016 UInt uiShift = DISTORTION_PRECISION_ADJUSTMENT((pcDtParam->bitDepth-8) << 1);
1017
1018 Intermediate_Int iTemp;
1019
1020 for( ; iRows != 0; iRows-- )
1021 {
1022
1023 iTemp = piOrg[0] - piCur[0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1024 iTemp = piOrg[1] - piCur[1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1025 iTemp = piOrg[2] - piCur[2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1026 iTemp = piOrg[3] - piCur[3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1027
1028 piOrg += iStrideOrg;
1029 piCur += iStrideCur;
1030 }
1031
1032 return ( uiSum );
1033 }
1034
1035 Distortion TComRdCost::xGetSSE8( DistParam* pcDtParam )
1036 {
1037 if ( pcDtParam->bApplyWeight )
1038 {
1039 assert( pcDtParam->iCols == 8 );
1040 return TComRdCostWeightPrediction::xGetSSEw( pcDtParam );
1041 }
1042 const Pel* piOrg = pcDtParam->pOrg;
1043 const Pel* piCur = pcDtParam->pCur;
1044 Int iRows = pcDtParam->iRows;
1045 Int iStrideOrg = pcDtParam->iStrideOrg;
1046 Int iStrideCur = pcDtParam->iStrideCur;
1047
1048 Distortion uiSum = 0;
1049 UInt uiShift = DISTORTION_PRECISION_ADJUSTMENT((pcDtParam->bitDepth-8) << 1);
1050
1051 Intermediate_Int iTemp;
1052
1053 for( ; iRows != 0; iRows-- )
1054 {
1055 iTemp = piOrg[0] - piCur[0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1056 iTemp = piOrg[1] - piCur[1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1057 iTemp = piOrg[2] - piCur[2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1058 iTemp = piOrg[3] - piCur[3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1059 iTemp = piOrg[4] - piCur[4]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1060 iTemp = piOrg[5] - piCur[5]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1061 iTemp = piOrg[6] - piCur[6]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1062 iTemp = piOrg[7] - piCur[7]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1063
1064 piOrg += iStrideOrg;
1065 piCur += iStrideCur;
1066 }
1067
1068 return ( uiSum );
1069 }
1070
1071 Distortion TComRdCost::xGetSSE16( DistParam* pcDtParam )
1072 {
1073 if ( pcDtParam->bApplyWeight )
1074 {
1075 assert( pcDtParam->iCols == 16 );
1076 return TComRdCostWeightPrediction::xGetSSEw( pcDtParam );
1077 }
1078 const Pel* piOrg = pcDtParam->pOrg;
1079 const Pel* piCur = pcDtParam->pCur;
1080 Int iRows = pcDtParam->iRows;
1081 Int iStrideOrg = pcDtParam->iStrideOrg;
1082 Int iStrideCur = pcDtParam->iStrideCur;
1083
1084 Distortion uiSum = 0;
1085 UInt uiShift = DISTORTION_PRECISION_ADJUSTMENT((pcDtParam->bitDepth-8) << 1);
1086
1087 Intermediate_Int iTemp;
1088
1089 for( ; iRows != 0; iRows-- )
1090 {
1091
1092 iTemp = piOrg[ 0] - piCur[ 0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1093 iTemp = piOrg[ 1] - piCur[ 1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1094 iTemp = piOrg[ 2] - piCur[ 2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1095 iTemp = piOrg[ 3] - piCur[ 3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1096 iTemp = piOrg[ 4] - piCur[ 4]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1097 iTemp = piOrg[ 5] - piCur[ 5]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1098 iTemp = piOrg[ 6] - piCur[ 6]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1099 iTemp = piOrg[ 7] - piCur[ 7]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1100 iTemp = piOrg[ 8] - piCur[ 8]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1101 iTemp = piOrg[ 9] - piCur[ 9]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1102 iTemp = piOrg[10] - piCur[10]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1103 iTemp = piOrg[11] - piCur[11]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1104 iTemp = piOrg[12] - piCur[12]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1105 iTemp = piOrg[13] - piCur[13]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1106 iTemp = piOrg[14] - piCur[14]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1107 iTemp = piOrg[15] - piCur[15]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1108
1109 piOrg += iStrideOrg;
1110 piCur += iStrideCur;
1111 }
1112
1113 return ( uiSum );
1114 }
1115
1116 Distortion TComRdCost::xGetSSE16N( DistParam* pcDtParam )
1117 {
1118 if ( pcDtParam->bApplyWeight )
1119 {
1120 return TComRdCostWeightPrediction::xGetSSEw( pcDtParam );
1121 }
1122 const Pel* piOrg = pcDtParam->pOrg;
1123 const Pel* piCur = pcDtParam->pCur;
1124 Int iRows = pcDtParam->iRows;
1125 Int iCols = pcDtParam->iCols;
1126 Int iStrideOrg = pcDtParam->iStrideOrg;
1127 Int iStrideCur = pcDtParam->iStrideCur;
1128
1129 Distortion uiSum = 0;
1130 UInt uiShift = DISTORTION_PRECISION_ADJUSTMENT((pcDtParam->bitDepth-8) << 1);
1131
1132 Intermediate_Int iTemp;
1133
1134 for( ; iRows != 0; iRows-- )
1135 {
1136 for (Int n = 0; n < iCols; n+=16 )
1137 {
1138
1139 iTemp = piOrg[n+ 0] - piCur[n+ 0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1140 iTemp = piOrg[n+ 1] - piCur[n+ 1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1141 iTemp = piOrg[n+ 2] - piCur[n+ 2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1142 iTemp = piOrg[n+ 3] - piCur[n+ 3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1143 iTemp = piOrg[n+ 4] - piCur[n+ 4]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1144 iTemp = piOrg[n+ 5] - piCur[n+ 5]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1145 iTemp = piOrg[n+ 6] - piCur[n+ 6]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1146 iTemp = piOrg[n+ 7] - piCur[n+ 7]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1147 iTemp = piOrg[n+ 8] - piCur[n+ 8]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1148 iTemp = piOrg[n+ 9] - piCur[n+ 9]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1149 iTemp = piOrg[n+10] - piCur[n+10]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1150 iTemp = piOrg[n+11] - piCur[n+11]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1151 iTemp = piOrg[n+12] - piCur[n+12]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1152 iTemp = piOrg[n+13] - piCur[n+13]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1153 iTemp = piOrg[n+14] - piCur[n+14]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1154 iTemp = piOrg[n+15] - piCur[n+15]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1155
1156 }
1157 piOrg += iStrideOrg;
1158 piCur += iStrideCur;
1159 }
1160
1161 return ( uiSum );
1162 }
1163
1164 Distortion TComRdCost::xGetSSE32( DistParam* pcDtParam )
1165 {
1166 if ( pcDtParam->bApplyWeight )
1167 {
1168 assert( pcDtParam->iCols == 32 );
1169 return TComRdCostWeightPrediction::xGetSSEw( pcDtParam );
1170 }
1171 const Pel* piOrg = pcDtParam->pOrg;
1172 const Pel* piCur = pcDtParam->pCur;
1173 Int iRows = pcDtParam->iRows;
1174 Int iStrideOrg = pcDtParam->iStrideOrg;
1175 Int iStrideCur = pcDtParam->iStrideCur;
1176
1177 Distortion uiSum = 0;
1178 UInt uiShift = DISTORTION_PRECISION_ADJUSTMENT((pcDtParam->bitDepth-8) << 1);
1179
1180 Intermediate_Int iTemp;
1181
1182 for( ; iRows != 0; iRows-- )
1183 {
1184
1185 iTemp = piOrg[ 0] - piCur[ 0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1186 iTemp = piOrg[ 1] - piCur[ 1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1187 iTemp = piOrg[ 2] - piCur[ 2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1188 iTemp = piOrg[ 3] - piCur[ 3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1189 iTemp = piOrg[ 4] - piCur[ 4]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1190 iTemp = piOrg[ 5] - piCur[ 5]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1191 iTemp = piOrg[ 6] - piCur[ 6]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1192 iTemp = piOrg[ 7] - piCur[ 7]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1193 iTemp = piOrg[ 8] - piCur[ 8]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1194 iTemp = piOrg[ 9] - piCur[ 9]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1195 iTemp = piOrg[10] - piCur[10]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1196 iTemp = piOrg[11] - piCur[11]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1197 iTemp = piOrg[12] - piCur[12]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1198 iTemp = piOrg[13] - piCur[13]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1199 iTemp = piOrg[14] - piCur[14]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1200 iTemp = piOrg[15] - piCur[15]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1201 iTemp = piOrg[16] - piCur[16]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1202 iTemp = piOrg[17] - piCur[17]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1203 iTemp = piOrg[18] - piCur[18]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1204 iTemp = piOrg[19] - piCur[19]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1205 iTemp = piOrg[20] - piCur[20]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1206 iTemp = piOrg[21] - piCur[21]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1207 iTemp = piOrg[22] - piCur[22]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1208 iTemp = piOrg[23] - piCur[23]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1209 iTemp = piOrg[24] - piCur[24]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1210 iTemp = piOrg[25] - piCur[25]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1211 iTemp = piOrg[26] - piCur[26]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1212 iTemp = piOrg[27] - piCur[27]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1213 iTemp = piOrg[28] - piCur[28]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1214 iTemp = piOrg[29] - piCur[29]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1215 iTemp = piOrg[30] - piCur[30]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1216 iTemp = piOrg[31] - piCur[31]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1217
1218 piOrg += iStrideOrg;
1219 piCur += iStrideCur;
1220 }
1221
1222 return ( uiSum );
1223 }
1224
1225 Distortion TComRdCost::xGetSSE64( DistParam* pcDtParam )
1226 {
1227 if ( pcDtParam->bApplyWeight )
1228 {
1229 assert( pcDtParam->iCols == 64 );
1230 return TComRdCostWeightPrediction::xGetSSEw( pcDtParam );
1231 }
1232 const Pel* piOrg = pcDtParam->pOrg;
1233 const Pel* piCur = pcDtParam->pCur;
1234 Int iRows = pcDtParam->iRows;
1235 Int iStrideOrg = pcDtParam->iStrideOrg;
1236 Int iStrideCur = pcDtParam->iStrideCur;
1237
1238 Distortion uiSum = 0;
1239 UInt uiShift = DISTORTION_PRECISION_ADJUSTMENT((pcDtParam->bitDepth-8) << 1);
1240
1241 Intermediate_Int iTemp;
1242
1243 for( ; iRows != 0; iRows-- )
1244 {
1245 iTemp = piOrg[ 0] - piCur[ 0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1246 iTemp = piOrg[ 1] - piCur[ 1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1247 iTemp = piOrg[ 2] - piCur[ 2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1248 iTemp = piOrg[ 3] - piCur[ 3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1249 iTemp = piOrg[ 4] - piCur[ 4]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1250 iTemp = piOrg[ 5] - piCur[ 5]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1251 iTemp = piOrg[ 6] - piCur[ 6]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1252 iTemp = piOrg[ 7] - piCur[ 7]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1253 iTemp = piOrg[ 8] - piCur[ 8]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1254 iTemp = piOrg[ 9] - piCur[ 9]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1255 iTemp = piOrg[10] - piCur[10]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1256 iTemp = piOrg[11] - piCur[11]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1257 iTemp = piOrg[12] - piCur[12]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1258 iTemp = piOrg[13] - piCur[13]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1259 iTemp = piOrg[14] - piCur[14]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1260 iTemp = piOrg[15] - piCur[15]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1261 iTemp = piOrg[16] - piCur[16]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1262 iTemp = piOrg[17] - piCur[17]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1263 iTemp = piOrg[18] - piCur[18]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1264 iTemp = piOrg[19] - piCur[19]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1265 iTemp = piOrg[20] - piCur[20]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1266 iTemp = piOrg[21] - piCur[21]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1267 iTemp = piOrg[22] - piCur[22]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1268 iTemp = piOrg[23] - piCur[23]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1269 iTemp = piOrg[24] - piCur[24]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1270 iTemp = piOrg[25] - piCur[25]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1271 iTemp = piOrg[26] - piCur[26]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1272 iTemp = piOrg[27] - piCur[27]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1273 iTemp = piOrg[28] - piCur[28]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1274 iTemp = piOrg[29] - piCur[29]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1275 iTemp = piOrg[30] - piCur[30]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1276 iTemp = piOrg[31] - piCur[31]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1277 iTemp = piOrg[32] - piCur[32]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1278 iTemp = piOrg[33] - piCur[33]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1279 iTemp = piOrg[34] - piCur[34]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1280 iTemp = piOrg[35] - piCur[35]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1281 iTemp = piOrg[36] - piCur[36]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1282 iTemp = piOrg[37] - piCur[37]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1283 iTemp = piOrg[38] - piCur[38]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1284 iTemp = piOrg[39] - piCur[39]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1285 iTemp = piOrg[40] - piCur[40]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1286 iTemp = piOrg[41] - piCur[41]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1287 iTemp = piOrg[42] - piCur[42]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1288 iTemp = piOrg[43] - piCur[43]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1289 iTemp = piOrg[44] - piCur[44]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1290 iTemp = piOrg[45] - piCur[45]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1291 iTemp = piOrg[46] - piCur[46]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1292 iTemp = piOrg[47] - piCur[47]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1293 iTemp = piOrg[48] - piCur[48]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1294 iTemp = piOrg[49] - piCur[49]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1295 iTemp = piOrg[50] - piCur[50]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1296 iTemp = piOrg[51] - piCur[51]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1297 iTemp = piOrg[52] - piCur[52]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1298 iTemp = piOrg[53] - piCur[53]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1299 iTemp = piOrg[54] - piCur[54]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1300 iTemp = piOrg[55] - piCur[55]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1301 iTemp = piOrg[56] - piCur[56]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1302 iTemp = piOrg[57] - piCur[57]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1303 iTemp = piOrg[58] - piCur[58]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1304 iTemp = piOrg[59] - piCur[59]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1305 iTemp = piOrg[60] - piCur[60]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1306 iTemp = piOrg[61] - piCur[61]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1307 iTemp = piOrg[62] - piCur[62]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1308 iTemp = piOrg[63] - piCur[63]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1309
1310 piOrg += iStrideOrg;
1311 piCur += iStrideCur;
1312 }
1313
1314 return ( uiSum );
1315 }
1316
1317 // --------------------------------------------------------------------------------------------------------------------
1318 // HADAMARD with step (used in fractional search)
1319 // --------------------------------------------------------------------------------------------------------------------
1320
1321 Distortion TComRdCost::xCalcHADs2x2( Pel *piOrg, Pel *piCur, Int iStrideOrg, Int iStrideCur, Int iStep )
1322 {
1323 Distortion satd = 0;
1324 TCoeff diff[4], m[4];
1325 assert( iStep == 1 );
1326 diff[0] = piOrg[0 ] - piCur[0];
1327 diff[1] = piOrg[1 ] - piCur[1];
1328 diff[2] = piOrg[iStrideOrg ] - piCur[0 + iStrideCur];
1329 diff[3] = piOrg[iStrideOrg + 1] - piCur[1 + iStrideCur];
1330 m[0] = diff[0] + diff[2];
1331 m[1] = diff[1] + diff[3];
1332 m[2] = diff[0] - diff[2];
1333 m[3] = diff[1] - diff[3];
1334
1335 satd += abs(m[0] + m[1]);
1336 satd += abs(m[0] - m[1]);
1337 satd += abs(m[2] + m[3]);
1338 satd += abs(m[2] - m[3]);
1339
1340 return satd;
1341 }
1342
1343 Distortion TComRdCost::xCalcHADs4x4( Pel *piOrg, Pel *piCur, Int iStrideOrg, Int iStrideCur, Int iStep )
1344 {
1345 Int k;
1346 Distortion satd = 0;
1347 TCoeff diff[16], m[16], d[16];
1348
1349 assert( iStep == 1 );
1350 for( k = 0; k < 16; k+=4 )
1351 {
1352 diff[k+0] = piOrg[0] - piCur[0];
1353 diff[k+1] = piOrg[1] - piCur[1];
1354 diff[k+2] = piOrg[2] - piCur[2];
1355 diff[k+3] = piOrg[3] - piCur[3];
1356
1357 piCur += iStrideCur;
1358 piOrg += iStrideOrg;
1359 }
1360
1361 /*===== hadamard transform =====*/
1362 m[ 0] = diff[ 0] + diff[12];
1363 m[ 1] = diff[ 1] + diff[13];
1364 m[ 2] = diff[ 2] + diff[14];
1365 m[ 3] = diff[ 3] + diff[15];
1366 m[ 4] = diff[ 4] + diff[ 8];
1367 m[ 5] = diff[ 5] + diff[ 9];
1368 m[ 6] = diff[ 6] + diff[10];
1369 m[ 7] = diff[ 7] + diff[11];
1370 m[ 8] = diff[ 4] - diff[ 8];
1371 m[ 9] = diff[ 5] - diff[ 9];
1372 m[10] = diff[ 6] - diff[10];
1373 m[11] = diff[ 7] - diff[11];
1374 m[12] = diff[ 0] - diff[12];
1375 m[13] = diff[ 1] - diff[13];
1376 m[14] = diff[ 2] - diff[14];
1377 m[15] = diff[ 3] - diff[15];
1378
1379 d[ 0] = m[ 0] + m[ 4];
1380 d[ 1] = m[ 1] + m[ 5];
1381 d[ 2] = m[ 2] + m[ 6];
1382 d[ 3] = m[ 3] + m[ 7];
1383 d[ 4] = m[ 8] + m[12];
1384 d[ 5] = m[ 9] + m[13];
1385 d[ 6] = m[10] + m[14];
1386 d[ 7] = m[11] + m[15];
1387 d[ 8] = m[ 0] - m[ 4];
1388 d[ 9] = m[ 1] - m[ 5];
1389 d[10] = m[ 2] - m[ 6];
1390 d[11] = m[ 3] - m[ 7];
1391 d[12] = m[12] - m[ 8];
1392 d[13] = m[13] - m[ 9];
1393 d[14] = m[14] - m[10];
1394 d[15] = m[15] - m[11];
1395
1396 m[ 0] = d[ 0] + d[ 3];
1397 m[ 1] = d[ 1] + d[ 2];
1398 m[ 2] = d[ 1] - d[ 2];
1399 m[ 3] = d[ 0] - d[ 3];
1400 m[ 4] = d[ 4] + d[ 7];
1401 m[ 5] = d[ 5] + d[ 6];
1402 m[ 6] = d[ 5] - d[ 6];
1403 m[ 7] = d[ 4] - d[ 7];
1404 m[ 8] = d[ 8] + d[11];
1405 m[ 9] = d[ 9] + d[10];
1406 m[10] = d[ 9] - d[10];
1407 m[11] = d[ 8] - d[11];
1408 m[12] = d[12] + d[15];
1409 m[13] = d[13] + d[14];
1410 m[14] = d[13] - d[14];
1411 m[15] = d[12] - d[15];
1412
1413 d[ 0] = m[ 0] + m[ 1];
1414 d[ 1] = m[ 0] - m[ 1];
1415 d[ 2] = m[ 2] + m[ 3];
1416 d[ 3] = m[ 3] - m[ 2];
1417 d[ 4] = m[ 4] + m[ 5];
1418 d[ 5] = m[ 4] - m[ 5];
1419 d[ 6] = m[ 6] + m[ 7];
1420 d[ 7] = m[ 7] - m[ 6];
1421 d[ 8] = m[ 8] + m[ 9];
1422 d[ 9] = m[ 8] - m[ 9];
1423 d[10] = m[10] + m[11];
1424 d[11] = m[11] - m[10];
1425 d[12] = m[12] + m[13];
1426 d[13] = m[12] - m[13];
1427 d[14] = m[14] + m[15];
1428 d[15] = m[15] - m[14];
1429
1430 for (k=0; k<16; ++k)
1431 {
1432 satd += abs(d[k]);
1433 }
1434 satd = ((satd+1)>>1);
1435
1436 return satd;
1437 }
1438
1439 Distortion TComRdCost::xCalcHADs8x8( Pel *piOrg, Pel *piCur, Int iStrideOrg, Int iStrideCur, Int iStep )
1440 {
1441 Int k, i, j, jj;
1442 Distortion sad = 0;
1443 TCoeff diff[64], m1[8][8], m2[8][8], m3[8][8];
1444 assert( iStep == 1 );
1445 for( k = 0; k < 64; k += 8 )
1446 {
1447 diff[k+0] = piOrg[0] - piCur[0];
1448 diff[k+1] = piOrg[1] - piCur[1];
1449 diff[k+2] = piOrg[2] - piCur[2];
1450 diff[k+3] = piOrg[3] - piCur[3];
1451 diff[k+4] = piOrg[4] - piCur[4];
1452 diff[k+5] = piOrg[5] - piCur[5];
1453 diff[k+6] = piOrg[6] - piCur[6];
1454 diff[k+7] = piOrg[7] - piCur[7];
1455
1456 piCur += iStrideCur;
1457 piOrg += iStrideOrg;
1458 }
1459
1460 //horizontal
1461 for (j=0; j < 8; j++)
1462 {
1463 jj = j << 3;
1464 m2[j][0] = diff[jj ] + diff[jj+4];
1465 m2[j][1] = diff[jj+1] + diff[jj+5];
1466 m2[j][2] = diff[jj+2] + diff[jj+6];
1467 m2[j][3] = diff[jj+3] + diff[jj+7];
1468 m2[j][4] = diff[jj ] - diff[jj+4];
1469 m2[j][5] = diff[jj+1] - diff[jj+5];
1470 m2[j][6] = diff[jj+2] - diff[jj+6];
1471 m2[j][7] = diff[jj+3] - diff[jj+7];
1472
1473 m1[j][0] = m2[j][0] + m2[j][2];
1474 m1[j][1] = m2[j][1] + m2[j][3];
1475 m1[j][2] = m2[j][0] - m2[j][2];
1476 m1[j][3] = m2[j][1] - m2[j][3];
1477 m1[j][4] = m2[j][4] + m2[j][6];
1478 m1[j][5] = m2[j][5] + m2[j][7];
1479 m1[j][6] = m2[j][4] - m2[j][6];
1480 m1[j][7] = m2[j][5] - m2[j][7];
1481
1482 m2[j][0] = m1[j][0] + m1[j][1];
1483 m2[j][1] = m1[j][0] - m1[j][1];
1484 m2[j][2] = m1[j][2] + m1[j][3];
1485 m2[j][3] = m1[j][2] - m1[j][3];
1486 m2[j][4] = m1[j][4] + m1[j][5];
1487 m2[j][5] = m1[j][4] - m1[j][5];
1488 m2[j][6] = m1[j][6] + m1[j][7];
1489 m2[j][7] = m1[j][6] - m1[j][7];
1490 }
1491
1492 //vertical
1493 for (i=0; i < 8; i++)
1494 {
1495 m3[0][i] = m2[0][i] + m2[4][i];
1496 m3[1][i] = m2[1][i] + m2[5][i];
1497 m3[2][i] = m2[2][i] + m2[6][i];
1498 m3[3][i] = m2[3][i] + m2[7][i];
1499 m3[4][i] = m2[0][i] - m2[4][i];
1500 m3[5][i] = m2[1][i] - m2[5][i];
1501 m3[6][i] = m2[2][i] - m2[6][i];
1502 m3[7][i] = m2[3][i] - m2[7][i];
1503
1504 m1[0][i] = m3[0][i] + m3[2][i];
1505 m1[1][i] = m3[1][i] + m3[3][i];
1506 m1[2][i] = m3[0][i] - m3[2][i];
1507 m1[3][i] = m3[1][i] - m3[3][i];
1508 m1[4][i] = m3[4][i] + m3[6][i];
1509 m1[5][i] = m3[5][i] + m3[7][i];
1510 m1[6][i] = m3[4][i] - m3[6][i];
1511 m1[7][i] = m3[5][i] - m3[7][i];
1512
1513 m2[0][i] = m1[0][i] + m1[1][i];
1514 m2[1][i] = m1[0][i] - m1[1][i];
1515 m2[2][i] = m1[2][i] + m1[3][i];
1516 m2[3][i] = m1[2][i] - m1[3][i];
1517 m2[4][i] = m1[4][i] + m1[5][i];
1518 m2[5][i] = m1[4][i] - m1[5][i];
1519 m2[6][i] = m1[6][i] + m1[7][i];
1520 m2[7][i] = m1[6][i] - m1[7][i];
1521 }
1522
1523 for (i = 0; i < 8; i++)
1524 {
1525 for (j = 0; j < 8; j++)
1526 {
1527 sad += abs(m2[i][j]);
1528 }
1529 }
1530
1531 sad=((sad+2)>>2);
1532
1533 return sad;
1534 }
1535
1536
1537 Distortion TComRdCost::xGetHADs( DistParam* pcDtParam )
1538 {
1539 if ( pcDtParam->bApplyWeight )
1540 {
1541 return TComRdCostWeightPrediction::xGetHADsw( pcDtParam );
1542 }
1543 Pel* piOrg = pcDtParam->pOrg;
1544 Pel* piCur = pcDtParam->pCur;
1545 Int iRows = pcDtParam->iRows;
1546 Int iCols = pcDtParam->iCols;
1547 Int iStrideCur = pcDtParam->iStrideCur;
1548 Int iStrideOrg = pcDtParam->iStrideOrg;
1549 Int iStep = pcDtParam->iStep;
1550
1551 Int x, y;
1552
1553 Distortion uiSum = 0;
1554
1555 if( ( iRows % 8 == 0) && (iCols % 8 == 0) )
1556 {
1557 Int iOffsetOrg = iStrideOrg<<3;
1558 Int iOffsetCur = iStrideCur<<3;
1559 for ( y=0; y<iRows; y+= 8 )
1560 {
1561 for ( x=0; x<iCols; x+= 8 )
1562 {
1563 uiSum += xCalcHADs8x8( &piOrg[x], &piCur[x*iStep], iStrideOrg, iStrideCur, iStep );
1564 }
1565 piOrg += iOffsetOrg;
1566 piCur += iOffsetCur;
1567 }
1568 }
1569 else if( ( iRows % 4 == 0) && (iCols % 4 == 0) )
1570 {
1571 Int iOffsetOrg = iStrideOrg<<2;
1572 Int iOffsetCur = iStrideCur<<2;
1573
1574 for ( y=0; y<iRows; y+= 4 )
1575 {
1576 for ( x=0; x<iCols; x+= 4 )
1577 {
1578 uiSum += xCalcHADs4x4( &piOrg[x], &piCur[x*iStep], iStrideOrg, iStrideCur, iStep );
1579 }
1580 piOrg += iOffsetOrg;
1581 piCur += iOffsetCur;
1582 }
1583 }
1584 else if( ( iRows % 2 == 0) && (iCols % 2 == 0) )
1585 {
1586 Int iOffsetOrg = iStrideOrg<<1;
1587 Int iOffsetCur = iStrideCur<<1;
1588 for ( y=0; y<iRows; y+=2 )
1589 {
1590 for ( x=0; x<iCols; x+=2 )
1591 {
1592 uiSum += xCalcHADs2x2( &piOrg[x], &piCur[x*iStep], iStrideOrg, iStrideCur, iStep );
1593 }
1594 piOrg += iOffsetOrg;
1595 piCur += iOffsetCur;
1596 }
1597 }
1598 else
1599 {
1600 assert(false);
1601 }
1602
1603 return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
1604 }
1605
1606 //! \}