Mercurial > hg > forks > libbpg
comparison jctvc/TLibCommon/TComRdCost.cpp @ 0:772086c29cc7
Initial import.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Wed, 16 Nov 2016 11:16:33 +0200 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:772086c29cc7 |
---|---|
1 /* The copyright in this software is being made available under the BSD | |
2 * License, included below. This software may be subject to other third party | |
3 * and contributor rights, including patent rights, and no such rights are | |
4 * granted under this license. | |
5 * | |
6 * Copyright (c) 2010-2014, ITU/ISO/IEC | |
7 * All rights reserved. | |
8 * | |
9 * Redistribution and use in source and binary forms, with or without | |
10 * modification, are permitted provided that the following conditions are met: | |
11 * | |
12 * * Redistributions of source code must retain the above copyright notice, | |
13 * this list of conditions and the following disclaimer. | |
14 * * Redistributions in binary form must reproduce the above copyright notice, | |
15 * this list of conditions and the following disclaimer in the documentation | |
16 * and/or other materials provided with the distribution. | |
17 * * Neither the name of the ITU/ISO/IEC nor the names of its contributors may | |
18 * be used to endorse or promote products derived from this software without | |
19 * specific prior written permission. | |
20 * | |
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS | |
25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF | |
31 * THE POSSIBILITY OF SUCH DAMAGE. | |
32 */ | |
33 | |
34 /** \file TComRdCost.cpp | |
35 \brief RD cost computation class | |
36 */ | |
37 | |
38 #include <math.h> | |
39 #include <assert.h> | |
40 #include "TComRom.h" | |
41 #include "TComRdCost.h" | |
42 | |
43 //! \ingroup TLibCommon | |
44 //! \{ | |
45 | |
46 TComRdCost::TComRdCost() | |
47 { | |
48 init(); | |
49 } | |
50 | |
51 TComRdCost::~TComRdCost() | |
52 { | |
53 } | |
54 | |
55 // Calculate RD functions | |
56 Double TComRdCost::calcRdCost( UInt uiBits, Distortion uiDistortion, Bool bFlag, DFunc eDFunc ) | |
57 { | |
58 Double dRdCost = 0.0; | |
59 Double dLambda = 0.0; | |
60 | |
61 switch ( eDFunc ) | |
62 { | |
63 case DF_SSE: | |
64 assert(0); | |
65 break; | |
66 case DF_SAD: | |
67 #if RExt__HIGH_BIT_DEPTH_SUPPORT | |
68 dLambda = m_dLambdaMotionSAD[0]; // 0 is valid, because for lossless blocks, the cost equation is modified to compensate. | |
69 #else | |
70 dLambda = (Double)m_uiLambdaMotionSAD[0]; // 0 is valid, because for lossless blocks, the cost equation is modified to compensate. | |
71 #endif | |
72 break; | |
73 case DF_DEFAULT: | |
74 dLambda = m_dLambda; | |
75 break; | |
76 case DF_SSE_FRAME: | |
77 dLambda = m_dFrameLambda; | |
78 break; | |
79 default: | |
80 assert (0); | |
81 break; | |
82 } | |
83 | |
84 if (bFlag) //NOTE: this "bFlag" is never true | |
85 { | |
86 // Intra8x8, Intra4x4 Block only... | |
87 if (m_costMode != COST_STANDARD_LOSSY) | |
88 { | |
89 dRdCost = (Double(uiDistortion) / dLambda) + Double(uiBits); // all lossless costs would have uiDistortion=0, and therefore this cost function can be used. | |
90 } | |
91 else | |
92 { | |
93 dRdCost = (((Double)uiDistortion) + ((Double)uiBits * dLambda)); | |
94 } | |
95 } | |
96 else | |
97 { | |
98 if (eDFunc == DF_SAD) | |
99 { | |
100 if (m_costMode != COST_STANDARD_LOSSY) | |
101 { | |
102 dRdCost = ((Double(uiDistortion) * 65536) / dLambda) + Double(uiBits); // all lossless costs would have uiDistortion=0, and therefore this cost function can be used. | |
103 } | |
104 else | |
105 { | |
106 dRdCost = floor(Double(uiDistortion) + (floor((Double(uiBits) * dLambda) + 0.5) / 65536.0)); | |
107 } | |
108 } | |
109 else | |
110 { | |
111 if (m_costMode != COST_STANDARD_LOSSY) | |
112 { | |
113 dRdCost = (Double(uiDistortion) / dLambda) + Double(uiBits); // all lossless costs would have uiDistortion=0, and therefore this cost function can be used. | |
114 } | |
115 else | |
116 { | |
117 dRdCost = floor(Double(uiDistortion) + (Double(uiBits) * dLambda) + 0.5); | |
118 } | |
119 } | |
120 } | |
121 | |
122 return dRdCost; | |
123 } | |
124 | |
125 Double TComRdCost::calcRdCost64( UInt64 uiBits, UInt64 uiDistortion, Bool bFlag, DFunc eDFunc ) | |
126 { | |
127 Double dRdCost = 0.0; | |
128 Double dLambda = 0.0; | |
129 | |
130 switch ( eDFunc ) | |
131 { | |
132 case DF_SSE: | |
133 assert(0); | |
134 break; | |
135 case DF_SAD: | |
136 #if RExt__HIGH_BIT_DEPTH_SUPPORT | |
137 dLambda = m_dLambdaMotionSAD[0]; // 0 is valid, because for lossless blocks, the cost equation is modified to compensate. | |
138 #else | |
139 dLambda = (Double)m_uiLambdaMotionSAD[0]; // 0 is valid, because for lossless blocks, the cost equation is modified to compensate. | |
140 #endif | |
141 break; | |
142 case DF_DEFAULT: | |
143 dLambda = m_dLambda; | |
144 break; | |
145 case DF_SSE_FRAME: | |
146 dLambda = m_dFrameLambda; | |
147 break; | |
148 default: | |
149 assert (0); | |
150 break; | |
151 } | |
152 | |
153 if (bFlag) //NOTE: this "bFlag" is never true | |
154 { | |
155 // Intra8x8, Intra4x4 Block only... | |
156 if (m_costMode != COST_STANDARD_LOSSY) | |
157 { | |
158 dRdCost = (Double(uiDistortion) / dLambda) + Double(uiBits); // all lossless costs would have uiDistortion=0, and therefore this cost function can be used. | |
159 } | |
160 else | |
161 { | |
162 dRdCost = (((Double)(Int64)uiDistortion) + ((Double)(Int64)uiBits * dLambda)); | |
163 } | |
164 } | |
165 else | |
166 { | |
167 if (eDFunc == DF_SAD) | |
168 { | |
169 if (m_costMode != COST_STANDARD_LOSSY) | |
170 { | |
171 dRdCost = ((Double(uiDistortion) * 65536) / dLambda) + Double(uiBits); // all lossless costs would have uiDistortion=0, and therefore this cost function can be used. | |
172 } | |
173 else | |
174 { | |
175 dRdCost = floor(Double(uiDistortion) + (floor((Double(uiBits) * dLambda) + 0.5) / 65536.0)); | |
176 } | |
177 } | |
178 else | |
179 { | |
180 if (m_costMode != COST_STANDARD_LOSSY) | |
181 { | |
182 dRdCost = (Double(uiDistortion) / dLambda) + Double(uiBits); // all lossless costs would have uiDistortion=0, and therefore this cost function can be used. | |
183 } | |
184 else | |
185 { | |
186 dRdCost = floor(Double(uiDistortion) + (Double(uiBits) * dLambda) + 0.5); | |
187 } | |
188 } | |
189 } | |
190 | |
191 return dRdCost; | |
192 } | |
193 | |
194 Void TComRdCost::setLambda( Double dLambda ) | |
195 { | |
196 m_dLambda = dLambda; | |
197 m_sqrtLambda = sqrt(m_dLambda); | |
198 #if RExt__HIGH_BIT_DEPTH_SUPPORT | |
199 m_dLambdaMotionSAD[0] = 65536.0 * m_sqrtLambda; | |
200 m_dLambdaMotionSSE[0] = 65536.0 * m_dLambda; | |
201 #if FULL_NBIT | |
202 dLambda = 0.57 * pow(2.0, ((LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP_PRIME - 12) / 3.0)); | |
203 #else | |
204 dLambda = 0.57 * pow(2.0, ((LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP_PRIME - 12 - 6 * (g_bitDepth[CHANNEL_TYPE_LUMA] - 8)) / 3.0)); | |
205 #endif | |
206 m_dLambdaMotionSAD[1] = 65536.0 * sqrt(dLambda); | |
207 m_dLambdaMotionSSE[1] = 65536.0 * dLambda; | |
208 #else | |
209 m_uiLambdaMotionSAD[0] = (UInt)floor(65536.0 * m_sqrtLambda); | |
210 m_uiLambdaMotionSSE[0] = (UInt)floor(65536.0 * m_dLambda ); | |
211 #if FULL_NBIT | |
212 dLambda = 0.57 * pow(2.0, ((LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP_PRIME - 12) / 3.0)); | |
213 #else | |
214 dLambda = 0.57 * pow(2.0, ((LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP_PRIME - 12 - 6 * (g_bitDepth[CHANNEL_TYPE_LUMA] - 8)) / 3.0)); | |
215 #endif | |
216 m_uiLambdaMotionSAD[1] = (UInt)floor(65536.0 * sqrt(dLambda)); | |
217 m_uiLambdaMotionSSE[1] = (UInt)floor(65536.0 * dLambda ); | |
218 #endif | |
219 } | |
220 | |
221 | |
222 // Initialize Function Pointer by [eDFunc] | |
223 Void TComRdCost::init() | |
224 { | |
225 m_afpDistortFunc[DF_DEFAULT] = NULL; // for DF_DEFAULT | |
226 | |
227 m_afpDistortFunc[DF_SSE ] = TComRdCost::xGetSSE; | |
228 m_afpDistortFunc[DF_SSE4 ] = TComRdCost::xGetSSE4; | |
229 m_afpDistortFunc[DF_SSE8 ] = TComRdCost::xGetSSE8; | |
230 m_afpDistortFunc[DF_SSE16 ] = TComRdCost::xGetSSE16; | |
231 m_afpDistortFunc[DF_SSE32 ] = TComRdCost::xGetSSE32; | |
232 m_afpDistortFunc[DF_SSE64 ] = TComRdCost::xGetSSE64; | |
233 m_afpDistortFunc[DF_SSE16N ] = TComRdCost::xGetSSE16N; | |
234 | |
235 m_afpDistortFunc[DF_SAD ] = TComRdCost::xGetSAD; | |
236 m_afpDistortFunc[DF_SAD4 ] = TComRdCost::xGetSAD4; | |
237 m_afpDistortFunc[DF_SAD8 ] = TComRdCost::xGetSAD8; | |
238 m_afpDistortFunc[DF_SAD16 ] = TComRdCost::xGetSAD16; | |
239 m_afpDistortFunc[DF_SAD32 ] = TComRdCost::xGetSAD32; | |
240 m_afpDistortFunc[DF_SAD64 ] = TComRdCost::xGetSAD64; | |
241 m_afpDistortFunc[DF_SAD16N ] = TComRdCost::xGetSAD16N; | |
242 | |
243 m_afpDistortFunc[DF_SADS ] = TComRdCost::xGetSAD; | |
244 m_afpDistortFunc[DF_SADS4 ] = TComRdCost::xGetSAD4; | |
245 m_afpDistortFunc[DF_SADS8 ] = TComRdCost::xGetSAD8; | |
246 m_afpDistortFunc[DF_SADS16 ] = TComRdCost::xGetSAD16; | |
247 m_afpDistortFunc[DF_SADS32 ] = TComRdCost::xGetSAD32; | |
248 m_afpDistortFunc[DF_SADS64 ] = TComRdCost::xGetSAD64; | |
249 m_afpDistortFunc[DF_SADS16N] = TComRdCost::xGetSAD16N; | |
250 | |
251 #if AMP_SAD | |
252 m_afpDistortFunc[DF_SAD12 ] = TComRdCost::xGetSAD12; | |
253 m_afpDistortFunc[DF_SAD24 ] = TComRdCost::xGetSAD24; | |
254 m_afpDistortFunc[DF_SAD48 ] = TComRdCost::xGetSAD48; | |
255 | |
256 m_afpDistortFunc[DF_SADS12 ] = TComRdCost::xGetSAD12; | |
257 m_afpDistortFunc[DF_SADS24 ] = TComRdCost::xGetSAD24; | |
258 m_afpDistortFunc[DF_SADS48 ] = TComRdCost::xGetSAD48; | |
259 #endif | |
260 m_afpDistortFunc[DF_HADS ] = TComRdCost::xGetHADs; | |
261 m_afpDistortFunc[DF_HADS4 ] = TComRdCost::xGetHADs; | |
262 m_afpDistortFunc[DF_HADS8 ] = TComRdCost::xGetHADs; | |
263 m_afpDistortFunc[DF_HADS16 ] = TComRdCost::xGetHADs; | |
264 m_afpDistortFunc[DF_HADS32 ] = TComRdCost::xGetHADs; | |
265 m_afpDistortFunc[DF_HADS64 ] = TComRdCost::xGetHADs; | |
266 m_afpDistortFunc[DF_HADS16N] = TComRdCost::xGetHADs; | |
267 | |
268 m_costMode = COST_STANDARD_LOSSY; | |
269 | |
270 #if RExt__HIGH_BIT_DEPTH_SUPPORT | |
271 m_dCost = 0; | |
272 #else | |
273 m_uiCost = 0; | |
274 #endif | |
275 m_iCostScale = 0; | |
276 } | |
277 | |
278 UInt TComRdCost::xGetComponentBits( Int iVal ) | |
279 { | |
280 UInt uiLength = 1; | |
281 UInt uiTemp = ( iVal <= 0) ? (-iVal<<1)+1: (iVal<<1); | |
282 | |
283 assert ( uiTemp ); | |
284 | |
285 while ( 1 != uiTemp ) | |
286 { | |
287 uiTemp >>= 1; | |
288 uiLength += 2; | |
289 } | |
290 | |
291 return uiLength; | |
292 } | |
293 | |
294 Void TComRdCost::setDistParam( UInt uiBlkWidth, UInt uiBlkHeight, DFunc eDFunc, DistParam& rcDistParam ) | |
295 { | |
296 // set Block Width / Height | |
297 rcDistParam.iCols = uiBlkWidth; | |
298 rcDistParam.iRows = uiBlkHeight; | |
299 rcDistParam.DistFunc = m_afpDistortFunc[eDFunc + g_aucConvertToBit[ rcDistParam.iCols ] + 1 ]; | |
300 | |
301 // initialize | |
302 rcDistParam.iSubShift = 0; | |
303 } | |
304 | |
305 // Setting the Distortion Parameter for Inter (ME) | |
306 Void TComRdCost::setDistParam( TComPattern* pcPatternKey, Pel* piRefY, Int iRefStride, DistParam& rcDistParam ) | |
307 { | |
308 // set Original & Curr Pointer / Stride | |
309 rcDistParam.pOrg = pcPatternKey->getROIY(); | |
310 rcDistParam.pCur = piRefY; | |
311 | |
312 rcDistParam.iStrideOrg = pcPatternKey->getPatternLStride(); | |
313 rcDistParam.iStrideCur = iRefStride; | |
314 | |
315 // set Block Width / Height | |
316 rcDistParam.iCols = pcPatternKey->getROIYWidth(); | |
317 rcDistParam.iRows = pcPatternKey->getROIYHeight(); | |
318 rcDistParam.DistFunc = m_afpDistortFunc[DF_SAD + g_aucConvertToBit[ rcDistParam.iCols ] + 1 ]; | |
319 | |
320 #if AMP_SAD | |
321 if (rcDistParam.iCols == 12) | |
322 { | |
323 rcDistParam.DistFunc = m_afpDistortFunc[DF_SAD12]; | |
324 } | |
325 else if (rcDistParam.iCols == 24) | |
326 { | |
327 rcDistParam.DistFunc = m_afpDistortFunc[DF_SAD24]; | |
328 } | |
329 else if (rcDistParam.iCols == 48) | |
330 { | |
331 rcDistParam.DistFunc = m_afpDistortFunc[DF_SAD48]; | |
332 } | |
333 #endif | |
334 | |
335 // initialize | |
336 rcDistParam.iSubShift = 0; | |
337 } | |
338 | |
339 // Setting the Distortion Parameter for Inter (subpel ME with step) | |
340 Void TComRdCost::setDistParam( TComPattern* pcPatternKey, Pel* piRefY, Int iRefStride, Int iStep, DistParam& rcDistParam, Bool bHADME ) | |
341 { | |
342 // set Original & Curr Pointer / Stride | |
343 rcDistParam.pOrg = pcPatternKey->getROIY(); | |
344 rcDistParam.pCur = piRefY; | |
345 | |
346 rcDistParam.iStrideOrg = pcPatternKey->getPatternLStride(); | |
347 rcDistParam.iStrideCur = iRefStride * iStep; | |
348 | |
349 // set Step for interpolated buffer | |
350 rcDistParam.iStep = iStep; | |
351 | |
352 // set Block Width / Height | |
353 rcDistParam.iCols = pcPatternKey->getROIYWidth(); | |
354 rcDistParam.iRows = pcPatternKey->getROIYHeight(); | |
355 | |
356 // set distortion function | |
357 if ( !bHADME ) | |
358 { | |
359 rcDistParam.DistFunc = m_afpDistortFunc[DF_SADS + g_aucConvertToBit[ rcDistParam.iCols ] + 1 ]; | |
360 #if AMP_SAD | |
361 if (rcDistParam.iCols == 12) | |
362 { | |
363 rcDistParam.DistFunc = m_afpDistortFunc[DF_SADS12]; | |
364 } | |
365 else if (rcDistParam.iCols == 24) | |
366 { | |
367 rcDistParam.DistFunc = m_afpDistortFunc[DF_SADS24]; | |
368 } | |
369 else if (rcDistParam.iCols == 48) | |
370 { | |
371 rcDistParam.DistFunc = m_afpDistortFunc[DF_SADS48]; | |
372 } | |
373 #endif | |
374 } | |
375 else | |
376 { | |
377 rcDistParam.DistFunc = m_afpDistortFunc[DF_HADS + g_aucConvertToBit[ rcDistParam.iCols ] + 1 ]; | |
378 } | |
379 | |
380 // initialize | |
381 rcDistParam.iSubShift = 0; | |
382 } | |
383 | |
384 Void TComRdCost::setDistParam( DistParam& rcDP, Int bitDepth, Pel* p1, Int iStride1, Pel* p2, Int iStride2, Int iWidth, Int iHeight, Bool bHadamard ) | |
385 { | |
386 rcDP.pOrg = p1; | |
387 rcDP.pCur = p2; | |
388 rcDP.iStrideOrg = iStride1; | |
389 rcDP.iStrideCur = iStride2; | |
390 rcDP.iCols = iWidth; | |
391 rcDP.iRows = iHeight; | |
392 rcDP.iStep = 1; | |
393 rcDP.iSubShift = 0; | |
394 rcDP.bitDepth = bitDepth; | |
395 rcDP.DistFunc = m_afpDistortFunc[ ( bHadamard ? DF_HADS : DF_SADS ) + g_aucConvertToBit[ iWidth ] + 1 ]; | |
396 } | |
397 | |
398 Distortion TComRdCost::calcHAD( Int bitDepth, Pel* pi0, Int iStride0, Pel* pi1, Int iStride1, Int iWidth, Int iHeight ) | |
399 { | |
400 Distortion uiSum = 0; | |
401 Int x, y; | |
402 | |
403 if ( ( (iWidth % 8) == 0 ) && ( (iHeight % 8) == 0 ) ) | |
404 { | |
405 for ( y=0; y<iHeight; y+= 8 ) | |
406 { | |
407 for ( x=0; x<iWidth; x+= 8 ) | |
408 { | |
409 uiSum += xCalcHADs8x8( &pi0[x], &pi1[x], iStride0, iStride1, 1 ); | |
410 } | |
411 pi0 += iStride0*8; | |
412 pi1 += iStride1*8; | |
413 } | |
414 } | |
415 else | |
416 { | |
417 assert ( ( (iWidth % 4) == 0 ) && ( (iHeight % 4) == 0 ) ); | |
418 | |
419 for ( y=0; y<iHeight; y+= 4 ) | |
420 { | |
421 for ( x=0; x<iWidth; x+= 4 ) | |
422 { | |
423 uiSum += xCalcHADs4x4( &pi0[x], &pi1[x], iStride0, iStride1, 1 ); | |
424 } | |
425 pi0 += iStride0*4; | |
426 pi1 += iStride1*4; | |
427 } | |
428 } | |
429 | |
430 return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(bitDepth-8) ); | |
431 } | |
432 | |
433 Distortion TComRdCost::getDistPart( Int bitDepth, Pel* piCur, Int iCurStride, Pel* piOrg, Int iOrgStride, UInt uiBlkWidth, UInt uiBlkHeight, const ComponentID compID, DFunc eDFunc ) | |
434 { | |
435 DistParam cDtParam; | |
436 setDistParam( uiBlkWidth, uiBlkHeight, eDFunc, cDtParam ); | |
437 cDtParam.pOrg = piOrg; | |
438 cDtParam.pCur = piCur; | |
439 cDtParam.iStrideOrg = iOrgStride; | |
440 cDtParam.iStrideCur = iCurStride; | |
441 cDtParam.iStep = 1; | |
442 | |
443 cDtParam.bApplyWeight = false; | |
444 cDtParam.compIdx = MAX_NUM_COMPONENT; // just for assert: to be sure it was set before use | |
445 cDtParam.bitDepth = bitDepth; | |
446 | |
447 if (isChroma(compID)) | |
448 { | |
449 return ((Distortion) (m_distortionWeight[compID] * cDtParam.DistFunc( &cDtParam ))); | |
450 } | |
451 else | |
452 { | |
453 return cDtParam.DistFunc( &cDtParam ); | |
454 } | |
455 } | |
456 | |
457 // ==================================================================================================================== | |
458 // Distortion functions | |
459 // ==================================================================================================================== | |
460 | |
461 // -------------------------------------------------------------------------------------------------------------------- | |
462 // SAD | |
463 // -------------------------------------------------------------------------------------------------------------------- | |
464 | |
465 Distortion TComRdCost::xGetSAD( DistParam* pcDtParam ) | |
466 { | |
467 if ( pcDtParam->bApplyWeight ) | |
468 { | |
469 return TComRdCostWeightPrediction::xGetSADw( pcDtParam ); | |
470 } | |
471 const Pel* piOrg = pcDtParam->pOrg; | |
472 const Pel* piCur = pcDtParam->pCur; | |
473 Int iRows = pcDtParam->iRows; | |
474 Int iCols = pcDtParam->iCols; | |
475 Int iStrideCur = pcDtParam->iStrideCur; | |
476 Int iStrideOrg = pcDtParam->iStrideOrg; | |
477 | |
478 Distortion uiSum = 0; | |
479 | |
480 for( ; iRows != 0; iRows-- ) | |
481 { | |
482 for (Int n = 0; n < iCols; n++ ) | |
483 { | |
484 uiSum += abs( piOrg[n] - piCur[n] ); | |
485 } | |
486 piOrg += iStrideOrg; | |
487 piCur += iStrideCur; | |
488 } | |
489 | |
490 return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) ); | |
491 } | |
492 | |
493 Distortion TComRdCost::xGetSAD4( DistParam* pcDtParam ) | |
494 { | |
495 if ( pcDtParam->bApplyWeight ) | |
496 { | |
497 return TComRdCostWeightPrediction::xGetSADw( pcDtParam ); | |
498 } | |
499 const Pel* piOrg = pcDtParam->pOrg; | |
500 const Pel* piCur = pcDtParam->pCur; | |
501 Int iRows = pcDtParam->iRows; | |
502 Int iSubShift = pcDtParam->iSubShift; | |
503 Int iSubStep = ( 1 << iSubShift ); | |
504 Int iStrideCur = pcDtParam->iStrideCur*iSubStep; | |
505 Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep; | |
506 | |
507 Distortion uiSum = 0; | |
508 | |
509 for( ; iRows != 0; iRows-=iSubStep ) | |
510 { | |
511 uiSum += abs( piOrg[0] - piCur[0] ); | |
512 uiSum += abs( piOrg[1] - piCur[1] ); | |
513 uiSum += abs( piOrg[2] - piCur[2] ); | |
514 uiSum += abs( piOrg[3] - piCur[3] ); | |
515 | |
516 piOrg += iStrideOrg; | |
517 piCur += iStrideCur; | |
518 } | |
519 | |
520 uiSum <<= iSubShift; | |
521 return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) ); | |
522 } | |
523 | |
524 Distortion TComRdCost::xGetSAD8( DistParam* pcDtParam ) | |
525 { | |
526 if ( pcDtParam->bApplyWeight ) | |
527 { | |
528 return TComRdCostWeightPrediction::xGetSADw( pcDtParam ); | |
529 } | |
530 const Pel* piOrg = pcDtParam->pOrg; | |
531 const Pel* piCur = pcDtParam->pCur; | |
532 Int iRows = pcDtParam->iRows; | |
533 Int iSubShift = pcDtParam->iSubShift; | |
534 Int iSubStep = ( 1 << iSubShift ); | |
535 Int iStrideCur = pcDtParam->iStrideCur*iSubStep; | |
536 Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep; | |
537 | |
538 Distortion uiSum = 0; | |
539 | |
540 for( ; iRows != 0; iRows-=iSubStep ) | |
541 { | |
542 uiSum += abs( piOrg[0] - piCur[0] ); | |
543 uiSum += abs( piOrg[1] - piCur[1] ); | |
544 uiSum += abs( piOrg[2] - piCur[2] ); | |
545 uiSum += abs( piOrg[3] - piCur[3] ); | |
546 uiSum += abs( piOrg[4] - piCur[4] ); | |
547 uiSum += abs( piOrg[5] - piCur[5] ); | |
548 uiSum += abs( piOrg[6] - piCur[6] ); | |
549 uiSum += abs( piOrg[7] - piCur[7] ); | |
550 | |
551 piOrg += iStrideOrg; | |
552 piCur += iStrideCur; | |
553 } | |
554 | |
555 uiSum <<= iSubShift; | |
556 return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) ); | |
557 } | |
558 | |
559 Distortion TComRdCost::xGetSAD16( DistParam* pcDtParam ) | |
560 { | |
561 if ( pcDtParam->bApplyWeight ) | |
562 { | |
563 return TComRdCostWeightPrediction::xGetSADw( pcDtParam ); | |
564 } | |
565 const Pel* piOrg = pcDtParam->pOrg; | |
566 const Pel* piCur = pcDtParam->pCur; | |
567 Int iRows = pcDtParam->iRows; | |
568 Int iSubShift = pcDtParam->iSubShift; | |
569 Int iSubStep = ( 1 << iSubShift ); | |
570 Int iStrideCur = pcDtParam->iStrideCur*iSubStep; | |
571 Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep; | |
572 | |
573 Distortion uiSum = 0; | |
574 | |
575 for( ; iRows != 0; iRows-=iSubStep ) | |
576 { | |
577 uiSum += abs( piOrg[0] - piCur[0] ); | |
578 uiSum += abs( piOrg[1] - piCur[1] ); | |
579 uiSum += abs( piOrg[2] - piCur[2] ); | |
580 uiSum += abs( piOrg[3] - piCur[3] ); | |
581 uiSum += abs( piOrg[4] - piCur[4] ); | |
582 uiSum += abs( piOrg[5] - piCur[5] ); | |
583 uiSum += abs( piOrg[6] - piCur[6] ); | |
584 uiSum += abs( piOrg[7] - piCur[7] ); | |
585 uiSum += abs( piOrg[8] - piCur[8] ); | |
586 uiSum += abs( piOrg[9] - piCur[9] ); | |
587 uiSum += abs( piOrg[10] - piCur[10] ); | |
588 uiSum += abs( piOrg[11] - piCur[11] ); | |
589 uiSum += abs( piOrg[12] - piCur[12] ); | |
590 uiSum += abs( piOrg[13] - piCur[13] ); | |
591 uiSum += abs( piOrg[14] - piCur[14] ); | |
592 uiSum += abs( piOrg[15] - piCur[15] ); | |
593 | |
594 piOrg += iStrideOrg; | |
595 piCur += iStrideCur; | |
596 } | |
597 | |
598 uiSum <<= iSubShift; | |
599 return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) ); | |
600 } | |
601 | |
602 #if AMP_SAD | |
603 Distortion TComRdCost::xGetSAD12( DistParam* pcDtParam ) | |
604 { | |
605 if ( pcDtParam->bApplyWeight ) | |
606 { | |
607 return TComRdCostWeightPrediction::xGetSADw( pcDtParam ); | |
608 } | |
609 const Pel* piOrg = pcDtParam->pOrg; | |
610 const Pel* piCur = pcDtParam->pCur; | |
611 Int iRows = pcDtParam->iRows; | |
612 Int iSubShift = pcDtParam->iSubShift; | |
613 Int iSubStep = ( 1 << iSubShift ); | |
614 Int iStrideCur = pcDtParam->iStrideCur*iSubStep; | |
615 Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep; | |
616 | |
617 Distortion uiSum = 0; | |
618 | |
619 for( ; iRows != 0; iRows-=iSubStep ) | |
620 { | |
621 uiSum += abs( piOrg[0] - piCur[0] ); | |
622 uiSum += abs( piOrg[1] - piCur[1] ); | |
623 uiSum += abs( piOrg[2] - piCur[2] ); | |
624 uiSum += abs( piOrg[3] - piCur[3] ); | |
625 uiSum += abs( piOrg[4] - piCur[4] ); | |
626 uiSum += abs( piOrg[5] - piCur[5] ); | |
627 uiSum += abs( piOrg[6] - piCur[6] ); | |
628 uiSum += abs( piOrg[7] - piCur[7] ); | |
629 uiSum += abs( piOrg[8] - piCur[8] ); | |
630 uiSum += abs( piOrg[9] - piCur[9] ); | |
631 uiSum += abs( piOrg[10] - piCur[10] ); | |
632 uiSum += abs( piOrg[11] - piCur[11] ); | |
633 | |
634 piOrg += iStrideOrg; | |
635 piCur += iStrideCur; | |
636 } | |
637 | |
638 uiSum <<= iSubShift; | |
639 return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) ); | |
640 } | |
641 #endif | |
642 | |
643 Distortion TComRdCost::xGetSAD16N( DistParam* pcDtParam ) | |
644 { | |
645 const Pel* piOrg = pcDtParam->pOrg; | |
646 const Pel* piCur = pcDtParam->pCur; | |
647 Int iRows = pcDtParam->iRows; | |
648 Int iCols = pcDtParam->iCols; | |
649 Int iSubShift = pcDtParam->iSubShift; | |
650 Int iSubStep = ( 1 << iSubShift ); | |
651 Int iStrideCur = pcDtParam->iStrideCur*iSubStep; | |
652 Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep; | |
653 | |
654 Distortion uiSum = 0; | |
655 | |
656 for( ; iRows != 0; iRows-=iSubStep ) | |
657 { | |
658 for (Int n = 0; n < iCols; n+=16 ) | |
659 { | |
660 uiSum += abs( piOrg[n+ 0] - piCur[n+ 0] ); | |
661 uiSum += abs( piOrg[n+ 1] - piCur[n+ 1] ); | |
662 uiSum += abs( piOrg[n+ 2] - piCur[n+ 2] ); | |
663 uiSum += abs( piOrg[n+ 3] - piCur[n+ 3] ); | |
664 uiSum += abs( piOrg[n+ 4] - piCur[n+ 4] ); | |
665 uiSum += abs( piOrg[n+ 5] - piCur[n+ 5] ); | |
666 uiSum += abs( piOrg[n+ 6] - piCur[n+ 6] ); | |
667 uiSum += abs( piOrg[n+ 7] - piCur[n+ 7] ); | |
668 uiSum += abs( piOrg[n+ 8] - piCur[n+ 8] ); | |
669 uiSum += abs( piOrg[n+ 9] - piCur[n+ 9] ); | |
670 uiSum += abs( piOrg[n+10] - piCur[n+10] ); | |
671 uiSum += abs( piOrg[n+11] - piCur[n+11] ); | |
672 uiSum += abs( piOrg[n+12] - piCur[n+12] ); | |
673 uiSum += abs( piOrg[n+13] - piCur[n+13] ); | |
674 uiSum += abs( piOrg[n+14] - piCur[n+14] ); | |
675 uiSum += abs( piOrg[n+15] - piCur[n+15] ); | |
676 } | |
677 piOrg += iStrideOrg; | |
678 piCur += iStrideCur; | |
679 } | |
680 | |
681 uiSum <<= iSubShift; | |
682 return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) ); | |
683 } | |
684 | |
685 Distortion TComRdCost::xGetSAD32( DistParam* pcDtParam ) | |
686 { | |
687 if ( pcDtParam->bApplyWeight ) | |
688 { | |
689 return TComRdCostWeightPrediction::xGetSADw( pcDtParam ); | |
690 } | |
691 const Pel* piOrg = pcDtParam->pOrg; | |
692 const Pel* piCur = pcDtParam->pCur; | |
693 Int iRows = pcDtParam->iRows; | |
694 Int iSubShift = pcDtParam->iSubShift; | |
695 Int iSubStep = ( 1 << iSubShift ); | |
696 Int iStrideCur = pcDtParam->iStrideCur*iSubStep; | |
697 Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep; | |
698 | |
699 Distortion uiSum = 0; | |
700 | |
701 for( ; iRows != 0; iRows-=iSubStep ) | |
702 { | |
703 uiSum += abs( piOrg[0] - piCur[0] ); | |
704 uiSum += abs( piOrg[1] - piCur[1] ); | |
705 uiSum += abs( piOrg[2] - piCur[2] ); | |
706 uiSum += abs( piOrg[3] - piCur[3] ); | |
707 uiSum += abs( piOrg[4] - piCur[4] ); | |
708 uiSum += abs( piOrg[5] - piCur[5] ); | |
709 uiSum += abs( piOrg[6] - piCur[6] ); | |
710 uiSum += abs( piOrg[7] - piCur[7] ); | |
711 uiSum += abs( piOrg[8] - piCur[8] ); | |
712 uiSum += abs( piOrg[9] - piCur[9] ); | |
713 uiSum += abs( piOrg[10] - piCur[10] ); | |
714 uiSum += abs( piOrg[11] - piCur[11] ); | |
715 uiSum += abs( piOrg[12] - piCur[12] ); | |
716 uiSum += abs( piOrg[13] - piCur[13] ); | |
717 uiSum += abs( piOrg[14] - piCur[14] ); | |
718 uiSum += abs( piOrg[15] - piCur[15] ); | |
719 uiSum += abs( piOrg[16] - piCur[16] ); | |
720 uiSum += abs( piOrg[17] - piCur[17] ); | |
721 uiSum += abs( piOrg[18] - piCur[18] ); | |
722 uiSum += abs( piOrg[19] - piCur[19] ); | |
723 uiSum += abs( piOrg[20] - piCur[20] ); | |
724 uiSum += abs( piOrg[21] - piCur[21] ); | |
725 uiSum += abs( piOrg[22] - piCur[22] ); | |
726 uiSum += abs( piOrg[23] - piCur[23] ); | |
727 uiSum += abs( piOrg[24] - piCur[24] ); | |
728 uiSum += abs( piOrg[25] - piCur[25] ); | |
729 uiSum += abs( piOrg[26] - piCur[26] ); | |
730 uiSum += abs( piOrg[27] - piCur[27] ); | |
731 uiSum += abs( piOrg[28] - piCur[28] ); | |
732 uiSum += abs( piOrg[29] - piCur[29] ); | |
733 uiSum += abs( piOrg[30] - piCur[30] ); | |
734 uiSum += abs( piOrg[31] - piCur[31] ); | |
735 | |
736 piOrg += iStrideOrg; | |
737 piCur += iStrideCur; | |
738 } | |
739 | |
740 uiSum <<= iSubShift; | |
741 return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) ); | |
742 } | |
743 | |
744 #if AMP_SAD | |
745 Distortion TComRdCost::xGetSAD24( DistParam* pcDtParam ) | |
746 { | |
747 if ( pcDtParam->bApplyWeight ) | |
748 { | |
749 return TComRdCostWeightPrediction::xGetSADw( pcDtParam ); | |
750 } | |
751 const Pel* piOrg = pcDtParam->pOrg; | |
752 const Pel* piCur = pcDtParam->pCur; | |
753 Int iRows = pcDtParam->iRows; | |
754 Int iSubShift = pcDtParam->iSubShift; | |
755 Int iSubStep = ( 1 << iSubShift ); | |
756 Int iStrideCur = pcDtParam->iStrideCur*iSubStep; | |
757 Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep; | |
758 | |
759 Distortion uiSum = 0; | |
760 | |
761 for( ; iRows != 0; iRows-=iSubStep ) | |
762 { | |
763 uiSum += abs( piOrg[0] - piCur[0] ); | |
764 uiSum += abs( piOrg[1] - piCur[1] ); | |
765 uiSum += abs( piOrg[2] - piCur[2] ); | |
766 uiSum += abs( piOrg[3] - piCur[3] ); | |
767 uiSum += abs( piOrg[4] - piCur[4] ); | |
768 uiSum += abs( piOrg[5] - piCur[5] ); | |
769 uiSum += abs( piOrg[6] - piCur[6] ); | |
770 uiSum += abs( piOrg[7] - piCur[7] ); | |
771 uiSum += abs( piOrg[8] - piCur[8] ); | |
772 uiSum += abs( piOrg[9] - piCur[9] ); | |
773 uiSum += abs( piOrg[10] - piCur[10] ); | |
774 uiSum += abs( piOrg[11] - piCur[11] ); | |
775 uiSum += abs( piOrg[12] - piCur[12] ); | |
776 uiSum += abs( piOrg[13] - piCur[13] ); | |
777 uiSum += abs( piOrg[14] - piCur[14] ); | |
778 uiSum += abs( piOrg[15] - piCur[15] ); | |
779 uiSum += abs( piOrg[16] - piCur[16] ); | |
780 uiSum += abs( piOrg[17] - piCur[17] ); | |
781 uiSum += abs( piOrg[18] - piCur[18] ); | |
782 uiSum += abs( piOrg[19] - piCur[19] ); | |
783 uiSum += abs( piOrg[20] - piCur[20] ); | |
784 uiSum += abs( piOrg[21] - piCur[21] ); | |
785 uiSum += abs( piOrg[22] - piCur[22] ); | |
786 uiSum += abs( piOrg[23] - piCur[23] ); | |
787 | |
788 piOrg += iStrideOrg; | |
789 piCur += iStrideCur; | |
790 } | |
791 | |
792 uiSum <<= iSubShift; | |
793 return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) ); | |
794 } | |
795 | |
796 #endif | |
797 | |
798 Distortion TComRdCost::xGetSAD64( DistParam* pcDtParam ) | |
799 { | |
800 if ( pcDtParam->bApplyWeight ) | |
801 { | |
802 return TComRdCostWeightPrediction::xGetSADw( pcDtParam ); | |
803 } | |
804 const Pel* piOrg = pcDtParam->pOrg; | |
805 const Pel* piCur = pcDtParam->pCur; | |
806 Int iRows = pcDtParam->iRows; | |
807 Int iSubShift = pcDtParam->iSubShift; | |
808 Int iSubStep = ( 1 << iSubShift ); | |
809 Int iStrideCur = pcDtParam->iStrideCur*iSubStep; | |
810 Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep; | |
811 | |
812 Distortion uiSum = 0; | |
813 | |
814 for( ; iRows != 0; iRows-=iSubStep ) | |
815 { | |
816 uiSum += abs( piOrg[0] - piCur[0] ); | |
817 uiSum += abs( piOrg[1] - piCur[1] ); | |
818 uiSum += abs( piOrg[2] - piCur[2] ); | |
819 uiSum += abs( piOrg[3] - piCur[3] ); | |
820 uiSum += abs( piOrg[4] - piCur[4] ); | |
821 uiSum += abs( piOrg[5] - piCur[5] ); | |
822 uiSum += abs( piOrg[6] - piCur[6] ); | |
823 uiSum += abs( piOrg[7] - piCur[7] ); | |
824 uiSum += abs( piOrg[8] - piCur[8] ); | |
825 uiSum += abs( piOrg[9] - piCur[9] ); | |
826 uiSum += abs( piOrg[10] - piCur[10] ); | |
827 uiSum += abs( piOrg[11] - piCur[11] ); | |
828 uiSum += abs( piOrg[12] - piCur[12] ); | |
829 uiSum += abs( piOrg[13] - piCur[13] ); | |
830 uiSum += abs( piOrg[14] - piCur[14] ); | |
831 uiSum += abs( piOrg[15] - piCur[15] ); | |
832 uiSum += abs( piOrg[16] - piCur[16] ); | |
833 uiSum += abs( piOrg[17] - piCur[17] ); | |
834 uiSum += abs( piOrg[18] - piCur[18] ); | |
835 uiSum += abs( piOrg[19] - piCur[19] ); | |
836 uiSum += abs( piOrg[20] - piCur[20] ); | |
837 uiSum += abs( piOrg[21] - piCur[21] ); | |
838 uiSum += abs( piOrg[22] - piCur[22] ); | |
839 uiSum += abs( piOrg[23] - piCur[23] ); | |
840 uiSum += abs( piOrg[24] - piCur[24] ); | |
841 uiSum += abs( piOrg[25] - piCur[25] ); | |
842 uiSum += abs( piOrg[26] - piCur[26] ); | |
843 uiSum += abs( piOrg[27] - piCur[27] ); | |
844 uiSum += abs( piOrg[28] - piCur[28] ); | |
845 uiSum += abs( piOrg[29] - piCur[29] ); | |
846 uiSum += abs( piOrg[30] - piCur[30] ); | |
847 uiSum += abs( piOrg[31] - piCur[31] ); | |
848 uiSum += abs( piOrg[32] - piCur[32] ); | |
849 uiSum += abs( piOrg[33] - piCur[33] ); | |
850 uiSum += abs( piOrg[34] - piCur[34] ); | |
851 uiSum += abs( piOrg[35] - piCur[35] ); | |
852 uiSum += abs( piOrg[36] - piCur[36] ); | |
853 uiSum += abs( piOrg[37] - piCur[37] ); | |
854 uiSum += abs( piOrg[38] - piCur[38] ); | |
855 uiSum += abs( piOrg[39] - piCur[39] ); | |
856 uiSum += abs( piOrg[40] - piCur[40] ); | |
857 uiSum += abs( piOrg[41] - piCur[41] ); | |
858 uiSum += abs( piOrg[42] - piCur[42] ); | |
859 uiSum += abs( piOrg[43] - piCur[43] ); | |
860 uiSum += abs( piOrg[44] - piCur[44] ); | |
861 uiSum += abs( piOrg[45] - piCur[45] ); | |
862 uiSum += abs( piOrg[46] - piCur[46] ); | |
863 uiSum += abs( piOrg[47] - piCur[47] ); | |
864 uiSum += abs( piOrg[48] - piCur[48] ); | |
865 uiSum += abs( piOrg[49] - piCur[49] ); | |
866 uiSum += abs( piOrg[50] - piCur[50] ); | |
867 uiSum += abs( piOrg[51] - piCur[51] ); | |
868 uiSum += abs( piOrg[52] - piCur[52] ); | |
869 uiSum += abs( piOrg[53] - piCur[53] ); | |
870 uiSum += abs( piOrg[54] - piCur[54] ); | |
871 uiSum += abs( piOrg[55] - piCur[55] ); | |
872 uiSum += abs( piOrg[56] - piCur[56] ); | |
873 uiSum += abs( piOrg[57] - piCur[57] ); | |
874 uiSum += abs( piOrg[58] - piCur[58] ); | |
875 uiSum += abs( piOrg[59] - piCur[59] ); | |
876 uiSum += abs( piOrg[60] - piCur[60] ); | |
877 uiSum += abs( piOrg[61] - piCur[61] ); | |
878 uiSum += abs( piOrg[62] - piCur[62] ); | |
879 uiSum += abs( piOrg[63] - piCur[63] ); | |
880 | |
881 piOrg += iStrideOrg; | |
882 piCur += iStrideCur; | |
883 } | |
884 | |
885 uiSum <<= iSubShift; | |
886 return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) ); | |
887 } | |
888 | |
889 #if AMP_SAD | |
890 Distortion TComRdCost::xGetSAD48( DistParam* pcDtParam ) | |
891 { | |
892 if ( pcDtParam->bApplyWeight ) | |
893 { | |
894 return TComRdCostWeightPrediction::xGetSADw( pcDtParam ); | |
895 } | |
896 const Pel* piOrg = pcDtParam->pOrg; | |
897 const Pel* piCur = pcDtParam->pCur; | |
898 Int iRows = pcDtParam->iRows; | |
899 Int iSubShift = pcDtParam->iSubShift; | |
900 Int iSubStep = ( 1 << iSubShift ); | |
901 Int iStrideCur = pcDtParam->iStrideCur*iSubStep; | |
902 Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep; | |
903 | |
904 Distortion uiSum = 0; | |
905 | |
906 for( ; iRows != 0; iRows-=iSubStep ) | |
907 { | |
908 uiSum += abs( piOrg[0] - piCur[0] ); | |
909 uiSum += abs( piOrg[1] - piCur[1] ); | |
910 uiSum += abs( piOrg[2] - piCur[2] ); | |
911 uiSum += abs( piOrg[3] - piCur[3] ); | |
912 uiSum += abs( piOrg[4] - piCur[4] ); | |
913 uiSum += abs( piOrg[5] - piCur[5] ); | |
914 uiSum += abs( piOrg[6] - piCur[6] ); | |
915 uiSum += abs( piOrg[7] - piCur[7] ); | |
916 uiSum += abs( piOrg[8] - piCur[8] ); | |
917 uiSum += abs( piOrg[9] - piCur[9] ); | |
918 uiSum += abs( piOrg[10] - piCur[10] ); | |
919 uiSum += abs( piOrg[11] - piCur[11] ); | |
920 uiSum += abs( piOrg[12] - piCur[12] ); | |
921 uiSum += abs( piOrg[13] - piCur[13] ); | |
922 uiSum += abs( piOrg[14] - piCur[14] ); | |
923 uiSum += abs( piOrg[15] - piCur[15] ); | |
924 uiSum += abs( piOrg[16] - piCur[16] ); | |
925 uiSum += abs( piOrg[17] - piCur[17] ); | |
926 uiSum += abs( piOrg[18] - piCur[18] ); | |
927 uiSum += abs( piOrg[19] - piCur[19] ); | |
928 uiSum += abs( piOrg[20] - piCur[20] ); | |
929 uiSum += abs( piOrg[21] - piCur[21] ); | |
930 uiSum += abs( piOrg[22] - piCur[22] ); | |
931 uiSum += abs( piOrg[23] - piCur[23] ); | |
932 uiSum += abs( piOrg[24] - piCur[24] ); | |
933 uiSum += abs( piOrg[25] - piCur[25] ); | |
934 uiSum += abs( piOrg[26] - piCur[26] ); | |
935 uiSum += abs( piOrg[27] - piCur[27] ); | |
936 uiSum += abs( piOrg[28] - piCur[28] ); | |
937 uiSum += abs( piOrg[29] - piCur[29] ); | |
938 uiSum += abs( piOrg[30] - piCur[30] ); | |
939 uiSum += abs( piOrg[31] - piCur[31] ); | |
940 uiSum += abs( piOrg[32] - piCur[32] ); | |
941 uiSum += abs( piOrg[33] - piCur[33] ); | |
942 uiSum += abs( piOrg[34] - piCur[34] ); | |
943 uiSum += abs( piOrg[35] - piCur[35] ); | |
944 uiSum += abs( piOrg[36] - piCur[36] ); | |
945 uiSum += abs( piOrg[37] - piCur[37] ); | |
946 uiSum += abs( piOrg[38] - piCur[38] ); | |
947 uiSum += abs( piOrg[39] - piCur[39] ); | |
948 uiSum += abs( piOrg[40] - piCur[40] ); | |
949 uiSum += abs( piOrg[41] - piCur[41] ); | |
950 uiSum += abs( piOrg[42] - piCur[42] ); | |
951 uiSum += abs( piOrg[43] - piCur[43] ); | |
952 uiSum += abs( piOrg[44] - piCur[44] ); | |
953 uiSum += abs( piOrg[45] - piCur[45] ); | |
954 uiSum += abs( piOrg[46] - piCur[46] ); | |
955 uiSum += abs( piOrg[47] - piCur[47] ); | |
956 | |
957 piOrg += iStrideOrg; | |
958 piCur += iStrideCur; | |
959 } | |
960 | |
961 uiSum <<= iSubShift; | |
962 return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) ); | |
963 } | |
964 #endif | |
965 | |
966 // -------------------------------------------------------------------------------------------------------------------- | |
967 // SSE | |
968 // -------------------------------------------------------------------------------------------------------------------- | |
969 | |
970 Distortion TComRdCost::xGetSSE( DistParam* pcDtParam ) | |
971 { | |
972 if ( pcDtParam->bApplyWeight ) | |
973 { | |
974 return TComRdCostWeightPrediction::xGetSSEw( pcDtParam ); | |
975 } | |
976 const Pel* piOrg = pcDtParam->pOrg; | |
977 const Pel* piCur = pcDtParam->pCur; | |
978 Int iRows = pcDtParam->iRows; | |
979 Int iCols = pcDtParam->iCols; | |
980 Int iStrideOrg = pcDtParam->iStrideOrg; | |
981 Int iStrideCur = pcDtParam->iStrideCur; | |
982 | |
983 Distortion uiSum = 0; | |
984 UInt uiShift = DISTORTION_PRECISION_ADJUSTMENT((pcDtParam->bitDepth-8) << 1); | |
985 | |
986 Intermediate_Int iTemp; | |
987 | |
988 for( ; iRows != 0; iRows-- ) | |
989 { | |
990 for (Int n = 0; n < iCols; n++ ) | |
991 { | |
992 iTemp = piOrg[n ] - piCur[n ]; | |
993 uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
994 } | |
995 piOrg += iStrideOrg; | |
996 piCur += iStrideCur; | |
997 } | |
998 | |
999 return ( uiSum ); | |
1000 } | |
1001 | |
1002 Distortion TComRdCost::xGetSSE4( DistParam* pcDtParam ) | |
1003 { | |
1004 if ( pcDtParam->bApplyWeight ) | |
1005 { | |
1006 assert( pcDtParam->iCols == 4 ); | |
1007 return TComRdCostWeightPrediction::xGetSSEw( pcDtParam ); | |
1008 } | |
1009 const Pel* piOrg = pcDtParam->pOrg; | |
1010 const Pel* piCur = pcDtParam->pCur; | |
1011 Int iRows = pcDtParam->iRows; | |
1012 Int iStrideOrg = pcDtParam->iStrideOrg; | |
1013 Int iStrideCur = pcDtParam->iStrideCur; | |
1014 | |
1015 Distortion uiSum = 0; | |
1016 UInt uiShift = DISTORTION_PRECISION_ADJUSTMENT((pcDtParam->bitDepth-8) << 1); | |
1017 | |
1018 Intermediate_Int iTemp; | |
1019 | |
1020 for( ; iRows != 0; iRows-- ) | |
1021 { | |
1022 | |
1023 iTemp = piOrg[0] - piCur[0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1024 iTemp = piOrg[1] - piCur[1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1025 iTemp = piOrg[2] - piCur[2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1026 iTemp = piOrg[3] - piCur[3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1027 | |
1028 piOrg += iStrideOrg; | |
1029 piCur += iStrideCur; | |
1030 } | |
1031 | |
1032 return ( uiSum ); | |
1033 } | |
1034 | |
1035 Distortion TComRdCost::xGetSSE8( DistParam* pcDtParam ) | |
1036 { | |
1037 if ( pcDtParam->bApplyWeight ) | |
1038 { | |
1039 assert( pcDtParam->iCols == 8 ); | |
1040 return TComRdCostWeightPrediction::xGetSSEw( pcDtParam ); | |
1041 } | |
1042 const Pel* piOrg = pcDtParam->pOrg; | |
1043 const Pel* piCur = pcDtParam->pCur; | |
1044 Int iRows = pcDtParam->iRows; | |
1045 Int iStrideOrg = pcDtParam->iStrideOrg; | |
1046 Int iStrideCur = pcDtParam->iStrideCur; | |
1047 | |
1048 Distortion uiSum = 0; | |
1049 UInt uiShift = DISTORTION_PRECISION_ADJUSTMENT((pcDtParam->bitDepth-8) << 1); | |
1050 | |
1051 Intermediate_Int iTemp; | |
1052 | |
1053 for( ; iRows != 0; iRows-- ) | |
1054 { | |
1055 iTemp = piOrg[0] - piCur[0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1056 iTemp = piOrg[1] - piCur[1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1057 iTemp = piOrg[2] - piCur[2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1058 iTemp = piOrg[3] - piCur[3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1059 iTemp = piOrg[4] - piCur[4]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1060 iTemp = piOrg[5] - piCur[5]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1061 iTemp = piOrg[6] - piCur[6]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1062 iTemp = piOrg[7] - piCur[7]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1063 | |
1064 piOrg += iStrideOrg; | |
1065 piCur += iStrideCur; | |
1066 } | |
1067 | |
1068 return ( uiSum ); | |
1069 } | |
1070 | |
1071 Distortion TComRdCost::xGetSSE16( DistParam* pcDtParam ) | |
1072 { | |
1073 if ( pcDtParam->bApplyWeight ) | |
1074 { | |
1075 assert( pcDtParam->iCols == 16 ); | |
1076 return TComRdCostWeightPrediction::xGetSSEw( pcDtParam ); | |
1077 } | |
1078 const Pel* piOrg = pcDtParam->pOrg; | |
1079 const Pel* piCur = pcDtParam->pCur; | |
1080 Int iRows = pcDtParam->iRows; | |
1081 Int iStrideOrg = pcDtParam->iStrideOrg; | |
1082 Int iStrideCur = pcDtParam->iStrideCur; | |
1083 | |
1084 Distortion uiSum = 0; | |
1085 UInt uiShift = DISTORTION_PRECISION_ADJUSTMENT((pcDtParam->bitDepth-8) << 1); | |
1086 | |
1087 Intermediate_Int iTemp; | |
1088 | |
1089 for( ; iRows != 0; iRows-- ) | |
1090 { | |
1091 | |
1092 iTemp = piOrg[ 0] - piCur[ 0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1093 iTemp = piOrg[ 1] - piCur[ 1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1094 iTemp = piOrg[ 2] - piCur[ 2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1095 iTemp = piOrg[ 3] - piCur[ 3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1096 iTemp = piOrg[ 4] - piCur[ 4]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1097 iTemp = piOrg[ 5] - piCur[ 5]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1098 iTemp = piOrg[ 6] - piCur[ 6]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1099 iTemp = piOrg[ 7] - piCur[ 7]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1100 iTemp = piOrg[ 8] - piCur[ 8]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1101 iTemp = piOrg[ 9] - piCur[ 9]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1102 iTemp = piOrg[10] - piCur[10]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1103 iTemp = piOrg[11] - piCur[11]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1104 iTemp = piOrg[12] - piCur[12]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1105 iTemp = piOrg[13] - piCur[13]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1106 iTemp = piOrg[14] - piCur[14]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1107 iTemp = piOrg[15] - piCur[15]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1108 | |
1109 piOrg += iStrideOrg; | |
1110 piCur += iStrideCur; | |
1111 } | |
1112 | |
1113 return ( uiSum ); | |
1114 } | |
1115 | |
1116 Distortion TComRdCost::xGetSSE16N( DistParam* pcDtParam ) | |
1117 { | |
1118 if ( pcDtParam->bApplyWeight ) | |
1119 { | |
1120 return TComRdCostWeightPrediction::xGetSSEw( pcDtParam ); | |
1121 } | |
1122 const Pel* piOrg = pcDtParam->pOrg; | |
1123 const Pel* piCur = pcDtParam->pCur; | |
1124 Int iRows = pcDtParam->iRows; | |
1125 Int iCols = pcDtParam->iCols; | |
1126 Int iStrideOrg = pcDtParam->iStrideOrg; | |
1127 Int iStrideCur = pcDtParam->iStrideCur; | |
1128 | |
1129 Distortion uiSum = 0; | |
1130 UInt uiShift = DISTORTION_PRECISION_ADJUSTMENT((pcDtParam->bitDepth-8) << 1); | |
1131 | |
1132 Intermediate_Int iTemp; | |
1133 | |
1134 for( ; iRows != 0; iRows-- ) | |
1135 { | |
1136 for (Int n = 0; n < iCols; n+=16 ) | |
1137 { | |
1138 | |
1139 iTemp = piOrg[n+ 0] - piCur[n+ 0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1140 iTemp = piOrg[n+ 1] - piCur[n+ 1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1141 iTemp = piOrg[n+ 2] - piCur[n+ 2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1142 iTemp = piOrg[n+ 3] - piCur[n+ 3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1143 iTemp = piOrg[n+ 4] - piCur[n+ 4]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1144 iTemp = piOrg[n+ 5] - piCur[n+ 5]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1145 iTemp = piOrg[n+ 6] - piCur[n+ 6]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1146 iTemp = piOrg[n+ 7] - piCur[n+ 7]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1147 iTemp = piOrg[n+ 8] - piCur[n+ 8]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1148 iTemp = piOrg[n+ 9] - piCur[n+ 9]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1149 iTemp = piOrg[n+10] - piCur[n+10]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1150 iTemp = piOrg[n+11] - piCur[n+11]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1151 iTemp = piOrg[n+12] - piCur[n+12]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1152 iTemp = piOrg[n+13] - piCur[n+13]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1153 iTemp = piOrg[n+14] - piCur[n+14]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1154 iTemp = piOrg[n+15] - piCur[n+15]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1155 | |
1156 } | |
1157 piOrg += iStrideOrg; | |
1158 piCur += iStrideCur; | |
1159 } | |
1160 | |
1161 return ( uiSum ); | |
1162 } | |
1163 | |
1164 Distortion TComRdCost::xGetSSE32( DistParam* pcDtParam ) | |
1165 { | |
1166 if ( pcDtParam->bApplyWeight ) | |
1167 { | |
1168 assert( pcDtParam->iCols == 32 ); | |
1169 return TComRdCostWeightPrediction::xGetSSEw( pcDtParam ); | |
1170 } | |
1171 const Pel* piOrg = pcDtParam->pOrg; | |
1172 const Pel* piCur = pcDtParam->pCur; | |
1173 Int iRows = pcDtParam->iRows; | |
1174 Int iStrideOrg = pcDtParam->iStrideOrg; | |
1175 Int iStrideCur = pcDtParam->iStrideCur; | |
1176 | |
1177 Distortion uiSum = 0; | |
1178 UInt uiShift = DISTORTION_PRECISION_ADJUSTMENT((pcDtParam->bitDepth-8) << 1); | |
1179 | |
1180 Intermediate_Int iTemp; | |
1181 | |
1182 for( ; iRows != 0; iRows-- ) | |
1183 { | |
1184 | |
1185 iTemp = piOrg[ 0] - piCur[ 0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1186 iTemp = piOrg[ 1] - piCur[ 1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1187 iTemp = piOrg[ 2] - piCur[ 2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1188 iTemp = piOrg[ 3] - piCur[ 3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1189 iTemp = piOrg[ 4] - piCur[ 4]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1190 iTemp = piOrg[ 5] - piCur[ 5]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1191 iTemp = piOrg[ 6] - piCur[ 6]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1192 iTemp = piOrg[ 7] - piCur[ 7]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1193 iTemp = piOrg[ 8] - piCur[ 8]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1194 iTemp = piOrg[ 9] - piCur[ 9]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1195 iTemp = piOrg[10] - piCur[10]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1196 iTemp = piOrg[11] - piCur[11]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1197 iTemp = piOrg[12] - piCur[12]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1198 iTemp = piOrg[13] - piCur[13]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1199 iTemp = piOrg[14] - piCur[14]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1200 iTemp = piOrg[15] - piCur[15]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1201 iTemp = piOrg[16] - piCur[16]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1202 iTemp = piOrg[17] - piCur[17]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1203 iTemp = piOrg[18] - piCur[18]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1204 iTemp = piOrg[19] - piCur[19]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1205 iTemp = piOrg[20] - piCur[20]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1206 iTemp = piOrg[21] - piCur[21]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1207 iTemp = piOrg[22] - piCur[22]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1208 iTemp = piOrg[23] - piCur[23]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1209 iTemp = piOrg[24] - piCur[24]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1210 iTemp = piOrg[25] - piCur[25]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1211 iTemp = piOrg[26] - piCur[26]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1212 iTemp = piOrg[27] - piCur[27]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1213 iTemp = piOrg[28] - piCur[28]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1214 iTemp = piOrg[29] - piCur[29]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1215 iTemp = piOrg[30] - piCur[30]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1216 iTemp = piOrg[31] - piCur[31]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1217 | |
1218 piOrg += iStrideOrg; | |
1219 piCur += iStrideCur; | |
1220 } | |
1221 | |
1222 return ( uiSum ); | |
1223 } | |
1224 | |
1225 Distortion TComRdCost::xGetSSE64( DistParam* pcDtParam ) | |
1226 { | |
1227 if ( pcDtParam->bApplyWeight ) | |
1228 { | |
1229 assert( pcDtParam->iCols == 64 ); | |
1230 return TComRdCostWeightPrediction::xGetSSEw( pcDtParam ); | |
1231 } | |
1232 const Pel* piOrg = pcDtParam->pOrg; | |
1233 const Pel* piCur = pcDtParam->pCur; | |
1234 Int iRows = pcDtParam->iRows; | |
1235 Int iStrideOrg = pcDtParam->iStrideOrg; | |
1236 Int iStrideCur = pcDtParam->iStrideCur; | |
1237 | |
1238 Distortion uiSum = 0; | |
1239 UInt uiShift = DISTORTION_PRECISION_ADJUSTMENT((pcDtParam->bitDepth-8) << 1); | |
1240 | |
1241 Intermediate_Int iTemp; | |
1242 | |
1243 for( ; iRows != 0; iRows-- ) | |
1244 { | |
1245 iTemp = piOrg[ 0] - piCur[ 0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1246 iTemp = piOrg[ 1] - piCur[ 1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1247 iTemp = piOrg[ 2] - piCur[ 2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1248 iTemp = piOrg[ 3] - piCur[ 3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1249 iTemp = piOrg[ 4] - piCur[ 4]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1250 iTemp = piOrg[ 5] - piCur[ 5]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1251 iTemp = piOrg[ 6] - piCur[ 6]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1252 iTemp = piOrg[ 7] - piCur[ 7]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1253 iTemp = piOrg[ 8] - piCur[ 8]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1254 iTemp = piOrg[ 9] - piCur[ 9]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1255 iTemp = piOrg[10] - piCur[10]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1256 iTemp = piOrg[11] - piCur[11]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1257 iTemp = piOrg[12] - piCur[12]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1258 iTemp = piOrg[13] - piCur[13]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1259 iTemp = piOrg[14] - piCur[14]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1260 iTemp = piOrg[15] - piCur[15]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1261 iTemp = piOrg[16] - piCur[16]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1262 iTemp = piOrg[17] - piCur[17]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1263 iTemp = piOrg[18] - piCur[18]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1264 iTemp = piOrg[19] - piCur[19]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1265 iTemp = piOrg[20] - piCur[20]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1266 iTemp = piOrg[21] - piCur[21]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1267 iTemp = piOrg[22] - piCur[22]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1268 iTemp = piOrg[23] - piCur[23]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1269 iTemp = piOrg[24] - piCur[24]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1270 iTemp = piOrg[25] - piCur[25]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1271 iTemp = piOrg[26] - piCur[26]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1272 iTemp = piOrg[27] - piCur[27]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1273 iTemp = piOrg[28] - piCur[28]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1274 iTemp = piOrg[29] - piCur[29]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1275 iTemp = piOrg[30] - piCur[30]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1276 iTemp = piOrg[31] - piCur[31]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1277 iTemp = piOrg[32] - piCur[32]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1278 iTemp = piOrg[33] - piCur[33]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1279 iTemp = piOrg[34] - piCur[34]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1280 iTemp = piOrg[35] - piCur[35]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1281 iTemp = piOrg[36] - piCur[36]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1282 iTemp = piOrg[37] - piCur[37]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1283 iTemp = piOrg[38] - piCur[38]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1284 iTemp = piOrg[39] - piCur[39]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1285 iTemp = piOrg[40] - piCur[40]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1286 iTemp = piOrg[41] - piCur[41]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1287 iTemp = piOrg[42] - piCur[42]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1288 iTemp = piOrg[43] - piCur[43]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1289 iTemp = piOrg[44] - piCur[44]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1290 iTemp = piOrg[45] - piCur[45]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1291 iTemp = piOrg[46] - piCur[46]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1292 iTemp = piOrg[47] - piCur[47]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1293 iTemp = piOrg[48] - piCur[48]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1294 iTemp = piOrg[49] - piCur[49]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1295 iTemp = piOrg[50] - piCur[50]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1296 iTemp = piOrg[51] - piCur[51]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1297 iTemp = piOrg[52] - piCur[52]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1298 iTemp = piOrg[53] - piCur[53]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1299 iTemp = piOrg[54] - piCur[54]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1300 iTemp = piOrg[55] - piCur[55]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1301 iTemp = piOrg[56] - piCur[56]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1302 iTemp = piOrg[57] - piCur[57]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1303 iTemp = piOrg[58] - piCur[58]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1304 iTemp = piOrg[59] - piCur[59]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1305 iTemp = piOrg[60] - piCur[60]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1306 iTemp = piOrg[61] - piCur[61]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1307 iTemp = piOrg[62] - piCur[62]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1308 iTemp = piOrg[63] - piCur[63]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); | |
1309 | |
1310 piOrg += iStrideOrg; | |
1311 piCur += iStrideCur; | |
1312 } | |
1313 | |
1314 return ( uiSum ); | |
1315 } | |
1316 | |
1317 // -------------------------------------------------------------------------------------------------------------------- | |
1318 // HADAMARD with step (used in fractional search) | |
1319 // -------------------------------------------------------------------------------------------------------------------- | |
1320 | |
1321 Distortion TComRdCost::xCalcHADs2x2( Pel *piOrg, Pel *piCur, Int iStrideOrg, Int iStrideCur, Int iStep ) | |
1322 { | |
1323 Distortion satd = 0; | |
1324 TCoeff diff[4], m[4]; | |
1325 assert( iStep == 1 ); | |
1326 diff[0] = piOrg[0 ] - piCur[0]; | |
1327 diff[1] = piOrg[1 ] - piCur[1]; | |
1328 diff[2] = piOrg[iStrideOrg ] - piCur[0 + iStrideCur]; | |
1329 diff[3] = piOrg[iStrideOrg + 1] - piCur[1 + iStrideCur]; | |
1330 m[0] = diff[0] + diff[2]; | |
1331 m[1] = diff[1] + diff[3]; | |
1332 m[2] = diff[0] - diff[2]; | |
1333 m[3] = diff[1] - diff[3]; | |
1334 | |
1335 satd += abs(m[0] + m[1]); | |
1336 satd += abs(m[0] - m[1]); | |
1337 satd += abs(m[2] + m[3]); | |
1338 satd += abs(m[2] - m[3]); | |
1339 | |
1340 return satd; | |
1341 } | |
1342 | |
1343 Distortion TComRdCost::xCalcHADs4x4( Pel *piOrg, Pel *piCur, Int iStrideOrg, Int iStrideCur, Int iStep ) | |
1344 { | |
1345 Int k; | |
1346 Distortion satd = 0; | |
1347 TCoeff diff[16], m[16], d[16]; | |
1348 | |
1349 assert( iStep == 1 ); | |
1350 for( k = 0; k < 16; k+=4 ) | |
1351 { | |
1352 diff[k+0] = piOrg[0] - piCur[0]; | |
1353 diff[k+1] = piOrg[1] - piCur[1]; | |
1354 diff[k+2] = piOrg[2] - piCur[2]; | |
1355 diff[k+3] = piOrg[3] - piCur[3]; | |
1356 | |
1357 piCur += iStrideCur; | |
1358 piOrg += iStrideOrg; | |
1359 } | |
1360 | |
1361 /*===== hadamard transform =====*/ | |
1362 m[ 0] = diff[ 0] + diff[12]; | |
1363 m[ 1] = diff[ 1] + diff[13]; | |
1364 m[ 2] = diff[ 2] + diff[14]; | |
1365 m[ 3] = diff[ 3] + diff[15]; | |
1366 m[ 4] = diff[ 4] + diff[ 8]; | |
1367 m[ 5] = diff[ 5] + diff[ 9]; | |
1368 m[ 6] = diff[ 6] + diff[10]; | |
1369 m[ 7] = diff[ 7] + diff[11]; | |
1370 m[ 8] = diff[ 4] - diff[ 8]; | |
1371 m[ 9] = diff[ 5] - diff[ 9]; | |
1372 m[10] = diff[ 6] - diff[10]; | |
1373 m[11] = diff[ 7] - diff[11]; | |
1374 m[12] = diff[ 0] - diff[12]; | |
1375 m[13] = diff[ 1] - diff[13]; | |
1376 m[14] = diff[ 2] - diff[14]; | |
1377 m[15] = diff[ 3] - diff[15]; | |
1378 | |
1379 d[ 0] = m[ 0] + m[ 4]; | |
1380 d[ 1] = m[ 1] + m[ 5]; | |
1381 d[ 2] = m[ 2] + m[ 6]; | |
1382 d[ 3] = m[ 3] + m[ 7]; | |
1383 d[ 4] = m[ 8] + m[12]; | |
1384 d[ 5] = m[ 9] + m[13]; | |
1385 d[ 6] = m[10] + m[14]; | |
1386 d[ 7] = m[11] + m[15]; | |
1387 d[ 8] = m[ 0] - m[ 4]; | |
1388 d[ 9] = m[ 1] - m[ 5]; | |
1389 d[10] = m[ 2] - m[ 6]; | |
1390 d[11] = m[ 3] - m[ 7]; | |
1391 d[12] = m[12] - m[ 8]; | |
1392 d[13] = m[13] - m[ 9]; | |
1393 d[14] = m[14] - m[10]; | |
1394 d[15] = m[15] - m[11]; | |
1395 | |
1396 m[ 0] = d[ 0] + d[ 3]; | |
1397 m[ 1] = d[ 1] + d[ 2]; | |
1398 m[ 2] = d[ 1] - d[ 2]; | |
1399 m[ 3] = d[ 0] - d[ 3]; | |
1400 m[ 4] = d[ 4] + d[ 7]; | |
1401 m[ 5] = d[ 5] + d[ 6]; | |
1402 m[ 6] = d[ 5] - d[ 6]; | |
1403 m[ 7] = d[ 4] - d[ 7]; | |
1404 m[ 8] = d[ 8] + d[11]; | |
1405 m[ 9] = d[ 9] + d[10]; | |
1406 m[10] = d[ 9] - d[10]; | |
1407 m[11] = d[ 8] - d[11]; | |
1408 m[12] = d[12] + d[15]; | |
1409 m[13] = d[13] + d[14]; | |
1410 m[14] = d[13] - d[14]; | |
1411 m[15] = d[12] - d[15]; | |
1412 | |
1413 d[ 0] = m[ 0] + m[ 1]; | |
1414 d[ 1] = m[ 0] - m[ 1]; | |
1415 d[ 2] = m[ 2] + m[ 3]; | |
1416 d[ 3] = m[ 3] - m[ 2]; | |
1417 d[ 4] = m[ 4] + m[ 5]; | |
1418 d[ 5] = m[ 4] - m[ 5]; | |
1419 d[ 6] = m[ 6] + m[ 7]; | |
1420 d[ 7] = m[ 7] - m[ 6]; | |
1421 d[ 8] = m[ 8] + m[ 9]; | |
1422 d[ 9] = m[ 8] - m[ 9]; | |
1423 d[10] = m[10] + m[11]; | |
1424 d[11] = m[11] - m[10]; | |
1425 d[12] = m[12] + m[13]; | |
1426 d[13] = m[12] - m[13]; | |
1427 d[14] = m[14] + m[15]; | |
1428 d[15] = m[15] - m[14]; | |
1429 | |
1430 for (k=0; k<16; ++k) | |
1431 { | |
1432 satd += abs(d[k]); | |
1433 } | |
1434 satd = ((satd+1)>>1); | |
1435 | |
1436 return satd; | |
1437 } | |
1438 | |
1439 Distortion TComRdCost::xCalcHADs8x8( Pel *piOrg, Pel *piCur, Int iStrideOrg, Int iStrideCur, Int iStep ) | |
1440 { | |
1441 Int k, i, j, jj; | |
1442 Distortion sad = 0; | |
1443 TCoeff diff[64], m1[8][8], m2[8][8], m3[8][8]; | |
1444 assert( iStep == 1 ); | |
1445 for( k = 0; k < 64; k += 8 ) | |
1446 { | |
1447 diff[k+0] = piOrg[0] - piCur[0]; | |
1448 diff[k+1] = piOrg[1] - piCur[1]; | |
1449 diff[k+2] = piOrg[2] - piCur[2]; | |
1450 diff[k+3] = piOrg[3] - piCur[3]; | |
1451 diff[k+4] = piOrg[4] - piCur[4]; | |
1452 diff[k+5] = piOrg[5] - piCur[5]; | |
1453 diff[k+6] = piOrg[6] - piCur[6]; | |
1454 diff[k+7] = piOrg[7] - piCur[7]; | |
1455 | |
1456 piCur += iStrideCur; | |
1457 piOrg += iStrideOrg; | |
1458 } | |
1459 | |
1460 //horizontal | |
1461 for (j=0; j < 8; j++) | |
1462 { | |
1463 jj = j << 3; | |
1464 m2[j][0] = diff[jj ] + diff[jj+4]; | |
1465 m2[j][1] = diff[jj+1] + diff[jj+5]; | |
1466 m2[j][2] = diff[jj+2] + diff[jj+6]; | |
1467 m2[j][3] = diff[jj+3] + diff[jj+7]; | |
1468 m2[j][4] = diff[jj ] - diff[jj+4]; | |
1469 m2[j][5] = diff[jj+1] - diff[jj+5]; | |
1470 m2[j][6] = diff[jj+2] - diff[jj+6]; | |
1471 m2[j][7] = diff[jj+3] - diff[jj+7]; | |
1472 | |
1473 m1[j][0] = m2[j][0] + m2[j][2]; | |
1474 m1[j][1] = m2[j][1] + m2[j][3]; | |
1475 m1[j][2] = m2[j][0] - m2[j][2]; | |
1476 m1[j][3] = m2[j][1] - m2[j][3]; | |
1477 m1[j][4] = m2[j][4] + m2[j][6]; | |
1478 m1[j][5] = m2[j][5] + m2[j][7]; | |
1479 m1[j][6] = m2[j][4] - m2[j][6]; | |
1480 m1[j][7] = m2[j][5] - m2[j][7]; | |
1481 | |
1482 m2[j][0] = m1[j][0] + m1[j][1]; | |
1483 m2[j][1] = m1[j][0] - m1[j][1]; | |
1484 m2[j][2] = m1[j][2] + m1[j][3]; | |
1485 m2[j][3] = m1[j][2] - m1[j][3]; | |
1486 m2[j][4] = m1[j][4] + m1[j][5]; | |
1487 m2[j][5] = m1[j][4] - m1[j][5]; | |
1488 m2[j][6] = m1[j][6] + m1[j][7]; | |
1489 m2[j][7] = m1[j][6] - m1[j][7]; | |
1490 } | |
1491 | |
1492 //vertical | |
1493 for (i=0; i < 8; i++) | |
1494 { | |
1495 m3[0][i] = m2[0][i] + m2[4][i]; | |
1496 m3[1][i] = m2[1][i] + m2[5][i]; | |
1497 m3[2][i] = m2[2][i] + m2[6][i]; | |
1498 m3[3][i] = m2[3][i] + m2[7][i]; | |
1499 m3[4][i] = m2[0][i] - m2[4][i]; | |
1500 m3[5][i] = m2[1][i] - m2[5][i]; | |
1501 m3[6][i] = m2[2][i] - m2[6][i]; | |
1502 m3[7][i] = m2[3][i] - m2[7][i]; | |
1503 | |
1504 m1[0][i] = m3[0][i] + m3[2][i]; | |
1505 m1[1][i] = m3[1][i] + m3[3][i]; | |
1506 m1[2][i] = m3[0][i] - m3[2][i]; | |
1507 m1[3][i] = m3[1][i] - m3[3][i]; | |
1508 m1[4][i] = m3[4][i] + m3[6][i]; | |
1509 m1[5][i] = m3[5][i] + m3[7][i]; | |
1510 m1[6][i] = m3[4][i] - m3[6][i]; | |
1511 m1[7][i] = m3[5][i] - m3[7][i]; | |
1512 | |
1513 m2[0][i] = m1[0][i] + m1[1][i]; | |
1514 m2[1][i] = m1[0][i] - m1[1][i]; | |
1515 m2[2][i] = m1[2][i] + m1[3][i]; | |
1516 m2[3][i] = m1[2][i] - m1[3][i]; | |
1517 m2[4][i] = m1[4][i] + m1[5][i]; | |
1518 m2[5][i] = m1[4][i] - m1[5][i]; | |
1519 m2[6][i] = m1[6][i] + m1[7][i]; | |
1520 m2[7][i] = m1[6][i] - m1[7][i]; | |
1521 } | |
1522 | |
1523 for (i = 0; i < 8; i++) | |
1524 { | |
1525 for (j = 0; j < 8; j++) | |
1526 { | |
1527 sad += abs(m2[i][j]); | |
1528 } | |
1529 } | |
1530 | |
1531 sad=((sad+2)>>2); | |
1532 | |
1533 return sad; | |
1534 } | |
1535 | |
1536 | |
1537 Distortion TComRdCost::xGetHADs( DistParam* pcDtParam ) | |
1538 { | |
1539 if ( pcDtParam->bApplyWeight ) | |
1540 { | |
1541 return TComRdCostWeightPrediction::xGetHADsw( pcDtParam ); | |
1542 } | |
1543 Pel* piOrg = pcDtParam->pOrg; | |
1544 Pel* piCur = pcDtParam->pCur; | |
1545 Int iRows = pcDtParam->iRows; | |
1546 Int iCols = pcDtParam->iCols; | |
1547 Int iStrideCur = pcDtParam->iStrideCur; | |
1548 Int iStrideOrg = pcDtParam->iStrideOrg; | |
1549 Int iStep = pcDtParam->iStep; | |
1550 | |
1551 Int x, y; | |
1552 | |
1553 Distortion uiSum = 0; | |
1554 | |
1555 if( ( iRows % 8 == 0) && (iCols % 8 == 0) ) | |
1556 { | |
1557 Int iOffsetOrg = iStrideOrg<<3; | |
1558 Int iOffsetCur = iStrideCur<<3; | |
1559 for ( y=0; y<iRows; y+= 8 ) | |
1560 { | |
1561 for ( x=0; x<iCols; x+= 8 ) | |
1562 { | |
1563 uiSum += xCalcHADs8x8( &piOrg[x], &piCur[x*iStep], iStrideOrg, iStrideCur, iStep ); | |
1564 } | |
1565 piOrg += iOffsetOrg; | |
1566 piCur += iOffsetCur; | |
1567 } | |
1568 } | |
1569 else if( ( iRows % 4 == 0) && (iCols % 4 == 0) ) | |
1570 { | |
1571 Int iOffsetOrg = iStrideOrg<<2; | |
1572 Int iOffsetCur = iStrideCur<<2; | |
1573 | |
1574 for ( y=0; y<iRows; y+= 4 ) | |
1575 { | |
1576 for ( x=0; x<iCols; x+= 4 ) | |
1577 { | |
1578 uiSum += xCalcHADs4x4( &piOrg[x], &piCur[x*iStep], iStrideOrg, iStrideCur, iStep ); | |
1579 } | |
1580 piOrg += iOffsetOrg; | |
1581 piCur += iOffsetCur; | |
1582 } | |
1583 } | |
1584 else if( ( iRows % 2 == 0) && (iCols % 2 == 0) ) | |
1585 { | |
1586 Int iOffsetOrg = iStrideOrg<<1; | |
1587 Int iOffsetCur = iStrideCur<<1; | |
1588 for ( y=0; y<iRows; y+=2 ) | |
1589 { | |
1590 for ( x=0; x<iCols; x+=2 ) | |
1591 { | |
1592 uiSum += xCalcHADs2x2( &piOrg[x], &piCur[x*iStep], iStrideOrg, iStrideCur, iStep ); | |
1593 } | |
1594 piOrg += iOffsetOrg; | |
1595 piCur += iOffsetCur; | |
1596 } | |
1597 } | |
1598 else | |
1599 { | |
1600 assert(false); | |
1601 } | |
1602 | |
1603 return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) ); | |
1604 } | |
1605 | |
1606 //! \} |