Mercurial repository: hg > forks > libbpg
Comparison view of x265/source/encoder/analysis.cpp at changeset 0:772086c29cc7 ("Initial import.")

Author:   Matti Hamalainen <ccr@tnsp.org>
Date:     Wed, 16 Nov 2016 11:16:33 +0200
Parents:  (none)
Children: (none)
Diff:     -1:000000000000 -> 0:772086c29cc7 (comparison legend: equal / deleted / inserted / replaced)
/*****************************************************************************
 * Copyright (C) 2013 x265 project
 *
 * Authors: Deepthi Nandakumar <deepthi@multicorewareinc.com>
 *          Steve Borho <steve@borho.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at license @ x265.com.
 *****************************************************************************/
24 | |
25 #include "common.h" | |
26 #include "frame.h" | |
27 #include "framedata.h" | |
28 #include "picyuv.h" | |
29 #include "primitives.h" | |
30 #include "threading.h" | |
31 | |
32 #include "analysis.h" | |
33 #include "rdcost.h" | |
34 #include "encoder.h" | |
35 | |
36 using namespace X265_NS; | |
37 | |
/* An explanation of rate distortion levels (--rd-level)
 *
 * rd-level 0 generates no recon per CU (NO RDO or Quant)
 *
 *   sa8d selection between merge / skip / inter / intra and split
 *   no recon pixels generated until CTU analysis is complete, requiring
 *   intra predictions to use source pixels
 *
 * rd-level 1 uses RDO for merge and skip, sa8d for all else
 *
 *   RDO selection between merge and skip
 *   sa8d selection between (merge/skip) / inter modes / intra and split
 *   intra prediction uses reconstructed pixels
 *
 * rd-level 2 uses RDO for merge/skip and split
 *
 *   RDO selection between merge and skip
 *   sa8d selection between (merge/skip) / inter modes / intra
 *   RDO split decisions
 *
 * rd-level 3 uses RDO for merge/skip/best inter/intra
 *
 *   RDO selection between merge and skip
 *   sa8d selection of best inter mode
 *   sa8d decisions include chroma residual cost
 *   RDO selection between (merge/skip) / best inter mode / intra / split
 *
 * rd-level 4 enables RDOQuant
 *   chroma residual cost included in satd decisions, including subpel refine
 *   (as a result of --subme 3 being used by preset slow)
 *
 * rd-level 5,6 does RDO for each inter mode
 */
71 | |
72 Analysis::Analysis() | |
73 { | |
74 m_reuseIntraDataCTU = NULL; | |
75 m_reuseInterDataCTU = NULL; | |
76 m_reuseRef = NULL; | |
77 m_reuseBestMergeCand = NULL; | |
78 } | |
79 | |
80 bool Analysis::create(ThreadLocalData *tld) | |
81 { | |
82 m_tld = tld; | |
83 m_bTryLossless = m_param->bCULossless && !m_param->bLossless && m_param->rdLevel >= 2; | |
84 m_bChromaSa8d = m_param->rdLevel >= 3; | |
85 | |
86 int csp = m_param->internalCsp; | |
87 uint32_t cuSize = g_maxCUSize; | |
88 | |
89 bool ok = true; | |
90 for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++, cuSize >>= 1) | |
91 { | |
92 ModeDepth &md = m_modeDepth[depth]; | |
93 | |
94 md.cuMemPool.create(depth, csp, MAX_PRED_TYPES); | |
95 ok &= md.fencYuv.create(cuSize, csp); | |
96 | |
97 for (int j = 0; j < MAX_PRED_TYPES; j++) | |
98 { | |
99 md.pred[j].cu.initialize(md.cuMemPool, depth, csp, j); | |
100 ok &= md.pred[j].predYuv.create(cuSize, csp); | |
101 ok &= md.pred[j].reconYuv.create(cuSize, csp); | |
102 md.pred[j].fencYuv = &md.fencYuv; | |
103 } | |
104 } | |
105 | |
106 return ok; | |
107 } | |
108 | |
109 void Analysis::destroy() | |
110 { | |
111 for (uint32_t i = 0; i <= g_maxCUDepth; i++) | |
112 { | |
113 m_modeDepth[i].cuMemPool.destroy(); | |
114 m_modeDepth[i].fencYuv.destroy(); | |
115 | |
116 for (int j = 0; j < MAX_PRED_TYPES; j++) | |
117 { | |
118 m_modeDepth[i].pred[j].predYuv.destroy(); | |
119 m_modeDepth[i].pred[j].reconYuv.destroy(); | |
120 } | |
121 } | |
122 } | |
123 | |
/* Entry point for CTU analysis: determine the best CU partitioning and
 * prediction modes for one CTU and copy the winning data into the frame's
 * encode data (and recon picture where applicable).  Dispatches to the
 * intra / distributed / rd0-4 / rd5-6 analysis paths based on slice type
 * and encoder parameters.  Returns the depth-0 best Mode. */
Mode& Analysis::compressCTU(CUData& ctu, Frame& frame, const CUGeom& cuGeom, const Entropy& initialContext)
{
    m_slice = ctu.m_slice;
    m_frame = &frame;

#if _DEBUG || CHECKED_BUILD
    /* poison per-depth mode and entropy state so stale data from the
     * previous CTU cannot be silently reused */
    for (uint32_t i = 0; i <= g_maxCUDepth; i++)
        for (uint32_t j = 0; j < MAX_PRED_TYPES; j++)
            m_modeDepth[i].pred[j].invalidate();
    invalidateContexts(0);
#endif

    /* derive the CTU QP (per-CU adaptive QP when the PPS allows dQP) and
     * configure the RD lambdas from it */
    int qp = setLambdaFromQP(ctu, m_slice->m_pps->bUseDQP ? calculateQpforCuSize(ctu, cuGeom) : m_slice->m_sliceQp);
    ctu.setQPSubParts((int8_t)qp, 0, 0);

    /* start from the caller-provided entropy state and cache the CTU's
     * source pixels at depth 0 */
    m_rqt[0].cur.load(initialContext);
    m_modeDepth[0].fencYuv.copyFromPicYuv(*m_frame->m_fencPic, ctu.m_cuAddr, 0);

    uint32_t numPartition = ctu.m_numPartitions;
    if (m_param->analysisMode)
    {
        /* attach this CTU's slice of the shared analysis buffers for
         * save/load reuse */
        if (m_slice->m_sliceType == I_SLICE)
            m_reuseIntraDataCTU = (analysis_intra_data*)m_frame->m_analysisData.intraData;
        else
        {
            int numPredDir = m_slice->isInterP() ? 1 : 2;
            m_reuseInterDataCTU = (analysis_inter_data*)m_frame->m_analysisData.interData;
            m_reuseRef = &m_reuseInterDataCTU->ref[ctu.m_cuAddr * X265_MAX_PRED_MODE_PER_CTU * numPredDir];
            m_reuseBestMergeCand = &m_reuseInterDataCTU->bestMergeCand[ctu.m_cuAddr * CUGeom::MAX_GEOMS];
        }
    }

    ProfileCUScope(ctu, totalCTUTime, totalCTUs);

    uint32_t zOrder = 0;
    if (m_slice->m_sliceType == I_SLICE)
    {
        compressIntraCU(ctu, cuGeom, zOrder, qp);
        if (m_param->analysisMode == X265_ANALYSIS_SAVE && m_frame->m_analysisData.intraData)
        {
            /* record the chosen intra depths/modes/part-sizes so a later
             * X265_ANALYSIS_LOAD run can reuse these decisions */
            CUData* bestCU = &m_modeDepth[0].bestMode->cu;
            memcpy(&m_reuseIntraDataCTU->depth[ctu.m_cuAddr * numPartition], bestCU->m_cuDepth, sizeof(uint8_t) * numPartition);
            memcpy(&m_reuseIntraDataCTU->modes[ctu.m_cuAddr * numPartition], bestCU->m_lumaIntraDir, sizeof(uint8_t) * numPartition);
            memcpy(&m_reuseIntraDataCTU->partSizes[ctu.m_cuAddr * numPartition], bestCU->m_partSize, sizeof(uint8_t) * numPartition);
            memcpy(&m_reuseIntraDataCTU->chromaModes[ctu.m_cuAddr * numPartition], bestCU->m_chromaIntraDir, sizeof(uint8_t) * numPartition);
        }
    }
    else
    {
        if (!m_param->rdLevel)
        {
            /* In RD Level 0/1, copy source pixels into the reconstructed block so
             * they are available for intra predictions */
            m_modeDepth[0].fencYuv.copyToPicYuv(*m_frame->m_reconPic, ctu.m_cuAddr, 0);

            compressInterCU_rd0_4(ctu, cuGeom, qp);

            /* generate residual for entire CTU at once and copy to reconPic */
            encodeResidue(ctu, cuGeom);
        }
        else if (m_param->bDistributeModeAnalysis && m_param->rdLevel >= 2)
            compressInterCU_dist(ctu, cuGeom, qp);
        else if (m_param->rdLevel <= 4)
            compressInterCU_rd0_4(ctu, cuGeom, qp);
        else
        {
            compressInterCU_rd5_6(ctu, cuGeom, zOrder, qp);
            if (m_param->analysisMode == X265_ANALYSIS_SAVE && m_frame->m_analysisData.interData)
            {
                /* record the chosen inter depths/prediction-modes for later
                 * X265_ANALYSIS_LOAD runs */
                CUData* bestCU = &m_modeDepth[0].bestMode->cu;
                memcpy(&m_reuseInterDataCTU->depth[ctu.m_cuAddr * numPartition], bestCU->m_cuDepth, sizeof(uint8_t) * numPartition);
                memcpy(&m_reuseInterDataCTU->modes[ctu.m_cuAddr * numPartition], bestCU->m_predMode, sizeof(uint8_t) * numPartition);
            }
        }
    }

    return *m_modeDepth[0].bestMode;
}
202 | |
203 void Analysis::tryLossless(const CUGeom& cuGeom) | |
204 { | |
205 ModeDepth& md = m_modeDepth[cuGeom.depth]; | |
206 | |
207 if (!md.bestMode->distortion) | |
208 /* already lossless */ | |
209 return; | |
210 else if (md.bestMode->cu.isIntra(0)) | |
211 { | |
212 md.pred[PRED_LOSSLESS].initCosts(); | |
213 md.pred[PRED_LOSSLESS].cu.initLosslessCU(md.bestMode->cu, cuGeom); | |
214 PartSize size = (PartSize)md.pred[PRED_LOSSLESS].cu.m_partSize[0]; | |
215 uint8_t* modes = md.pred[PRED_LOSSLESS].cu.m_lumaIntraDir; | |
216 checkIntra(md.pred[PRED_LOSSLESS], cuGeom, size, modes, NULL); | |
217 checkBestMode(md.pred[PRED_LOSSLESS], cuGeom.depth); | |
218 } | |
219 else | |
220 { | |
221 md.pred[PRED_LOSSLESS].initCosts(); | |
222 md.pred[PRED_LOSSLESS].cu.initLosslessCU(md.bestMode->cu, cuGeom); | |
223 md.pred[PRED_LOSSLESS].predYuv.copyFromYuv(md.bestMode->predYuv); | |
224 encodeResAndCalcRdInterCU(md.pred[PRED_LOSSLESS], cuGeom); | |
225 checkBestMode(md.pred[PRED_LOSSLESS], cuGeom.depth); | |
226 } | |
227 } | |
228 | |
/* Recursively analyze intra prediction for one CU of an I-slice: evaluate
 * 2Nx2N (and NxN at the smallest CU size) against recursing into the four
 * sub-CUs, then copy the winning data into the frame's encode data/recon.
 * zOrder tracks the CU's position in the shared analysis (save/load)
 * buffers and is advanced as CUs are consumed. */
void Analysis::compressIntraCU(const CUData& parentCTU, const CUGeom& cuGeom, uint32_t& zOrder, int32_t qp)
{
    uint32_t depth = cuGeom.depth;
    ModeDepth& md = m_modeDepth[depth];
    md.bestMode = NULL;

    /* LEAF CUs cannot split further; CUs overlapping the picture border
     * carry SPLIT_MANDATORY and must split */
    bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
    bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);

    if (m_param->analysisMode == X265_ANALYSIS_LOAD)
    {
        /* reuse analysis decisions previously saved for this CTU */
        uint8_t* reuseDepth = &m_reuseIntraDataCTU->depth[parentCTU.m_cuAddr * parentCTU.m_numPartitions];
        uint8_t* reuseModes = &m_reuseIntraDataCTU->modes[parentCTU.m_cuAddr * parentCTU.m_numPartitions];
        char* reusePartSizes = &m_reuseIntraDataCTU->partSizes[parentCTU.m_cuAddr * parentCTU.m_numPartitions];
        uint8_t* reuseChromaModes = &m_reuseIntraDataCTU->chromaModes[parentCTU.m_cuAddr * parentCTU.m_numPartitions];

        if (mightNotSplit && depth == reuseDepth[zOrder] && zOrder == cuGeom.absPartIdx)
        {
            /* the saved depth matches this CU: evaluate only the saved
             * part size and directions, and suppress the split recursion */
            PartSize size = (PartSize)reusePartSizes[zOrder];
            Mode& mode = size == SIZE_2Nx2N ? md.pred[PRED_INTRA] : md.pred[PRED_INTRA_NxN];
            mode.cu.initSubCU(parentCTU, cuGeom, qp);
            checkIntra(mode, cuGeom, size, &reuseModes[zOrder], &reuseChromaModes[zOrder]);
            checkBestMode(mode, depth);

            if (m_bTryLossless)
                tryLossless(cuGeom);

            if (mightSplit)
                addSplitFlagCost(*md.bestMode, cuGeom.depth);

            // increment zOrder offset to point to next best depth in sharedDepth buffer
            zOrder += g_depthInc[g_maxCUDepth - 1][reuseDepth[zOrder]];
            mightSplit = false;
        }
    }
    else if (mightNotSplit)
    {
        /* full search: 2Nx2N intra, plus NxN when the CU is 8x8 and the
         * SPS allows 4x4 transforms */
        md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom, qp);
        checkIntra(md.pred[PRED_INTRA], cuGeom, SIZE_2Nx2N, NULL, NULL);
        checkBestMode(md.pred[PRED_INTRA], depth);

        if (cuGeom.log2CUSize == 3 && m_slice->m_sps->quadtreeTULog2MinSize < 3)
        {
            md.pred[PRED_INTRA_NxN].cu.initSubCU(parentCTU, cuGeom, qp);
            checkIntra(md.pred[PRED_INTRA_NxN], cuGeom, SIZE_NxN, NULL, NULL);
            checkBestMode(md.pred[PRED_INTRA_NxN], depth);
        }

        if (m_bTryLossless)
            tryLossless(cuGeom);

        if (mightSplit)
            addSplitFlagCost(*md.bestMode, cuGeom.depth);
    }

    if (mightSplit)
    {
        /* recurse into the four sub-CUs, chaining each child's output
         * entropy context into the next child's input, and accumulate the
         * children into a single split candidate for RD comparison */
        Mode* splitPred = &md.pred[PRED_SPLIT];
        splitPred->initCosts();
        CUData* splitCU = &splitPred->cu;
        splitCU->initSubCU(parentCTU, cuGeom, qp);

        uint32_t nextDepth = depth + 1;
        ModeDepth& nd = m_modeDepth[nextDepth];
        invalidateContexts(nextDepth);
        Entropy* nextContext = &m_rqt[depth].cur;
        int32_t nextQP = qp;

        for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
        {
            const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + subPartIdx);
            if (childGeom.flags & CUGeom::PRESENT)
            {
                m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv, childGeom.absPartIdx);
                m_rqt[nextDepth].cur.load(*nextContext);

                /* re-derive lambda/QP for the child when dQP applies at its depth */
                if (m_slice->m_pps->bUseDQP && nextDepth <= m_slice->m_pps->maxCuDQPDepth)
                    nextQP = setLambdaFromQP(parentCTU, calculateQpforCuSize(parentCTU, childGeom));

                compressIntraCU(parentCTU, childGeom, zOrder, nextQP);

                // Save best CU and pred data for this sub CU
                splitCU->copyPartFrom(nd.bestMode->cu, childGeom, subPartIdx);
                splitPred->addSubCosts(*nd.bestMode);
                nd.bestMode->reconYuv.copyToPartYuv(splitPred->reconYuv, childGeom.numPartitions * subPartIdx);
                nextContext = &nd.bestMode->contexts;
            }
            else
            {
                /* record the depth of this non-present sub-CU */
                splitCU->setEmptyPart(childGeom, subPartIdx);
                zOrder += g_depthInc[g_maxCUDepth - 1][nextDepth];
            }
        }
        nextContext->store(splitPred->contexts);
        if (mightNotSplit)
            addSplitFlagCost(*splitPred, cuGeom.depth);
        else
            updateModeCost(*splitPred);

        checkDQPForSplitPred(*splitPred, cuGeom);
        checkBestMode(*splitPred, depth);
    }

    /* Copy best data to encData CTU and recon; the split candidate's recon
     * was already written by the recursion, so only copy for non-split */
    md.bestMode->cu.copyToPic(depth);
    if (md.bestMode != &md.pred[PRED_SPLIT])
        md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, parentCTU.m_cuAddr, cuGeom.absPartIdx);
}
338 | |
339 void Analysis::PMODE::processTasks(int workerThreadId) | |
340 { | |
341 #if DETAILED_CU_STATS | |
342 int fe = master.m_modeDepth[cuGeom.depth].pred[PRED_2Nx2N].cu.m_encData->m_frameEncoderID; | |
343 master.m_stats[fe].countPModeTasks++; | |
344 ScopedElapsedTime pmodeTime(master.m_stats[fe].pmodeTime); | |
345 #endif | |
346 ProfileScopeEvent(pmode); | |
347 master.processPmode(*this, master.m_tld[workerThreadId].analysis); | |
348 } | |
349 | |
/* process pmode jobs until none remain; may be called by the master thread or by
 * a bonded peer (slave) thread via pmodeTasks().  Each job is one prediction
 * mode (intra, 2Nx2N, rect, or AMP) of the CU described by pmode.cuGeom;
 * jobs are handed out one at a time under pmode.m_lock. */
void Analysis::processPmode(PMODE& pmode, Analysis& slave)
{
    /* acquire a mode task, else exit early */
    int task;
    pmode.m_lock.acquire();
    if (pmode.m_jobTotal > pmode.m_jobAcquired)
    {
        task = pmode.m_jobAcquired++;
        pmode.m_lock.release();
    }
    else
    {
        pmode.m_lock.release();
        return;
    }

    ModeDepth& md = m_modeDepth[pmode.cuGeom.depth];

    /* setup slave Analysis */
    if (&slave != this)
    {
        /* copy the master's per-frame state into the worker's thread-local
         * Analysis so its mode checks behave identically to the master's */
        slave.m_slice = m_slice;
        slave.m_frame = m_frame;
        slave.m_param = m_param;
        slave.setLambdaFromQP(md.pred[PRED_2Nx2N].cu, m_rdCost.m_qp);
        slave.invalidateContexts(0);
        slave.m_rqt[pmode.cuGeom.depth].cur.load(m_rqt[pmode.cuGeom.depth].cur);
    }

    /* perform Mode task, repeat until no more work is available */
    do
    {
        /* refMasks[] limit motion search per partition region, built from the
         * references the four sub-CUs chose (m_splitRefIdx, indexed in
         * z-order: 0=TL, 1=TR, 2=BL, 3=BR) */
        uint32_t refMasks[2] = { 0, 0 };

        if (m_param->rdLevel <= 4)
        {
            /* sa8d-based mode evaluation */
            switch (pmode.modes[task])
            {
            case PRED_INTRA:
                slave.checkIntraInInter(md.pred[PRED_INTRA], pmode.cuGeom);
                if (m_param->rdLevel > 2)
                    slave.encodeIntraInInter(md.pred[PRED_INTRA], pmode.cuGeom);
                break;

            case PRED_2Nx2N:
                refMasks[0] = m_splitRefIdx[0] | m_splitRefIdx[1] | m_splitRefIdx[2] | m_splitRefIdx[3];

                slave.checkInter_rd0_4(md.pred[PRED_2Nx2N], pmode.cuGeom, SIZE_2Nx2N, refMasks);
                if (m_slice->m_sliceType == B_SLICE)
                    slave.checkBidir2Nx2N(md.pred[PRED_2Nx2N], md.pred[PRED_BIDIR], pmode.cuGeom);
                break;

            case PRED_Nx2N:
                refMasks[0] = m_splitRefIdx[0] | m_splitRefIdx[2]; /* left */
                refMasks[1] = m_splitRefIdx[1] | m_splitRefIdx[3]; /* right */

                slave.checkInter_rd0_4(md.pred[PRED_Nx2N], pmode.cuGeom, SIZE_Nx2N, refMasks);
                break;

            case PRED_2NxN:
                refMasks[0] = m_splitRefIdx[0] | m_splitRefIdx[1]; /* top */
                refMasks[1] = m_splitRefIdx[2] | m_splitRefIdx[3]; /* bot */

                slave.checkInter_rd0_4(md.pred[PRED_2NxN], pmode.cuGeom, SIZE_2NxN, refMasks);
                break;

            case PRED_2NxnU:
                refMasks[0] = m_splitRefIdx[0] | m_splitRefIdx[1]; /* 25% top */
                refMasks[1] = m_splitRefIdx[0] | m_splitRefIdx[1] | m_splitRefIdx[2] | m_splitRefIdx[3]; /* 75% bot */

                slave.checkInter_rd0_4(md.pred[PRED_2NxnU], pmode.cuGeom, SIZE_2NxnU, refMasks);
                break;

            case PRED_2NxnD:
                refMasks[0] = m_splitRefIdx[0] | m_splitRefIdx[1] | m_splitRefIdx[2] | m_splitRefIdx[3]; /* 75% top */
                refMasks[1] = m_splitRefIdx[2] | m_splitRefIdx[3]; /* 25% bot */

                slave.checkInter_rd0_4(md.pred[PRED_2NxnD], pmode.cuGeom, SIZE_2NxnD, refMasks);
                break;

            case PRED_nLx2N:
                refMasks[0] = m_splitRefIdx[0] | m_splitRefIdx[2]; /* 25% left */
                refMasks[1] = m_splitRefIdx[0] | m_splitRefIdx[1] | m_splitRefIdx[2] | m_splitRefIdx[3]; /* 75% right */

                slave.checkInter_rd0_4(md.pred[PRED_nLx2N], pmode.cuGeom, SIZE_nLx2N, refMasks);
                break;

            case PRED_nRx2N:
                refMasks[0] = m_splitRefIdx[0] | m_splitRefIdx[1] | m_splitRefIdx[2] | m_splitRefIdx[3]; /* 75% left */
                refMasks[1] = m_splitRefIdx[1] | m_splitRefIdx[3]; /* 25% right */

                slave.checkInter_rd0_4(md.pred[PRED_nRx2N], pmode.cuGeom, SIZE_nRx2N, refMasks);
                break;

            default:
                X265_CHECK(0, "invalid job ID for parallel mode analysis\n");
                break;
            }
        }
        else
        {
            /* full RDO mode evaluation (rd-levels 5/6) */
            switch (pmode.modes[task])
            {
            case PRED_INTRA:
                slave.checkIntra(md.pred[PRED_INTRA], pmode.cuGeom, SIZE_2Nx2N, NULL, NULL);
                if (pmode.cuGeom.log2CUSize == 3 && m_slice->m_sps->quadtreeTULog2MinSize < 3)
                    slave.checkIntra(md.pred[PRED_INTRA_NxN], pmode.cuGeom, SIZE_NxN, NULL, NULL);
                break;

            case PRED_2Nx2N:
                refMasks[0] = m_splitRefIdx[0] | m_splitRefIdx[1] | m_splitRefIdx[2] | m_splitRefIdx[3];

                slave.checkInter_rd5_6(md.pred[PRED_2Nx2N], pmode.cuGeom, SIZE_2Nx2N, refMasks);
                /* mark BIDIR invalid unless checkBidir2Nx2N() finds a candidate */
                md.pred[PRED_BIDIR].rdCost = MAX_INT64;
                if (m_slice->m_sliceType == B_SLICE)
                {
                    slave.checkBidir2Nx2N(md.pred[PRED_2Nx2N], md.pred[PRED_BIDIR], pmode.cuGeom);
                    if (md.pred[PRED_BIDIR].sa8dCost < MAX_INT64)
                        slave.encodeResAndCalcRdInterCU(md.pred[PRED_BIDIR], pmode.cuGeom);
                }
                break;

            case PRED_Nx2N:
                refMasks[0] = m_splitRefIdx[0] | m_splitRefIdx[2]; /* left */
                refMasks[1] = m_splitRefIdx[1] | m_splitRefIdx[3]; /* right */

                slave.checkInter_rd5_6(md.pred[PRED_Nx2N], pmode.cuGeom, SIZE_Nx2N, refMasks);
                break;

            case PRED_2NxN:
                refMasks[0] = m_splitRefIdx[0] | m_splitRefIdx[1]; /* top */
                refMasks[1] = m_splitRefIdx[2] | m_splitRefIdx[3]; /* bot */

                slave.checkInter_rd5_6(md.pred[PRED_2NxN], pmode.cuGeom, SIZE_2NxN, refMasks);
                break;

            case PRED_2NxnU:
                refMasks[0] = m_splitRefIdx[0] | m_splitRefIdx[1]; /* 25% top */
                refMasks[1] = m_splitRefIdx[0] | m_splitRefIdx[1] | m_splitRefIdx[2] | m_splitRefIdx[3]; /* 75% bot */

                slave.checkInter_rd5_6(md.pred[PRED_2NxnU], pmode.cuGeom, SIZE_2NxnU, refMasks);
                break;

            case PRED_2NxnD:
                refMasks[0] = m_splitRefIdx[0] | m_splitRefIdx[1] | m_splitRefIdx[2] | m_splitRefIdx[3]; /* 75% top */
                refMasks[1] = m_splitRefIdx[2] | m_splitRefIdx[3]; /* 25% bot */
                slave.checkInter_rd5_6(md.pred[PRED_2NxnD], pmode.cuGeom, SIZE_2NxnD, refMasks);
                break;

            case PRED_nLx2N:
                refMasks[0] = m_splitRefIdx[0] | m_splitRefIdx[2]; /* 25% left */
                refMasks[1] = m_splitRefIdx[0] | m_splitRefIdx[1] | m_splitRefIdx[2] | m_splitRefIdx[3]; /* 75% right */

                slave.checkInter_rd5_6(md.pred[PRED_nLx2N], pmode.cuGeom, SIZE_nLx2N, refMasks);
                break;

            case PRED_nRx2N:
                refMasks[0] = m_splitRefIdx[0] | m_splitRefIdx[1] | m_splitRefIdx[2] | m_splitRefIdx[3]; /* 75% left */
                refMasks[1] = m_splitRefIdx[1] | m_splitRefIdx[3]; /* 25% right */
                slave.checkInter_rd5_6(md.pred[PRED_nRx2N], pmode.cuGeom, SIZE_nRx2N, refMasks);
                break;

            default:
                X265_CHECK(0, "invalid job ID for parallel mode analysis\n");
                break;
            }
        }

        /* try to grab another task; loop exits when the queue is drained */
        task = -1;
        pmode.m_lock.acquire();
        if (pmode.m_jobTotal > pmode.m_jobAcquired)
            task = pmode.m_jobAcquired++;
        pmode.m_lock.release();
    }
    while (task >= 0);
}
528 | |
/* Distributed (parallel) inter CU analysis used by --pmode at rd-levels 2-6.
 * Per-mode evaluation jobs are farmed out to bonded worker threads via the
 * PMODE job provider while this (master) thread performs merge analysis and
 * the split recursion; results are then collected and the best mode chosen.
 * Returns the mask of motion references used by the best mode so the parent
 * CU can restrict its own search (X265_REF_LIMIT_DEPTH). */
uint32_t Analysis::compressInterCU_dist(const CUData& parentCTU, const CUGeom& cuGeom, int32_t qp)
{
    uint32_t depth = cuGeom.depth;
    uint32_t cuAddr = parentCTU.m_cuAddr;
    ModeDepth& md = m_modeDepth[depth];
    md.bestMode = NULL;

    bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
    bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
    uint32_t minDepth = m_param->rdLevel <= 4 ? topSkipMinDepth(parentCTU, cuGeom) : 0;
    uint32_t splitRefs[4] = { 0, 0, 0, 0 };

    X265_CHECK(m_param->rdLevel >= 2, "compressInterCU_dist does not support RD 0 or 1\n");

    PMODE pmode(*this, cuGeom);

    /* evaluate merge/skip first; a skipped best mode can short-circuit the
     * split recursion below */
    if (mightNotSplit && depth >= minDepth)
    {
        /* Initialize all prediction CUs based on parentCTU */
        md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
        md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);

        if (m_param->rdLevel <= 4)
            checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
        else
            checkMerge2Nx2N_rd5_6(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom, false);
    }

    bool bNoSplit = false;
    bool splitIntra = true;
    if (md.bestMode)
    {
        bNoSplit = md.bestMode->cu.isSkipped(0);
        if (mightSplit && depth && depth >= minDepth && !bNoSplit && m_param->rdLevel <= 4)
            bNoSplit = recursionDepthCheck(parentCTU, cuGeom, *md.bestMode);
    }

    /* recurse into the four sub-CUs before dispatching this depth's mode
     * jobs, so the children's reference masks can seed the search */
    if (mightSplit && !bNoSplit)
    {
        Mode* splitPred = &md.pred[PRED_SPLIT];
        splitPred->initCosts();
        CUData* splitCU = &splitPred->cu;
        splitCU->initSubCU(parentCTU, cuGeom, qp);

        uint32_t nextDepth = depth + 1;
        ModeDepth& nd = m_modeDepth[nextDepth];
        invalidateContexts(nextDepth);
        Entropy* nextContext = &m_rqt[depth].cur;
        int nextQP = qp;
        splitIntra = false;

        for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
        {
            const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + subPartIdx);
            if (childGeom.flags & CUGeom::PRESENT)
            {
                m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv, childGeom.absPartIdx);
                m_rqt[nextDepth].cur.load(*nextContext);

                if (m_slice->m_pps->bUseDQP && nextDepth <= m_slice->m_pps->maxCuDQPDepth)
                    nextQP = setLambdaFromQP(parentCTU, calculateQpforCuSize(parentCTU, childGeom));

                splitRefs[subPartIdx] = compressInterCU_dist(parentCTU, childGeom, nextQP);

                // Save best CU and pred data for this sub CU
                splitIntra |= nd.bestMode->cu.isIntra(0);
                splitCU->copyPartFrom(nd.bestMode->cu, childGeom, subPartIdx);
                splitPred->addSubCosts(*nd.bestMode);

                nd.bestMode->reconYuv.copyToPartYuv(splitPred->reconYuv, childGeom.numPartitions * subPartIdx);
                nextContext = &nd.bestMode->contexts;
            }
            else
                splitCU->setEmptyPart(childGeom, subPartIdx);
        }
        nextContext->store(splitPred->contexts);

        if (mightNotSplit)
            addSplitFlagCost(*splitPred, cuGeom.depth);
        else
            updateModeCost(*splitPred);

        checkDQPForSplitPred(*splitPred, cuGeom);
    }

    if (mightNotSplit && depth >= minDepth)
    {
        int bTryAmp = m_slice->m_sps->maxAMPDepth > depth;
        int bTryIntra = (m_slice->m_sliceType != B_SLICE || m_param->bIntraInBFrames) && (!m_param->limitReferences || splitIntra);

        /* restore this depth's lambda (the recursion above may have changed it) */
        if (m_slice->m_pps->bUseDQP && depth <= m_slice->m_pps->maxCuDQPDepth && m_slice->m_pps->maxCuDQPDepth != 0)
            setLambdaFromQP(parentCTU, qp);

        /* queue one job per candidate prediction mode */
        if (bTryIntra)
        {
            md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom, qp);
            if (cuGeom.log2CUSize == 3 && m_slice->m_sps->quadtreeTULog2MinSize < 3 && m_param->rdLevel >= 5)
                md.pred[PRED_INTRA_NxN].cu.initSubCU(parentCTU, cuGeom, qp);
            pmode.modes[pmode.m_jobTotal++] = PRED_INTRA;
        }
        md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom, qp); pmode.modes[pmode.m_jobTotal++] = PRED_2Nx2N;
        md.pred[PRED_BIDIR].cu.initSubCU(parentCTU, cuGeom, qp);
        if (m_param->bEnableRectInter)
        {
            md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom, qp); pmode.modes[pmode.m_jobTotal++] = PRED_2NxN;
            md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom, qp); pmode.modes[pmode.m_jobTotal++] = PRED_Nx2N;
        }
        if (bTryAmp)
        {
            md.pred[PRED_2NxnU].cu.initSubCU(parentCTU, cuGeom, qp); pmode.modes[pmode.m_jobTotal++] = PRED_2NxnU;
            md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom, qp); pmode.modes[pmode.m_jobTotal++] = PRED_2NxnD;
            md.pred[PRED_nLx2N].cu.initSubCU(parentCTU, cuGeom, qp); pmode.modes[pmode.m_jobTotal++] = PRED_nLx2N;
            md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom, qp); pmode.modes[pmode.m_jobTotal++] = PRED_nRx2N;
        }

        /* publish the children's reference masks for the job workers */
        m_splitRefIdx[0] = splitRefs[0]; m_splitRefIdx[1] = splitRefs[1]; m_splitRefIdx[2] = splitRefs[2]; m_splitRefIdx[3] = splitRefs[3];

        pmode.tryBondPeers(*m_frame->m_encData->m_jobProvider, pmode.m_jobTotal);

        /* participate in processing jobs, until all are distributed */
        processPmode(pmode, *this);

        /* the master worker thread (this one) does merge analysis. By doing
         * merge after all the other jobs are at least started, we usually avoid
         * blocking on another thread */

        if (m_param->rdLevel <= 4)
        {
            {
                ProfileCUScope(parentCTU, pmodeBlockTime, countPModeMasters);
                pmode.waitForExit();
            }

            /* select best inter mode based on sa8d cost */
            Mode *bestInter = &md.pred[PRED_2Nx2N];

            if (m_param->bEnableRectInter)
            {
                if (md.pred[PRED_Nx2N].sa8dCost < bestInter->sa8dCost)
                    bestInter = &md.pred[PRED_Nx2N];
                if (md.pred[PRED_2NxN].sa8dCost < bestInter->sa8dCost)
                    bestInter = &md.pred[PRED_2NxN];
            }

            if (bTryAmp)
            {
                if (md.pred[PRED_2NxnU].sa8dCost < bestInter->sa8dCost)
                    bestInter = &md.pred[PRED_2NxnU];
                if (md.pred[PRED_2NxnD].sa8dCost < bestInter->sa8dCost)
                    bestInter = &md.pred[PRED_2NxnD];
                if (md.pred[PRED_nLx2N].sa8dCost < bestInter->sa8dCost)
                    bestInter = &md.pred[PRED_nLx2N];
                if (md.pred[PRED_nRx2N].sa8dCost < bestInter->sa8dCost)
                    bestInter = &md.pred[PRED_nRx2N];
            }

            if (m_param->rdLevel > 2)
            {
                /* RD selection between merge, inter, bidir and intra */
                if (!m_bChromaSa8d) /* When m_bChromaSa8d is enabled, chroma MC has already been done */
                {
                    uint32_t numPU = bestInter->cu.getNumPartInter(0);
                    for (uint32_t puIdx = 0; puIdx < numPU; puIdx++)
                    {
                        PredictionUnit pu(bestInter->cu, cuGeom, puIdx);
                        motionCompensation(bestInter->cu, pu, bestInter->predYuv, false, true);
                    }
                }
                encodeResAndCalcRdInterCU(*bestInter, cuGeom);
                checkBestMode(*bestInter, depth);

                /* If BIDIR is available and within 17/16 of best inter option, choose by RDO */
                if (m_slice->m_sliceType == B_SLICE && md.pred[PRED_BIDIR].sa8dCost != MAX_INT64 &&
                    md.pred[PRED_BIDIR].sa8dCost * 16 <= bestInter->sa8dCost * 17)
                {
                    encodeResAndCalcRdInterCU(md.pred[PRED_BIDIR], cuGeom);
                    checkBestMode(md.pred[PRED_BIDIR], depth);
                }

                if (bTryIntra)
                    checkBestMode(md.pred[PRED_INTRA], depth);
            }
            else /* m_param->rdLevel == 2 */
            {
                /* pure sa8d selection; only the winner gets fully encoded */
                if (!md.bestMode || bestInter->sa8dCost < md.bestMode->sa8dCost)
                    md.bestMode = bestInter;

                if (m_slice->m_sliceType == B_SLICE && md.pred[PRED_BIDIR].sa8dCost < md.bestMode->sa8dCost)
                    md.bestMode = &md.pred[PRED_BIDIR];

                if (bTryIntra && md.pred[PRED_INTRA].sa8dCost < md.bestMode->sa8dCost)
                {
                    md.bestMode = &md.pred[PRED_INTRA];
                    encodeIntraInInter(*md.bestMode, cuGeom);
                }
                else if (!md.bestMode->cu.m_mergeFlag[0])
                {
                    /* finally code the best mode selected from SA8D costs */
                    uint32_t numPU = md.bestMode->cu.getNumPartInter(0);
                    for (uint32_t puIdx = 0; puIdx < numPU; puIdx++)
                    {
                        PredictionUnit pu(md.bestMode->cu, cuGeom, puIdx);
                        motionCompensation(md.bestMode->cu, pu, md.bestMode->predYuv, false, true);
                    }
                    encodeResAndCalcRdInterCU(*md.bestMode, cuGeom);
                }
            }
        }
        else
        {
            /* rd-levels 5/6: every job produced a full RD cost; compare them all */
            {
                ProfileCUScope(parentCTU, pmodeBlockTime, countPModeMasters);
                pmode.waitForExit();
            }

            checkBestMode(md.pred[PRED_2Nx2N], depth);
            if (m_slice->m_sliceType == B_SLICE && md.pred[PRED_BIDIR].sa8dCost < MAX_INT64)
                checkBestMode(md.pred[PRED_BIDIR], depth);

            if (m_param->bEnableRectInter)
            {
                checkBestMode(md.pred[PRED_Nx2N], depth);
                checkBestMode(md.pred[PRED_2NxN], depth);
            }

            if (bTryAmp)
            {
                checkBestMode(md.pred[PRED_2NxnU], depth);
                checkBestMode(md.pred[PRED_2NxnD], depth);
                checkBestMode(md.pred[PRED_nLx2N], depth);
                checkBestMode(md.pred[PRED_nRx2N], depth);
            }

            if (bTryIntra)
            {
                checkBestMode(md.pred[PRED_INTRA], depth);
                if (cuGeom.log2CUSize == 3 && m_slice->m_sps->quadtreeTULog2MinSize < 3)
                    checkBestMode(md.pred[PRED_INTRA_NxN], depth);
            }
        }

        if (m_bTryLossless)
            tryLossless(cuGeom);

        if (mightSplit)
            addSplitFlagCost(*md.bestMode, cuGeom.depth);
    }

    /* compare split RD cost against best cost */
    if (mightSplit && !bNoSplit)
        checkBestMode(md.pred[PRED_SPLIT], depth);

    /* determine which motion references the parent CU should search */
    uint32_t refMask;
    if (!(m_param->limitReferences & X265_REF_LIMIT_DEPTH))
        refMask = 0;
    else if (md.bestMode == &md.pred[PRED_SPLIT])
        refMask = splitRefs[0] | splitRefs[1] | splitRefs[2] | splitRefs[3];
    else
    {
        /* use best merge/inter mode, in case of intra use 2Nx2N inter references */
        CUData& cu = md.bestMode->cu.isIntra(0) ? md.pred[PRED_2Nx2N].cu : md.bestMode->cu;
        uint32_t numPU = cu.getNumPartInter(0);
        refMask = 0;
        for (uint32_t puIdx = 0, subPartIdx = 0; puIdx < numPU; puIdx++, subPartIdx += cu.getPUOffset(puIdx, 0))
            refMask |= cu.getBestRefIdx(subPartIdx);
    }

    if (mightNotSplit)
    {
        /* early-out statistics */
        FrameData& curEncData = *m_frame->m_encData;
        FrameData::RCStatCU& cuStat = curEncData.m_cuStat[parentCTU.m_cuAddr];
        uint64_t temp = cuStat.avgCost[depth] * cuStat.count[depth];
        cuStat.count[depth] += 1;
        cuStat.avgCost[depth] = (temp + md.bestMode->rdCost) / cuStat.count[depth];
    }

    /* Copy best data to encData CTU and recon */
    X265_CHECK(md.bestMode->ok(), "best mode is not ok");
    md.bestMode->cu.copyToPic(depth);
    md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, cuAddr, cuGeom.absPartIdx);

    return refMask;
}
814 | |
/* Analyze one CU recursively at rd-levels 0..4.
 *
 * Evaluates merge/skip early-outs, the four split sub-CUs, and the
 * 2Nx2N / rectangular / AMP inter modes plus intra at the current depth.
 * At rd-level >= 3 the winners are compared by full RD cost; below that,
 * sa8d cost decides and the chosen mode is then encoded to the level of
 * detail the rd-level requires. The best mode is stored in
 * m_modeDepth[depth] and copied into the frame's encData CTU and recon.
 *
 * Returns a bitmask of the motion reference indices used by the winning
 * mode(s); the parent CU uses it to restrict its own motion search when
 * X265_REF_LIMIT_DEPTH is enabled. */
uint32_t Analysis::compressInterCU_rd0_4(const CUData& parentCTU, const CUGeom& cuGeom, int32_t qp)
{
    uint32_t depth = cuGeom.depth;
    uint32_t cuAddr = parentCTU.m_cuAddr;
    ModeDepth& md = m_modeDepth[depth];
    md.bestMode = NULL;

    PicYuv& reconPic = *m_frame->m_reconPic;

    bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
    bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
    /* minimum depth at which this CTU's skip analysis may stop recursing,
     * derived from spatial/temporal neighbors */
    uint32_t minDepth = topSkipMinDepth(parentCTU, cuGeom);
    bool earlyskip = false;
    bool splitIntra = true;
    uint32_t splitRefs[4] = { 0, 0, 0, 0 };
    /* Step 1. Evaluate Merge/Skip candidates for likely early-outs */
    if (mightNotSplit && depth >= minDepth)
    {
        /* Compute Merge Cost */
        md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
        md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
        checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
        if (m_param->rdLevel)
            earlyskip = m_param->bEnableEarlySkip && md.bestMode && md.bestMode->cu.isSkipped(0); // TODO: sa8d threshold per depth
    }

    bool bNoSplit = false;
    if (md.bestMode)
    {
        /* a skipped CU (or a statistical early-out) lets us avoid evaluating
         * the four split sub-CUs entirely */
        bNoSplit = md.bestMode->cu.isSkipped(0);
        if (mightSplit && depth && depth >= minDepth && !bNoSplit)
            bNoSplit = recursionDepthCheck(parentCTU, cuGeom, *md.bestMode);
    }

    /* Step 2. Evaluate each of the 4 split sub-blocks in series */
    if (mightSplit && !bNoSplit)
    {
        Mode* splitPred = &md.pred[PRED_SPLIT];
        splitPred->initCosts();
        CUData* splitCU = &splitPred->cu;
        splitCU->initSubCU(parentCTU, cuGeom, qp);

        uint32_t nextDepth = depth + 1;
        ModeDepth& nd = m_modeDepth[nextDepth];
        invalidateContexts(nextDepth);
        Entropy* nextContext = &m_rqt[depth].cur;
        int nextQP = qp;
        splitIntra = false;

        for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
        {
            const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + subPartIdx);
            if (childGeom.flags & CUGeom::PRESENT)
            {
                m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv, childGeom.absPartIdx);
                m_rqt[nextDepth].cur.load(*nextContext);

                if (m_slice->m_pps->bUseDQP && nextDepth <= m_slice->m_pps->maxCuDQPDepth)
                    nextQP = setLambdaFromQP(parentCTU, calculateQpforCuSize(parentCTU, childGeom));

                splitRefs[subPartIdx] = compressInterCU_rd0_4(parentCTU, childGeom, nextQP);

                // Save best CU and pred data for this sub CU
                splitIntra |= nd.bestMode->cu.isIntra(0);
                splitCU->copyPartFrom(nd.bestMode->cu, childGeom, subPartIdx);
                splitPred->addSubCosts(*nd.bestMode);

                /* rd-level 0 never generates recon, so propagate prediction
                 * pixels instead */
                if (m_param->rdLevel)
                    nd.bestMode->reconYuv.copyToPartYuv(splitPred->reconYuv, childGeom.numPartitions * subPartIdx);
                else
                    nd.bestMode->predYuv.copyToPartYuv(splitPred->predYuv, childGeom.numPartitions * subPartIdx);
                if (m_param->rdLevel > 1)
                    nextContext = &nd.bestMode->contexts;
            }
            else
                splitCU->setEmptyPart(childGeom, subPartIdx);
        }
        nextContext->store(splitPred->contexts);

        if (mightNotSplit)
            addSplitFlagCost(*splitPred, cuGeom.depth);
        else if (m_param->rdLevel > 1)
            updateModeCost(*splitPred);
        else
            splitPred->sa8dCost = m_rdCost.calcRdSADCost((uint32_t)splitPred->distortion, splitPred->sa8dBits);
    }

    /* Split CUs
     *   0  1
     *   2  3 */
    uint32_t allSplitRefs = splitRefs[0] | splitRefs[1] | splitRefs[2] | splitRefs[3];
    /* Step 3. Evaluate ME (2Nx2N, rect, amp) and intra modes at current depth */
    if (mightNotSplit && depth >= minDepth)
    {
        if (m_slice->m_pps->bUseDQP && depth <= m_slice->m_pps->maxCuDQPDepth && m_slice->m_pps->maxCuDQPDepth != 0)
            setLambdaFromQP(parentCTU, qp);

        if (!earlyskip)
        {
            uint32_t refMasks[2];
            refMasks[0] = allSplitRefs;
            md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom, qp);
            checkInter_rd0_4(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, refMasks);

            if (m_param->limitReferences & X265_REF_LIMIT_CU)
            {
                /* restrict all remaining partition searches at this depth to
                 * the reference(s) the 2Nx2N search selected */
                CUData& cu = md.pred[PRED_2Nx2N].cu;
                uint32_t refMask = cu.getBestRefIdx(0);
                allSplitRefs = splitRefs[0] = splitRefs[1] = splitRefs[2] = splitRefs[3] = refMask;
            }

            if (m_slice->m_sliceType == B_SLICE)
            {
                md.pred[PRED_BIDIR].cu.initSubCU(parentCTU, cuGeom, qp);
                checkBidir2Nx2N(md.pred[PRED_2Nx2N], md.pred[PRED_BIDIR], cuGeom);
            }

            Mode *bestInter = &md.pred[PRED_2Nx2N];
            if (m_param->bEnableRectInter)
            {
                refMasks[0] = splitRefs[0] | splitRefs[2]; /* left */
                refMasks[1] = splitRefs[1] | splitRefs[3]; /* right */
                md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom, qp);
                checkInter_rd0_4(md.pred[PRED_Nx2N], cuGeom, SIZE_Nx2N, refMasks);
                if (md.pred[PRED_Nx2N].sa8dCost < bestInter->sa8dCost)
                    bestInter = &md.pred[PRED_Nx2N];

                refMasks[0] = splitRefs[0] | splitRefs[1]; /* top */
                refMasks[1] = splitRefs[2] | splitRefs[3]; /* bot */
                md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom, qp);
                checkInter_rd0_4(md.pred[PRED_2NxN], cuGeom, SIZE_2NxN, refMasks);
                if (md.pred[PRED_2NxN].sa8dCost < bestInter->sa8dCost)
                    bestInter = &md.pred[PRED_2NxN];
            }

            if (m_slice->m_sps->maxAMPDepth > depth)
            {
                /* only try asymmetric partitions in the direction suggested by
                 * the best symmetric partition so far */
                bool bHor = false, bVer = false;
                if (bestInter->cu.m_partSize[0] == SIZE_2NxN)
                    bHor = true;
                else if (bestInter->cu.m_partSize[0] == SIZE_Nx2N)
                    bVer = true;
                else if (bestInter->cu.m_partSize[0] == SIZE_2Nx2N &&
                         md.bestMode && md.bestMode->cu.getQtRootCbf(0))
                {
                    bHor = true;
                    bVer = true;
                }

                if (bHor)
                {
                    refMasks[0] = splitRefs[0] | splitRefs[1]; /* 25% top */
                    refMasks[1] = allSplitRefs;                /* 75% bot */
                    md.pred[PRED_2NxnU].cu.initSubCU(parentCTU, cuGeom, qp);
                    checkInter_rd0_4(md.pred[PRED_2NxnU], cuGeom, SIZE_2NxnU, refMasks);
                    if (md.pred[PRED_2NxnU].sa8dCost < bestInter->sa8dCost)
                        bestInter = &md.pred[PRED_2NxnU];

                    refMasks[0] = allSplitRefs;                /* 75% top */
                    refMasks[1] = splitRefs[2] | splitRefs[3]; /* 25% bot */
                    md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom, qp);
                    checkInter_rd0_4(md.pred[PRED_2NxnD], cuGeom, SIZE_2NxnD, refMasks);
                    if (md.pred[PRED_2NxnD].sa8dCost < bestInter->sa8dCost)
                        bestInter = &md.pred[PRED_2NxnD];
                }
                if (bVer)
                {
                    refMasks[0] = splitRefs[0] | splitRefs[2]; /* 25% left */
                    refMasks[1] = allSplitRefs;                /* 75% right */
                    md.pred[PRED_nLx2N].cu.initSubCU(parentCTU, cuGeom, qp);
                    checkInter_rd0_4(md.pred[PRED_nLx2N], cuGeom, SIZE_nLx2N, refMasks);
                    if (md.pred[PRED_nLx2N].sa8dCost < bestInter->sa8dCost)
                        bestInter = &md.pred[PRED_nLx2N];

                    refMasks[0] = allSplitRefs;                /* 75% left */
                    refMasks[1] = splitRefs[1] | splitRefs[3]; /* 25% right */
                    md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom, qp);
                    checkInter_rd0_4(md.pred[PRED_nRx2N], cuGeom, SIZE_nRx2N, refMasks);
                    if (md.pred[PRED_nRx2N].sa8dCost < bestInter->sa8dCost)
                        bestInter = &md.pred[PRED_nRx2N];
                }
            }
            bool bTryIntra = m_slice->m_sliceType != B_SLICE || m_param->bIntraInBFrames;
            if (m_param->rdLevel >= 3)
            {
                /* Calculate RD cost of best inter option */
                if (!m_bChromaSa8d) /* When m_bChromaSa8d is enabled, chroma MC has already been done */
                {
                    uint32_t numPU = bestInter->cu.getNumPartInter(0);
                    for (uint32_t puIdx = 0; puIdx < numPU; puIdx++)
                    {
                        PredictionUnit pu(bestInter->cu, cuGeom, puIdx);
                        motionCompensation(bestInter->cu, pu, bestInter->predYuv, false, true);
                    }
                }
                encodeResAndCalcRdInterCU(*bestInter, cuGeom);
                checkBestMode(*bestInter, depth);

                /* If BIDIR is available and within 17/16 of best inter option, choose by RDO */
                if (m_slice->m_sliceType == B_SLICE && md.pred[PRED_BIDIR].sa8dCost != MAX_INT64 &&
                    md.pred[PRED_BIDIR].sa8dCost * 16 <= bestInter->sa8dCost * 17)
                {
                    encodeResAndCalcRdInterCU(md.pred[PRED_BIDIR], cuGeom);
                    checkBestMode(md.pred[PRED_BIDIR], depth);
                }

                /* only try intra when the best inter mode still has coded
                 * residual (or when no usable inter mode was found) */
                if ((bTryIntra && md.bestMode->cu.getQtRootCbf(0)) ||
                    md.bestMode->sa8dCost == MAX_INT64)
                {
                    if (!m_param->limitReferences || splitIntra)
                    {
                        ProfileCounter(parentCTU, totalIntraCU[cuGeom.depth]);
                        md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom, qp);
                        checkIntraInInter(md.pred[PRED_INTRA], cuGeom);
                        encodeIntraInInter(md.pred[PRED_INTRA], cuGeom);
                        checkBestMode(md.pred[PRED_INTRA], depth);
                    }
                    else
                    {
                        ProfileCounter(parentCTU, skippedIntraCU[cuGeom.depth]);
                    }
                }
            }
            else
            {
                /* SA8D choice between merge/skip, inter, bidir, and intra */
                if (!md.bestMode || bestInter->sa8dCost < md.bestMode->sa8dCost)
                    md.bestMode = bestInter;

                if (m_slice->m_sliceType == B_SLICE &&
                    md.pred[PRED_BIDIR].sa8dCost < md.bestMode->sa8dCost)
                    md.bestMode = &md.pred[PRED_BIDIR];

                if (bTryIntra || md.bestMode->sa8dCost == MAX_INT64)
                {
                    if (!m_param->limitReferences || splitIntra)
                    {
                        ProfileCounter(parentCTU, totalIntraCU[cuGeom.depth]);
                        md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom, qp);
                        checkIntraInInter(md.pred[PRED_INTRA], cuGeom);
                        if (md.pred[PRED_INTRA].sa8dCost < md.bestMode->sa8dCost)
                            md.bestMode = &md.pred[PRED_INTRA];
                    }
                    else
                    {
                        ProfileCounter(parentCTU, skippedIntraCU[cuGeom.depth]);
                    }
                }

                /* finally code the best mode selected by SA8D costs:
                 *    RD level 2 - fully encode the best mode
                 *    RD level 1 - generate recon pixels
                 *    RD level 0 - generate chroma prediction */
                if (md.bestMode->cu.m_mergeFlag[0] && md.bestMode->cu.m_partSize[0] == SIZE_2Nx2N)
                {
                    /* prediction already generated for this CU, and if rd level
                     * is not 0, it is already fully encoded */
                }
                else if (md.bestMode->cu.isInter(0))
                {
                    uint32_t numPU = md.bestMode->cu.getNumPartInter(0);
                    for (uint32_t puIdx = 0; puIdx < numPU; puIdx++)
                    {
                        PredictionUnit pu(md.bestMode->cu, cuGeom, puIdx);
                        motionCompensation(md.bestMode->cu, pu, md.bestMode->predYuv, false, true);
                    }
                    if (m_param->rdLevel == 2)
                        encodeResAndCalcRdInterCU(*md.bestMode, cuGeom);
                    else if (m_param->rdLevel == 1)
                    {
                        /* generate recon pixels with no rate distortion considerations */
                        CUData& cu = md.bestMode->cu;

                        uint32_t tuDepthRange[2];
                        cu.getInterTUQtDepthRange(tuDepthRange, 0);

                        m_rqt[cuGeom.depth].tmpResiYuv.subtract(*md.bestMode->fencYuv, md.bestMode->predYuv, cuGeom.log2CUSize);
                        residualTransformQuantInter(*md.bestMode, cuGeom, 0, 0, tuDepthRange);
                        if (cu.getQtRootCbf(0))
                            md.bestMode->reconYuv.addClip(md.bestMode->predYuv, m_rqt[cuGeom.depth].tmpResiYuv, cu.m_log2CUSize[0]);
                        else
                        {
                            /* no coded residual: recon == prediction; a merge
                             * with zero residual is signalled as SKIP */
                            md.bestMode->reconYuv.copyFromYuv(md.bestMode->predYuv);
                            if (cu.m_mergeFlag[0] && cu.m_partSize[0] == SIZE_2Nx2N)
                                cu.setPredModeSubParts(MODE_SKIP);
                        }
                    }
                }
                else
                {
                    if (m_param->rdLevel == 2)
                        encodeIntraInInter(*md.bestMode, cuGeom);
                    else if (m_param->rdLevel == 1)
                    {
                        /* generate recon pixels with no rate distortion considerations */
                        CUData& cu = md.bestMode->cu;

                        uint32_t tuDepthRange[2];
                        cu.getIntraTUQtDepthRange(tuDepthRange, 0);

                        residualTransformQuantIntra(*md.bestMode, cuGeom, 0, 0, tuDepthRange);
                        getBestIntraModeChroma(*md.bestMode, cuGeom);
                        residualQTIntraChroma(*md.bestMode, cuGeom, 0, 0);
                        md.bestMode->reconYuv.copyFromPicYuv(reconPic, cu.m_cuAddr, cuGeom.absPartIdx); // TODO:
                    }
                }
            }
        } // !earlyskip

        if (m_bTryLossless)
            tryLossless(cuGeom);

        if (mightSplit)
            addSplitFlagCost(*md.bestMode, cuGeom.depth);
    }

    /* compare the accumulated split cost against the best mode at this depth */
    if (mightSplit && !bNoSplit)
    {
        Mode* splitPred = &md.pred[PRED_SPLIT];
        if (!md.bestMode)
            md.bestMode = splitPred;
        else if (m_param->rdLevel > 1)
            checkBestMode(*splitPred, cuGeom.depth);
        else if (splitPred->sa8dCost < md.bestMode->sa8dCost)
            md.bestMode = splitPred;

        checkDQPForSplitPred(*md.bestMode, cuGeom);
    }

    /* determine which motion references the parent CU should search */
    uint32_t refMask;
    if (!(m_param->limitReferences & X265_REF_LIMIT_DEPTH))
        refMask = 0;
    else if (md.bestMode == &md.pred[PRED_SPLIT])
        refMask = allSplitRefs;
    else
    {
        /* use best merge/inter mode, in case of intra use 2Nx2N inter references */
        CUData& cu = md.bestMode->cu.isIntra(0) ? md.pred[PRED_2Nx2N].cu : md.bestMode->cu;
        uint32_t numPU = cu.getNumPartInter(0);
        refMask = 0;
        for (uint32_t puIdx = 0, subPartIdx = 0; puIdx < numPU; puIdx++, subPartIdx += cu.getPUOffset(puIdx, 0))
            refMask |= cu.getBestRefIdx(subPartIdx);
    }

    if (mightNotSplit)
    {
        /* early-out statistics: running average CU cost per depth feeds
         * recursionDepthCheck() on later CTUs */
        FrameData& curEncData = *m_frame->m_encData;
        FrameData::RCStatCU& cuStat = curEncData.m_cuStat[parentCTU.m_cuAddr];
        uint64_t temp = cuStat.avgCost[depth] * cuStat.count[depth];
        cuStat.count[depth] += 1;
        cuStat.avgCost[depth] = (temp + md.bestMode->rdCost) / cuStat.count[depth];
    }

    /* Copy best data to encData CTU and recon */
    X265_CHECK(md.bestMode->ok(), "best mode is not ok");
    md.bestMode->cu.copyToPic(depth);
    if (m_param->rdLevel) /* rd-level 0 generated no recon pixels */
        md.bestMode->reconYuv.copyToPicYuv(reconPic, cuAddr, cuGeom.absPartIdx);

    return refMask;
}
1178 | |
/* Analyze one CU recursively at rd-levels 5..6 (full RDO).
 *
 * Every candidate mode (merge/skip, split, 2Nx2N, rect, AMP, intra) is
 * fully encoded and compared by RD cost via checkBestMode(). When
 * analysis-load reuse is active, a CU recorded as SKIP at this depth is
 * re-encoded as merge/skip only and all other evaluation is bypassed.
 *
 * zOrder tracks the position in the shared analysis-reuse buffers and is
 * advanced in z-scan order as CUs are consumed or absent.
 *
 * Returns the bitmask of motion references used by the winning mode(s)
 * for the parent's X265_REF_LIMIT_DEPTH restriction. */
uint32_t Analysis::compressInterCU_rd5_6(const CUData& parentCTU, const CUGeom& cuGeom, uint32_t &zOrder, int32_t qp)
{
    uint32_t depth = cuGeom.depth;
    ModeDepth& md = m_modeDepth[depth];
    md.bestMode = NULL;

    bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
    bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);

    if (m_param->analysisMode == X265_ANALYSIS_LOAD)
    {
        uint8_t* reuseDepth = &m_reuseInterDataCTU->depth[parentCTU.m_cuAddr * parentCTU.m_numPartitions];
        uint8_t* reuseModes = &m_reuseInterDataCTU->modes[parentCTU.m_cuAddr * parentCTU.m_numPartitions];
        /* only trust the reuse data when it matches this exact CU position,
         * depth, and was previously coded as SKIP */
        if (mightNotSplit && depth == reuseDepth[zOrder] && zOrder == cuGeom.absPartIdx && reuseModes[zOrder] == MODE_SKIP)
        {
            md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
            md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
            checkMerge2Nx2N_rd5_6(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom, true);

            if (m_bTryLossless)
                tryLossless(cuGeom);

            if (mightSplit)
                addSplitFlagCost(*md.bestMode, cuGeom.depth);

            // increment zOrder offset to point to next best depth in sharedDepth buffer
            zOrder += g_depthInc[g_maxCUDepth - 1][reuseDepth[zOrder]];

            /* decision is final: skip both the split recursion and the
             * mode evaluation below */
            mightSplit = false;
            mightNotSplit = false;
        }
    }

    bool foundSkip = false;
    bool splitIntra = true;
    uint32_t splitRefs[4] = { 0, 0, 0, 0 };
    /* Step 1. Evaluate Merge/Skip candidates for likely early-outs */
    if (mightNotSplit)
    {
        md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
        md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
        checkMerge2Nx2N_rd5_6(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom, false);
        foundSkip = md.bestMode && !md.bestMode->cu.getQtRootCbf(0);
    }

    // estimate split cost
    /* Step 2. Evaluate each of the 4 split sub-blocks in series */
    if (mightSplit && !foundSkip)
    {
        Mode* splitPred = &md.pred[PRED_SPLIT];
        splitPred->initCosts();
        CUData* splitCU = &splitPred->cu;
        splitCU->initSubCU(parentCTU, cuGeom, qp);

        uint32_t nextDepth = depth + 1;
        ModeDepth& nd = m_modeDepth[nextDepth];
        invalidateContexts(nextDepth);
        Entropy* nextContext = &m_rqt[depth].cur;
        int nextQP = qp;
        splitIntra = false;

        for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
        {
            const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + subPartIdx);
            if (childGeom.flags & CUGeom::PRESENT)
            {
                m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv, childGeom.absPartIdx);
                m_rqt[nextDepth].cur.load(*nextContext);

                if (m_slice->m_pps->bUseDQP && nextDepth <= m_slice->m_pps->maxCuDQPDepth)
                    nextQP = setLambdaFromQP(parentCTU, calculateQpforCuSize(parentCTU, childGeom));

                splitRefs[subPartIdx] = compressInterCU_rd5_6(parentCTU, childGeom, zOrder, nextQP);

                // Save best CU and pred data for this sub CU
                splitIntra |= nd.bestMode->cu.isIntra(0);
                splitCU->copyPartFrom(nd.bestMode->cu, childGeom, subPartIdx);
                splitPred->addSubCosts(*nd.bestMode);
                nd.bestMode->reconYuv.copyToPartYuv(splitPred->reconYuv, childGeom.numPartitions * subPartIdx);
                /* chain entropy state so each sub-CU is coded after its
                 * predecessor */
                nextContext = &nd.bestMode->contexts;
            }
            else
            {
                splitCU->setEmptyPart(childGeom, subPartIdx);
                /* absent sub-CU still consumes its span of the reuse buffer */
                zOrder += g_depthInc[g_maxCUDepth - 1][nextDepth];
            }
        }
        nextContext->store(splitPred->contexts);
        if (mightNotSplit)
            addSplitFlagCost(*splitPred, cuGeom.depth);
        else
            updateModeCost(*splitPred);

        checkDQPForSplitPred(*splitPred, cuGeom);
    }

    /* Split CUs
     *   0  1
     *   2  3 */
    uint32_t allSplitRefs = splitRefs[0] | splitRefs[1] | splitRefs[2] | splitRefs[3];
    /* Step 3. Evaluate ME (2Nx2N, rect, amp) and intra modes at current depth */
    if (mightNotSplit)
    {
        if (m_slice->m_pps->bUseDQP && depth <= m_slice->m_pps->maxCuDQPDepth && m_slice->m_pps->maxCuDQPDepth != 0)
            setLambdaFromQP(parentCTU, qp);

        if (!(foundSkip && m_param->bEnableEarlySkip))
        {
            uint32_t refMasks[2];
            refMasks[0] = allSplitRefs;
            md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom, qp);
            checkInter_rd5_6(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, refMasks);
            checkBestMode(md.pred[PRED_2Nx2N], cuGeom.depth);

            if (m_param->limitReferences & X265_REF_LIMIT_CU)
            {
                /* restrict all remaining partition searches at this depth to
                 * the reference(s) the 2Nx2N search selected */
                CUData& cu = md.pred[PRED_2Nx2N].cu;
                uint32_t refMask = cu.getBestRefIdx(0);
                allSplitRefs = splitRefs[0] = splitRefs[1] = splitRefs[2] = splitRefs[3] = refMask;
            }

            if (m_slice->m_sliceType == B_SLICE)
            {
                md.pred[PRED_BIDIR].cu.initSubCU(parentCTU, cuGeom, qp);
                checkBidir2Nx2N(md.pred[PRED_2Nx2N], md.pred[PRED_BIDIR], cuGeom);
                if (md.pred[PRED_BIDIR].sa8dCost < MAX_INT64)
                {
                    encodeResAndCalcRdInterCU(md.pred[PRED_BIDIR], cuGeom);
                    checkBestMode(md.pred[PRED_BIDIR], cuGeom.depth);
                }
            }

            if (m_param->bEnableRectInter)
            {
                refMasks[0] = splitRefs[0] | splitRefs[2]; /* left */
                refMasks[1] = splitRefs[1] | splitRefs[3]; /* right */
                md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom, qp);
                checkInter_rd5_6(md.pred[PRED_Nx2N], cuGeom, SIZE_Nx2N, refMasks);
                checkBestMode(md.pred[PRED_Nx2N], cuGeom.depth);

                refMasks[0] = splitRefs[0] | splitRefs[1]; /* top */
                refMasks[1] = splitRefs[2] | splitRefs[3]; /* bot */
                md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom, qp);
                checkInter_rd5_6(md.pred[PRED_2NxN], cuGeom, SIZE_2NxN, refMasks);
                checkBestMode(md.pred[PRED_2NxN], cuGeom.depth);
            }

            // Try AMP (SIZE_2NxnU, SIZE_2NxnD, SIZE_nLx2N, SIZE_nRx2N)
            if (m_slice->m_sps->maxAMPDepth > depth)
            {
                /* only try asymmetric partitions in the direction suggested by
                 * the best mode so far */
                bool bHor = false, bVer = false;
                if (md.bestMode->cu.m_partSize[0] == SIZE_2NxN)
                    bHor = true;
                else if (md.bestMode->cu.m_partSize[0] == SIZE_Nx2N)
                    bVer = true;
                else if (md.bestMode->cu.m_partSize[0] == SIZE_2Nx2N && !md.bestMode->cu.m_mergeFlag[0])
                {
                    bHor = true;
                    bVer = true;
                }

                if (bHor)
                {
                    refMasks[0] = splitRefs[0] | splitRefs[1]; /* 25% top */
                    refMasks[1] = allSplitRefs;                /* 75% bot */
                    md.pred[PRED_2NxnU].cu.initSubCU(parentCTU, cuGeom, qp);
                    checkInter_rd5_6(md.pred[PRED_2NxnU], cuGeom, SIZE_2NxnU, refMasks);
                    checkBestMode(md.pred[PRED_2NxnU], cuGeom.depth);

                    refMasks[0] = allSplitRefs;                /* 75% top */
                    refMasks[1] = splitRefs[2] | splitRefs[3]; /* 25% bot */
                    md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom, qp);
                    checkInter_rd5_6(md.pred[PRED_2NxnD], cuGeom, SIZE_2NxnD, refMasks);
                    checkBestMode(md.pred[PRED_2NxnD], cuGeom.depth);
                }
                if (bVer)
                {
                    refMasks[0] = splitRefs[0] | splitRefs[2]; /* 25% left */
                    refMasks[1] = allSplitRefs;                /* 75% right */
                    md.pred[PRED_nLx2N].cu.initSubCU(parentCTU, cuGeom, qp);
                    checkInter_rd5_6(md.pred[PRED_nLx2N], cuGeom, SIZE_nLx2N, refMasks);
                    checkBestMode(md.pred[PRED_nLx2N], cuGeom.depth);

                    refMasks[0] = allSplitRefs;                /* 75% left */
                    refMasks[1] = splitRefs[1] | splitRefs[3]; /* 25% right */
                    md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom, qp);
                    checkInter_rd5_6(md.pred[PRED_nRx2N], cuGeom, SIZE_nRx2N, refMasks);
                    checkBestMode(md.pred[PRED_nRx2N], cuGeom.depth);
                }
            }

            if (m_slice->m_sliceType != B_SLICE || m_param->bIntraInBFrames)
            {
                if (!m_param->limitReferences || splitIntra)
                {
                    ProfileCounter(parentCTU, totalIntraCU[cuGeom.depth]);
                    md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom, qp);
                    checkIntra(md.pred[PRED_INTRA], cuGeom, SIZE_2Nx2N, NULL, NULL);
                    checkBestMode(md.pred[PRED_INTRA], depth);

                    /* NxN intra is only legal at the smallest CU size when the
                     * TU tree can go below 8x8 */
                    if (cuGeom.log2CUSize == 3 && m_slice->m_sps->quadtreeTULog2MinSize < 3)
                    {
                        md.pred[PRED_INTRA_NxN].cu.initSubCU(parentCTU, cuGeom, qp);
                        checkIntra(md.pred[PRED_INTRA_NxN], cuGeom, SIZE_NxN, NULL, NULL);
                        checkBestMode(md.pred[PRED_INTRA_NxN], depth);
                    }
                }
                else
                {
                    ProfileCounter(parentCTU, skippedIntraCU[cuGeom.depth]);
                }
            }
        }

        if (m_bTryLossless)
            tryLossless(cuGeom);

        if (mightSplit)
            addSplitFlagCost(*md.bestMode, cuGeom.depth);
    }

    /* compare split RD cost against best cost */
    if (mightSplit && !foundSkip)
        checkBestMode(md.pred[PRED_SPLIT], depth);

    /* determine which motion references the parent CU should search */
    uint32_t refMask;
    if (!(m_param->limitReferences & X265_REF_LIMIT_DEPTH))
        refMask = 0;
    else if (md.bestMode == &md.pred[PRED_SPLIT])
        refMask = allSplitRefs;
    else
    {
        /* use best merge/inter mode, in case of intra use 2Nx2N inter references */
        CUData& cu = md.bestMode->cu.isIntra(0) ? md.pred[PRED_2Nx2N].cu : md.bestMode->cu;
        uint32_t numPU = cu.getNumPartInter(0);
        refMask = 0;
        for (uint32_t puIdx = 0, subPartIdx = 0; puIdx < numPU; puIdx++, subPartIdx += cu.getPUOffset(puIdx, 0))
            refMask |= cu.getBestRefIdx(subPartIdx);
    }

    /* Copy best data to encData CTU and recon */
    X265_CHECK(md.bestMode->ok(), "best mode is not ok");
    md.bestMode->cu.copyToPic(depth);
    md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, parentCTU.m_cuAddr, cuGeom.absPartIdx);

    return refMask;
}
1427 | |
/* sets md.bestMode if a valid merge candidate is found, else leaves it NULL
 *
 * Evaluates all 2Nx2N merge candidates by sa8d cost (luma, plus chroma when
 * m_bChromaSa8d is set). At rd-level >= 1 the best candidate is then coded
 * twice — once as SKIP (no residual) and once with residual — and the
 * cheaper of the two by RD cost wins. The 'skip' and 'merge' Mode arguments
 * are used as a ping-pong pair and may hold either role on return. */
void Analysis::checkMerge2Nx2N_rd0_4(Mode& skip, Mode& merge, const CUGeom& cuGeom)
{
    uint32_t depth = cuGeom.depth;
    ModeDepth& md = m_modeDepth[depth];
    Yuv *fencYuv = &md.fencYuv;

    /* Note that these two Mode instances are named MERGE and SKIP but they may
     * hold the reverse when the function returns. We toggle between the two modes */
    Mode* tempPred = &merge;
    Mode* bestPred = &skip;

    X265_CHECK(m_slice->m_sliceType != I_SLICE, "Evaluating merge in I slice\n");

    tempPred->initCosts();
    tempPred->cu.setPartSizeSubParts(SIZE_2Nx2N);
    tempPred->cu.setPredModeSubParts(MODE_INTER);
    tempPred->cu.m_mergeFlag[0] = true;

    bestPred->initCosts();
    bestPred->cu.setPartSizeSubParts(SIZE_2Nx2N);
    bestPred->cu.setPredModeSubParts(MODE_INTER);
    bestPred->cu.m_mergeFlag[0] = true;

    MVField candMvField[MRG_MAX_NUM_CANDS][2]; // double length for mv of both lists
    uint8_t candDir[MRG_MAX_NUM_CANDS];
    uint32_t numMergeCand = tempPred->cu.getInterMergeCandidates(0, 0, candMvField, candDir);
    PredictionUnit pu(merge.cu, cuGeom, 0);

    bestPred->sa8dCost = MAX_INT64;
    int bestSadCand = -1;
    int sizeIdx = cuGeom.log2CUSize - 2;

    for (uint32_t i = 0; i < numMergeCand; ++i)
    {
        /* with frame parallelism, candidates pointing too far down in a
         * reference frame may reference rows not yet reconstructed */
        if (m_bFrameParallel &&
            (candMvField[i][0].mv.y >= (m_param->searchRange + 1) * 4 ||
             candMvField[i][1].mv.y >= (m_param->searchRange + 1) * 4))
            continue;

        tempPred->cu.m_mvpIdx[0][0] = (uint8_t)i; // merge candidate ID is stored in L0 MVP idx
        X265_CHECK(m_slice->m_sliceType == B_SLICE || !(candDir[i] & 0x10), " invalid merge for P slice\n");
        tempPred->cu.m_interDir[0] = candDir[i];
        tempPred->cu.m_mv[0][0] = candMvField[i][0].mv;
        tempPred->cu.m_mv[1][0] = candMvField[i][1].mv;
        tempPred->cu.m_refIdx[0][0] = (int8_t)candMvField[i][0].refIdx;
        tempPred->cu.m_refIdx[1][0] = (int8_t)candMvField[i][1].refIdx;

        motionCompensation(tempPred->cu, pu, tempPred->predYuv, true, m_bChromaSa8d);

        tempPred->sa8dBits = getTUBits(i, numMergeCand);
        tempPred->distortion = primitives.cu[sizeIdx].sa8d(fencYuv->m_buf[0], fencYuv->m_size, tempPred->predYuv.m_buf[0], tempPred->predYuv.m_size);
        if (m_bChromaSa8d)
        {
            tempPred->distortion += primitives.chroma[m_csp].cu[sizeIdx].sa8d(fencYuv->m_buf[1], fencYuv->m_csize, tempPred->predYuv.m_buf[1], tempPred->predYuv.m_csize);
            tempPred->distortion += primitives.chroma[m_csp].cu[sizeIdx].sa8d(fencYuv->m_buf[2], fencYuv->m_csize, tempPred->predYuv.m_buf[2], tempPred->predYuv.m_csize);
        }
        tempPred->sa8dCost = m_rdCost.calcRdSADCost((uint32_t)tempPred->distortion, tempPred->sa8dBits);

        if (tempPred->sa8dCost < bestPred->sa8dCost)
        {
            bestSadCand = i;
            /* swap roles rather than copying prediction data */
            std::swap(tempPred, bestPred);
        }
    }

    /* force mode decision to take inter or intra */
    if (bestSadCand < 0)
        return;

    /* calculate the motion compensation for chroma for the best mode selected */
    if (!m_bChromaSa8d) /* Chroma MC was done above */
        motionCompensation(bestPred->cu, pu, bestPred->predYuv, false, true);

    if (m_param->rdLevel)
    {
        if (m_param->bLossless)
            bestPred->rdCost = MAX_INT64; /* lossless can never skip residual */
        else
            encodeResAndCalcRdSkipCU(*bestPred);

        /* Encode with residual */
        tempPred->cu.m_mvpIdx[0][0] = (uint8_t)bestSadCand;
        tempPred->cu.setPUInterDir(candDir[bestSadCand], 0, 0);
        tempPred->cu.setPUMv(0, candMvField[bestSadCand][0].mv, 0, 0);
        tempPred->cu.setPUMv(1, candMvField[bestSadCand][1].mv, 0, 0);
        tempPred->cu.setPURefIdx(0, (int8_t)candMvField[bestSadCand][0].refIdx, 0, 0);
        tempPred->cu.setPURefIdx(1, (int8_t)candMvField[bestSadCand][1].refIdx, 0, 0);
        tempPred->sa8dCost = bestPred->sa8dCost;
        tempPred->sa8dBits = bestPred->sa8dBits;
        tempPred->predYuv.copyFromYuv(bestPred->predYuv);

        encodeResAndCalcRdInterCU(*tempPred, cuGeom);

        /* pick SKIP vs merge-with-residual by full RD cost */
        md.bestMode = tempPred->rdCost < bestPred->rdCost ? tempPred : bestPred;
    }
    else
        md.bestMode = bestPred;

    /* broadcast sets of MV field data */
    md.bestMode->cu.setPUInterDir(candDir[bestSadCand], 0, 0);
    md.bestMode->cu.setPUMv(0, candMvField[bestSadCand][0].mv, 0, 0);
    md.bestMode->cu.setPUMv(1, candMvField[bestSadCand][1].mv, 0, 0);
    md.bestMode->cu.setPURefIdx(0, (int8_t)candMvField[bestSadCand][0].refIdx, 0, 0);
    md.bestMode->cu.setPURefIdx(1, (int8_t)candMvField[bestSadCand][1].refIdx, 0, 0);
    checkDQP(*md.bestMode, cuGeom);
    X265_CHECK(md.bestMode->ok(), "Merge mode not ok\n");
}
1536 | |
/* sets md.bestMode if a valid merge candidate is found, else leaves it NULL.
 * Exhaustively RD-tests all merge candidates both with residual (merge) and
 * without residual (skip); the two Mode instances are ping-ponged via pointer
 * swaps so that bestPred always holds the lowest-rdCost encode so far. */
void Analysis::checkMerge2Nx2N_rd5_6(Mode& skip, Mode& merge, const CUGeom& cuGeom, bool isShareMergeCand)
{
    uint32_t depth = cuGeom.depth;

    /* Note that these two Mode instances are named MERGE and SKIP but they may
     * hold the reverse when the function returns. We toggle between the two modes */
    Mode* tempPred = &merge;
    Mode* bestPred = &skip;

    merge.initCosts();
    merge.cu.setPredModeSubParts(MODE_INTER);
    merge.cu.setPartSizeSubParts(SIZE_2Nx2N);
    merge.cu.m_mergeFlag[0] = true;

    skip.initCosts();
    skip.cu.setPredModeSubParts(MODE_INTER);
    skip.cu.setPartSizeSubParts(SIZE_2Nx2N);
    skip.cu.m_mergeFlag[0] = true;

    MVField candMvField[MRG_MAX_NUM_CANDS][2]; // double length for mv of both lists
    uint8_t candDir[MRG_MAX_NUM_CANDS];
    uint32_t numMergeCand = merge.cu.getInterMergeCandidates(0, 0, candMvField, candDir);
    PredictionUnit pu(merge.cu, cuGeom, 0);

    bool foundCbf0Merge = false;
    bool triedPZero = false, triedBZero = false;
    bestPred->rdCost = MAX_INT64;

    /* when sharing analysis data, only re-test the candidate chosen by the
     * earlier encode instead of the full candidate list */
    uint32_t first = 0, last = numMergeCand;
    if (isShareMergeCand)
    {
        first = *m_reuseBestMergeCand;
        last = first + 1;
    }

    for (uint32_t i = first; i < last; i++)
    {
        /* with frame-parallelism, reject candidates whose vertical MV points
         * below the rows guaranteed to be reconstructed in the reference */
        if (m_bFrameParallel &&
            (candMvField[i][0].mv.y >= (m_param->searchRange + 1) * 4 ||
             candMvField[i][1].mv.y >= (m_param->searchRange + 1) * 4))
            continue;

        /* the merge candidate list is packed with MV(0,0) ref 0 when it is not full;
         * test each distinct zero-MV padding pattern (uni-dir / bi-dir) only once */
        if (candDir[i] == 1 && !candMvField[i][0].mv.word && !candMvField[i][0].refIdx)
        {
            if (triedPZero)
                continue;
            triedPZero = true;
        }
        else if (candDir[i] == 3 &&
            !candMvField[i][0].mv.word && !candMvField[i][0].refIdx &&
            !candMvField[i][1].mv.word && !candMvField[i][1].refIdx)
        {
            if (triedBZero)
                continue;
            triedBZero = true;
        }

        tempPred->cu.m_mvpIdx[0][0] = (uint8_t)i; /* merge candidate ID is stored in L0 MVP idx */
        tempPred->cu.m_interDir[0] = candDir[i];
        tempPred->cu.m_mv[0][0] = candMvField[i][0].mv;
        tempPred->cu.m_mv[1][0] = candMvField[i][1].mv;
        tempPred->cu.m_refIdx[0][0] = (int8_t)candMvField[i][0].refIdx;
        tempPred->cu.m_refIdx[1][0] = (int8_t)candMvField[i][1].refIdx;
        tempPred->cu.setPredModeSubParts(MODE_INTER); /* must be cleared between encode iterations */

        motionCompensation(tempPred->cu, pu, tempPred->predYuv, true, true);

        uint8_t hasCbf = true;
        bool swapped = false;
        if (!foundCbf0Merge)
        {
            /* if the best prediction has CBF (not a skip) then try merge with residual */

            encodeResAndCalcRdInterCU(*tempPred, cuGeom);
            hasCbf = tempPred->cu.getQtRootCbf(0);
            foundCbf0Merge = !hasCbf;

            if (tempPred->rdCost < bestPred->rdCost)
            {
                std::swap(tempPred, bestPred);
                swapped = true;
            }
        }
        if (!m_param->bLossless && hasCbf)
        {
            /* try merge without residual (skip), if not lossless coding */

            if (swapped)
            {
                /* bestPred now holds this candidate's encode; re-seed tempPred
                 * with the same candidate data before the skip trial */
                tempPred->cu.m_mvpIdx[0][0] = (uint8_t)i;
                tempPred->cu.m_interDir[0] = candDir[i];
                tempPred->cu.m_mv[0][0] = candMvField[i][0].mv;
                tempPred->cu.m_mv[1][0] = candMvField[i][1].mv;
                tempPred->cu.m_refIdx[0][0] = (int8_t)candMvField[i][0].refIdx;
                tempPred->cu.m_refIdx[1][0] = (int8_t)candMvField[i][1].refIdx;
                tempPred->cu.setPredModeSubParts(MODE_INTER);
                tempPred->predYuv.copyFromYuv(bestPred->predYuv);
            }

            encodeResAndCalcRdSkipCU(*tempPred);

            if (tempPred->rdCost < bestPred->rdCost)
                std::swap(tempPred, bestPred);
        }
    }

    /* bestPred->rdCost stays at MAX_INT64 when every candidate was rejected;
     * in that case md.bestMode is deliberately left untouched (NULL) */
    if (bestPred->rdCost < MAX_INT64)
    {
        m_modeDepth[depth].bestMode = bestPred;

        /* broadcast sets of MV field data */
        uint32_t bestCand = bestPred->cu.m_mvpIdx[0][0];
        bestPred->cu.setPUInterDir(candDir[bestCand], 0, 0);
        bestPred->cu.setPUMv(0, candMvField[bestCand][0].mv, 0, 0);
        bestPred->cu.setPUMv(1, candMvField[bestCand][1].mv, 0, 0);
        bestPred->cu.setPURefIdx(0, (int8_t)candMvField[bestCand][0].refIdx, 0, 0);
        bestPred->cu.setPURefIdx(1, (int8_t)candMvField[bestCand][1].refIdx, 0, 0);
        checkDQP(*bestPred, cuGeom);
        X265_CHECK(bestPred->ok(), "merge mode is not ok");
    }

    /* record the winning candidate index for analysis-save mode */
    if (m_param->analysisMode)
    {
        m_reuseBestMergeCand++;
        if (m_param->analysisMode == X265_ANALYSIS_SAVE)
            *m_reuseBestMergeCand = bestPred->cu.m_mvpIdx[0][0];
    }
}
1667 | |
1668 void Analysis::checkInter_rd0_4(Mode& interMode, const CUGeom& cuGeom, PartSize partSize, uint32_t refMask[2]) | |
1669 { | |
1670 interMode.initCosts(); | |
1671 interMode.cu.setPartSizeSubParts(partSize); | |
1672 interMode.cu.setPredModeSubParts(MODE_INTER); | |
1673 int numPredDir = m_slice->isInterP() ? 1 : 2; | |
1674 | |
1675 if (m_param->analysisMode == X265_ANALYSIS_LOAD && m_reuseInterDataCTU) | |
1676 { | |
1677 uint32_t numPU = interMode.cu.getNumPartInter(0); | |
1678 for (uint32_t part = 0; part < numPU; part++) | |
1679 { | |
1680 MotionData* bestME = interMode.bestME[part]; | |
1681 for (int32_t i = 0; i < numPredDir; i++) | |
1682 { | |
1683 bestME[i].ref = *m_reuseRef; | |
1684 m_reuseRef++; | |
1685 } | |
1686 } | |
1687 } | |
1688 | |
1689 predInterSearch(interMode, cuGeom, m_bChromaSa8d, refMask); | |
1690 | |
1691 /* predInterSearch sets interMode.sa8dBits */ | |
1692 const Yuv& fencYuv = *interMode.fencYuv; | |
1693 Yuv& predYuv = interMode.predYuv; | |
1694 int part = partitionFromLog2Size(cuGeom.log2CUSize); | |
1695 interMode.distortion = primitives.cu[part].sa8d(fencYuv.m_buf[0], fencYuv.m_size, predYuv.m_buf[0], predYuv.m_size); | |
1696 if (m_bChromaSa8d) | |
1697 { | |
1698 interMode.distortion += primitives.chroma[m_csp].cu[part].sa8d(fencYuv.m_buf[1], fencYuv.m_csize, predYuv.m_buf[1], predYuv.m_csize); | |
1699 interMode.distortion += primitives.chroma[m_csp].cu[part].sa8d(fencYuv.m_buf[2], fencYuv.m_csize, predYuv.m_buf[2], predYuv.m_csize); | |
1700 } | |
1701 interMode.sa8dCost = m_rdCost.calcRdSADCost((uint32_t)interMode.distortion, interMode.sa8dBits); | |
1702 | |
1703 if (m_param->analysisMode == X265_ANALYSIS_SAVE && m_reuseInterDataCTU) | |
1704 { | |
1705 uint32_t numPU = interMode.cu.getNumPartInter(0); | |
1706 for (uint32_t puIdx = 0; puIdx < numPU; puIdx++) | |
1707 { | |
1708 MotionData* bestME = interMode.bestME[puIdx]; | |
1709 for (int32_t i = 0; i < numPredDir; i++) | |
1710 { | |
1711 *m_reuseRef = bestME[i].ref; | |
1712 m_reuseRef++; | |
1713 } | |
1714 } | |
1715 } | |
1716 } | |
1717 | |
1718 void Analysis::checkInter_rd5_6(Mode& interMode, const CUGeom& cuGeom, PartSize partSize, uint32_t refMask[2]) | |
1719 { | |
1720 interMode.initCosts(); | |
1721 interMode.cu.setPartSizeSubParts(partSize); | |
1722 interMode.cu.setPredModeSubParts(MODE_INTER); | |
1723 int numPredDir = m_slice->isInterP() ? 1 : 2; | |
1724 | |
1725 if (m_param->analysisMode == X265_ANALYSIS_LOAD && m_reuseInterDataCTU) | |
1726 { | |
1727 uint32_t numPU = interMode.cu.getNumPartInter(0); | |
1728 for (uint32_t puIdx = 0; puIdx < numPU; puIdx++) | |
1729 { | |
1730 MotionData* bestME = interMode.bestME[puIdx]; | |
1731 for (int32_t i = 0; i < numPredDir; i++) | |
1732 { | |
1733 bestME[i].ref = *m_reuseRef; | |
1734 m_reuseRef++; | |
1735 } | |
1736 } | |
1737 } | |
1738 | |
1739 predInterSearch(interMode, cuGeom, true, refMask); | |
1740 | |
1741 /* predInterSearch sets interMode.sa8dBits, but this is ignored */ | |
1742 encodeResAndCalcRdInterCU(interMode, cuGeom); | |
1743 | |
1744 if (m_param->analysisMode == X265_ANALYSIS_SAVE && m_reuseInterDataCTU) | |
1745 { | |
1746 uint32_t numPU = interMode.cu.getNumPartInter(0); | |
1747 for (uint32_t puIdx = 0; puIdx < numPU; puIdx++) | |
1748 { | |
1749 MotionData* bestME = interMode.bestME[puIdx]; | |
1750 for (int32_t i = 0; i < numPredDir; i++) | |
1751 { | |
1752 *m_reuseRef = bestME[i].ref; | |
1753 m_reuseRef++; | |
1754 } | |
1755 } | |
1756 } | |
1757 } | |
1758 | |
/* Evaluate bi-directional prediction for a 2Nx2N CU by combining the best L0
 * and L1 unidirectional results from inter2Nx2N, and additionally probe the
 * coincident (zero-MV) bidir prediction. Results (sa8dCost, MV fields,
 * prediction) are written into bidir2Nx2N; sa8dCost is MAX_INT64 when bidir
 * is not applicable. */
void Analysis::checkBidir2Nx2N(Mode& inter2Nx2N, Mode& bidir2Nx2N, const CUGeom& cuGeom)
{
    CUData& cu = bidir2Nx2N.cu;

    /* bail out when bipred is restricted or either list failed to produce a
     * usable unidirectional candidate */
    if (cu.isBipredRestriction() || inter2Nx2N.bestME[0][0].cost == MAX_UINT || inter2Nx2N.bestME[0][1].cost == MAX_UINT)
    {
        bidir2Nx2N.sa8dCost = MAX_INT64;
        bidir2Nx2N.rdCost = MAX_INT64;
        return;
    }

    const Yuv& fencYuv = *bidir2Nx2N.fencYuv;
    MV mvzero(0, 0);
    int partEnum = cuGeom.log2CUSize - 2;

    /* start from the best unidirectional L0/L1 motion data */
    bidir2Nx2N.bestME[0][0] = inter2Nx2N.bestME[0][0];
    bidir2Nx2N.bestME[0][1] = inter2Nx2N.bestME[0][1];
    MotionData* bestME = bidir2Nx2N.bestME[0];
    int ref0 = bestME[0].ref;
    MV mvp0 = bestME[0].mvp;
    int mvpIdx0 = bestME[0].mvpIdx;
    int ref1 = bestME[1].ref;
    MV mvp1 = bestME[1].mvp;
    int mvpIdx1 = bestME[1].mvpIdx;

    bidir2Nx2N.initCosts();
    cu.setPartSizeSubParts(SIZE_2Nx2N);
    cu.setPredModeSubParts(MODE_INTER);
    cu.setPUInterDir(3, 0, 0);
    cu.setPURefIdx(0, (int8_t)ref0, 0, 0);
    cu.setPURefIdx(1, (int8_t)ref1, 0, 0);
    cu.m_mvpIdx[0][0] = (uint8_t)mvpIdx0;
    cu.m_mvpIdx[1][0] = (uint8_t)mvpIdx1;
    cu.m_mergeFlag[0] = 0;

    /* Estimate cost of BIDIR using best 2Nx2N L0 and L1 motion vectors */
    cu.setPUMv(0, bestME[0].mv, 0, 0);
    cu.m_mvd[0][0] = bestME[0].mv - mvp0;

    cu.setPUMv(1, bestME[1].mv, 0, 0);
    cu.m_mvd[1][0] = bestME[1].mv - mvp1;

    PredictionUnit pu(cu, cuGeom, 0);
    motionCompensation(cu, pu, bidir2Nx2N.predYuv, true, m_bChromaSa8d);

    int sa8d = primitives.cu[partEnum].sa8d(fencYuv.m_buf[0], fencYuv.m_size, bidir2Nx2N.predYuv.m_buf[0], bidir2Nx2N.predYuv.m_size);
    if (m_bChromaSa8d)
    {
        /* Add in chroma distortion */
        sa8d += primitives.chroma[m_csp].cu[partEnum].sa8d(fencYuv.m_buf[1], fencYuv.m_csize, bidir2Nx2N.predYuv.m_buf[1], bidir2Nx2N.predYuv.m_csize);
        sa8d += primitives.chroma[m_csp].cu[partEnum].sa8d(fencYuv.m_buf[2], fencYuv.m_csize, bidir2Nx2N.predYuv.m_buf[2], bidir2Nx2N.predYuv.m_csize);
    }
    /* bidir signals one list-selection code instead of two unidirectional ones */
    bidir2Nx2N.sa8dBits = bestME[0].bits + bestME[1].bits + m_listSelBits[2] - (m_listSelBits[0] + m_listSelBits[1]);
    bidir2Nx2N.sa8dCost = sa8d + m_rdCost.getCost(bidir2Nx2N.sa8dBits);

    /* zero-MV probe is pointless if both unidir MVs are already zero */
    bool bTryZero = bestME[0].mv.notZero() || bestME[1].mv.notZero();
    if (bTryZero)
    {
        /* Do not try zero MV if unidir motion predictors are beyond
         * valid search area */
        MV mvmin, mvmax;
        int merange = X265_MAX(m_param->sourceWidth, m_param->sourceHeight);
        setSearchRange(cu, mvzero, merange, mvmin, mvmax);
        mvmax.y += 2; // there is some pad for subpel refine
        mvmin <<= 2;
        mvmax <<= 2;

        bTryZero &= bestME[0].mvp.checkRange(mvmin, mvmax);
        bTryZero &= bestME[1].mvp.checkRange(mvmin, mvmax);
    }
    if (bTryZero)
    {
        /* Estimate cost of BIDIR using coincident blocks */
        Yuv& tmpPredYuv = m_rqt[cuGeom.depth].tmpPredYuv;

        int zsa8d;

        if (m_bChromaSa8d)
        {
            /* temporarily overwrite the CU MVs with zero for a full MC;
             * they are restored below if the zero-MV probe loses */
            cu.m_mv[0][0] = mvzero;
            cu.m_mv[1][0] = mvzero;

            motionCompensation(cu, pu, tmpPredYuv, true, true);

            zsa8d  = primitives.cu[partEnum].sa8d(fencYuv.m_buf[0], fencYuv.m_size, tmpPredYuv.m_buf[0], tmpPredYuv.m_size);
            zsa8d += primitives.chroma[m_csp].cu[partEnum].sa8d(fencYuv.m_buf[1], fencYuv.m_csize, tmpPredYuv.m_buf[1], tmpPredYuv.m_csize);
            zsa8d += primitives.chroma[m_csp].cu[partEnum].sa8d(fencYuv.m_buf[2], fencYuv.m_csize, tmpPredYuv.m_buf[2], tmpPredYuv.m_csize);
        }
        else
        {
            /* luma-only shortcut: average the two co-located reference blocks
             * directly instead of running interpolation (zero MV => no subpel) */
            pixel *fref0 = m_slice->m_mref[0][ref0].getLumaAddr(pu.ctuAddr, pu.cuAbsPartIdx);
            pixel *fref1 = m_slice->m_mref[1][ref1].getLumaAddr(pu.ctuAddr, pu.cuAbsPartIdx);
            intptr_t refStride = m_slice->m_mref[0][0].lumaStride;

            primitives.pu[partEnum].pixelavg_pp(tmpPredYuv.m_buf[0], tmpPredYuv.m_size, fref0, refStride, fref1, refStride, 32);
            zsa8d = primitives.cu[partEnum].sa8d(fencYuv.m_buf[0], fencYuv.m_size, tmpPredYuv.m_buf[0], tmpPredYuv.m_size);
        }

        /* re-price the MV bits for zero MVs against the same predictors */
        uint32_t bits0 = bestME[0].bits - m_me.bitcost(bestME[0].mv, mvp0) + m_me.bitcost(mvzero, mvp0);
        uint32_t bits1 = bestME[1].bits - m_me.bitcost(bestME[1].mv, mvp1) + m_me.bitcost(mvzero, mvp1);
        uint32_t zcost = zsa8d + m_rdCost.getCost(bits0) + m_rdCost.getCost(bits1);

        /* refine MVP selection for zero mv, updates: mvp, mvpidx, bits, cost */
        mvp0 = checkBestMVP(inter2Nx2N.amvpCand[0][ref0], mvzero, mvpIdx0, bits0, zcost);
        mvp1 = checkBestMVP(inter2Nx2N.amvpCand[1][ref1], mvzero, mvpIdx1, bits1, zcost);

        uint32_t zbits = bits0 + bits1 + m_listSelBits[2] - (m_listSelBits[0] + m_listSelBits[1]);
        zcost = zsa8d + m_rdCost.getCost(zbits);

        if (zcost < bidir2Nx2N.sa8dCost)
        {
            /* zero-MV bidir wins: commit its cost and motion data */
            bidir2Nx2N.sa8dBits = zbits;
            bidir2Nx2N.sa8dCost = zcost;

            cu.setPUMv(0, mvzero, 0, 0);
            cu.m_mvd[0][0] = mvzero - mvp0;
            cu.m_mvpIdx[0][0] = (uint8_t)mvpIdx0;

            cu.setPUMv(1, mvzero, 0, 0);
            cu.m_mvd[1][0] = mvzero - mvp1;
            cu.m_mvpIdx[1][0] = (uint8_t)mvpIdx1;

            if (m_bChromaSa8d)
                /* real MC was already performed */
                bidir2Nx2N.predYuv.copyFromYuv(tmpPredYuv);
            else
                motionCompensation(cu, pu, bidir2Nx2N.predYuv, true, true);
        }
        else if (m_bChromaSa8d)
        {
            /* recover overwritten motion vectors */
            cu.m_mv[0][0] = bestME[0].mv;
            cu.m_mv[1][0] = bestME[1].mv;
        }
    }
}
1895 | |
/* Recurse to the depths chosen during analysis and, for each leaf CU, perform
 * the residual transform/quant and write reconstructed pixels directly into
 * the frame's recon picture (used by low rd-levels where analysis skipped the
 * full encode). */
void Analysis::encodeResidue(const CUData& ctu, const CUGeom& cuGeom)
{
    /* recurse into sub-CUs until we reach the coding depth selected for this region */
    if (cuGeom.depth < ctu.m_cuDepth[cuGeom.absPartIdx] && cuGeom.depth < g_maxCUDepth)
    {
        for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
        {
            const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + subPartIdx);
            if (childGeom.flags & CUGeom::PRESENT)
                encodeResidue(ctu, childGeom);
        }
        return;
    }

    uint32_t absPartIdx = cuGeom.absPartIdx;
    int sizeIdx = cuGeom.log2CUSize - 2;

    /* reuse the bestMode data structures at the current depth */
    Mode *bestMode = m_modeDepth[cuGeom.depth].bestMode;
    CUData& cu = bestMode->cu;

    cu.copyFromPic(ctu, cuGeom);

    PicYuv& reconPic = *m_frame->m_reconPic;

    /* make sure the depth-local copy of the source pixels is populated */
    Yuv& fencYuv = m_modeDepth[cuGeom.depth].fencYuv;
    if (cuGeom.depth)
        m_modeDepth[0].fencYuv.copyPartToYuv(fencYuv, absPartIdx);
    X265_CHECK(bestMode->fencYuv == &fencYuv, "invalid fencYuv\n");

    if (cu.isIntra(0))
    {
        ProfileCUScope(ctu, intraRDOElapsedTime[cuGeom.depth], countIntraRDO[cuGeom.depth]); // not really RDO, but close enough

        uint32_t tuDepthRange[2];
        cu.getIntraTUQtDepthRange(tuDepthRange, 0);

        /* luma then chroma: transform/quant writes recon internally for intra */
        residualTransformQuantIntra(*bestMode, cuGeom, 0, 0, tuDepthRange);
        getBestIntraModeChroma(*bestMode, cuGeom);
        residualQTIntraChroma(*bestMode, cuGeom, 0, 0);
    }
    else // if (cu.isInter(0))
    {
        ProfileCUScope(ctu, interRDOElapsedTime[cuGeom.depth], countInterRDO[cuGeom.depth]); // not really RDO, but close enough

        X265_CHECK(!ctu.isSkipped(absPartIdx), "skip not expected prior to transform\n");

        /* Calculate residual for current CU part into depth sized resiYuv */

        ShortYuv& resiYuv = m_rqt[cuGeom.depth].tmpResiYuv;

        /* at RD 0, the prediction pixels are accumulated into the top depth predYuv */
        Yuv& predYuv = m_modeDepth[0].bestMode->predYuv;
        pixel* predY = predYuv.getLumaAddr(absPartIdx);
        pixel* predU = predYuv.getCbAddr(absPartIdx);
        pixel* predV = predYuv.getCrAddr(absPartIdx);

        primitives.cu[sizeIdx].sub_ps(resiYuv.m_buf[0], resiYuv.m_size,
                                      fencYuv.m_buf[0], predY,
                                      fencYuv.m_size, predYuv.m_size);

        primitives.chroma[m_csp].cu[sizeIdx].sub_ps(resiYuv.m_buf[1], resiYuv.m_csize,
                                                 fencYuv.m_buf[1], predU,
                                                 fencYuv.m_csize, predYuv.m_csize);

        primitives.chroma[m_csp].cu[sizeIdx].sub_ps(resiYuv.m_buf[2], resiYuv.m_csize,
                                                 fencYuv.m_buf[2], predV,
                                                 fencYuv.m_csize, predYuv.m_csize);

        uint32_t tuDepthRange[2];
        cu.getInterTUQtDepthRange(tuDepthRange, 0);

        residualTransformQuantInter(*bestMode, cuGeom, 0, 0, tuDepthRange);

        /* a 2Nx2N merge CU whose residual quantized to zero becomes a skip */
        if (cu.m_mergeFlag[0] && cu.m_partSize[0] == SIZE_2Nx2N && !cu.getQtRootCbf(0))
            cu.setPredModeSubParts(MODE_SKIP);

        /* residualTransformQuantInter() wrote transformed residual back into
         * resiYuv. Generate the recon pixels by adding it to the prediction */

        /* per-plane: add residual when the plane has coded coefficients (cbf),
         * otherwise the prediction is the reconstruction */
        if (cu.m_cbf[0][0])
            primitives.cu[sizeIdx].add_ps(reconPic.getLumaAddr(cu.m_cuAddr, absPartIdx), reconPic.m_stride,
                                          predY, resiYuv.m_buf[0], predYuv.m_size, resiYuv.m_size);
        else
            primitives.cu[sizeIdx].copy_pp(reconPic.getLumaAddr(cu.m_cuAddr, absPartIdx), reconPic.m_stride,
                                           predY, predYuv.m_size);

        if (cu.m_cbf[1][0])
            primitives.chroma[m_csp].cu[sizeIdx].add_ps(reconPic.getCbAddr(cu.m_cuAddr, absPartIdx), reconPic.m_strideC,
                                                     predU, resiYuv.m_buf[1], predYuv.m_csize, resiYuv.m_csize);
        else
            primitives.chroma[m_csp].cu[sizeIdx].copy_pp(reconPic.getCbAddr(cu.m_cuAddr, absPartIdx), reconPic.m_strideC,
                                                      predU, predYuv.m_csize);

        if (cu.m_cbf[2][0])
            primitives.chroma[m_csp].cu[sizeIdx].add_ps(reconPic.getCrAddr(cu.m_cuAddr, absPartIdx), reconPic.m_strideC,
                                                     predV, resiYuv.m_buf[2], predYuv.m_csize, resiYuv.m_csize);
        else
            primitives.chroma[m_csp].cu[sizeIdx].copy_pp(reconPic.getCrAddr(cu.m_cuAddr, absPartIdx), reconPic.m_strideC,
                                                      predV, predYuv.m_csize);
    }

    /* commit the CU's coding data back to the frame-level structures */
    cu.updatePic(cuGeom.depth);
}
1999 | |
2000 void Analysis::addSplitFlagCost(Mode& mode, uint32_t depth) | |
2001 { | |
2002 if (m_param->rdLevel >= 3) | |
2003 { | |
2004 /* code the split flag (0 or 1) and update bit costs */ | |
2005 mode.contexts.resetBits(); | |
2006 mode.contexts.codeSplitFlag(mode.cu, 0, depth); | |
2007 uint32_t bits = mode.contexts.getNumberOfWrittenBits(); | |
2008 mode.mvBits += bits; | |
2009 mode.totalBits += bits; | |
2010 updateModeCost(mode); | |
2011 } | |
2012 else if (m_param->rdLevel <= 1) | |
2013 { | |
2014 mode.sa8dBits++; | |
2015 mode.sa8dCost = m_rdCost.calcRdSADCost((uint32_t)mode.distortion, mode.sa8dBits); | |
2016 } | |
2017 else | |
2018 { | |
2019 mode.mvBits++; | |
2020 mode.totalBits++; | |
2021 updateModeCost(mode); | |
2022 } | |
2023 } | |
2024 | |
2025 uint32_t Analysis::topSkipMinDepth(const CUData& parentCTU, const CUGeom& cuGeom) | |
2026 { | |
2027 /* Do not attempt to code a block larger than the largest block in the | |
2028 * co-located CTUs in L0 and L1 */ | |
2029 int currentQP = parentCTU.m_qp[0]; | |
2030 int previousQP = currentQP; | |
2031 uint32_t minDepth0 = 4, minDepth1 = 4; | |
2032 uint32_t sum = 0; | |
2033 int numRefs = 0; | |
2034 if (m_slice->m_numRefIdx[0]) | |
2035 { | |
2036 numRefs++; | |
2037 const CUData& cu = *m_slice->m_refFrameList[0][0]->m_encData->getPicCTU(parentCTU.m_cuAddr); | |
2038 previousQP = cu.m_qp[0]; | |
2039 if (!cu.m_cuDepth[cuGeom.absPartIdx]) | |
2040 return 0; | |
2041 for (uint32_t i = 0; i < cuGeom.numPartitions; i += 4) | |
2042 { | |
2043 uint32_t d = cu.m_cuDepth[cuGeom.absPartIdx + i]; | |
2044 minDepth0 = X265_MIN(d, minDepth0); | |
2045 sum += d; | |
2046 } | |
2047 } | |
2048 if (m_slice->m_numRefIdx[1]) | |
2049 { | |
2050 numRefs++; | |
2051 const CUData& cu = *m_slice->m_refFrameList[1][0]->m_encData->getPicCTU(parentCTU.m_cuAddr); | |
2052 if (!cu.m_cuDepth[cuGeom.absPartIdx]) | |
2053 return 0; | |
2054 for (uint32_t i = 0; i < cuGeom.numPartitions; i += 4) | |
2055 { | |
2056 uint32_t d = cu.m_cuDepth[cuGeom.absPartIdx + i]; | |
2057 minDepth1 = X265_MIN(d, minDepth1); | |
2058 sum += d; | |
2059 } | |
2060 } | |
2061 if (!numRefs) | |
2062 return 0; | |
2063 | |
2064 uint32_t minDepth = X265_MIN(minDepth0, minDepth1); | |
2065 uint32_t thresh = minDepth * numRefs * (cuGeom.numPartitions >> 2); | |
2066 | |
2067 /* allow block size growth if QP is raising or avg depth is | |
2068 * less than 1.5 of min depth */ | |
2069 if (minDepth && currentQP >= previousQP && (sum <= thresh + (thresh >> 1))) | |
2070 minDepth -= 1; | |
2071 | |
2072 return minDepth; | |
2073 } | |
2074 | |
2075 /* returns true if recursion should be stopped */ | |
2076 bool Analysis::recursionDepthCheck(const CUData& parentCTU, const CUGeom& cuGeom, const Mode& bestMode) | |
2077 { | |
2078 /* early exit when the RD cost of best mode at depth n is less than the sum | |
2079 * of average of RD cost of the neighbor CU's(above, aboveleft, aboveright, | |
2080 * left, colocated) and avg cost of that CU at depth "n" with weightage for | |
2081 * each quantity */ | |
2082 | |
2083 uint32_t depth = cuGeom.depth; | |
2084 FrameData& curEncData = *m_frame->m_encData; | |
2085 FrameData::RCStatCU& cuStat = curEncData.m_cuStat[parentCTU.m_cuAddr]; | |
2086 uint64_t cuCost = cuStat.avgCost[depth] * cuStat.count[depth]; | |
2087 uint64_t cuCount = cuStat.count[depth]; | |
2088 | |
2089 uint64_t neighCost = 0, neighCount = 0; | |
2090 const CUData* above = parentCTU.m_cuAbove; | |
2091 if (above) | |
2092 { | |
2093 FrameData::RCStatCU& astat = curEncData.m_cuStat[above->m_cuAddr]; | |
2094 neighCost += astat.avgCost[depth] * astat.count[depth]; | |
2095 neighCount += astat.count[depth]; | |
2096 | |
2097 const CUData* aboveLeft = parentCTU.m_cuAboveLeft; | |
2098 if (aboveLeft) | |
2099 { | |
2100 FrameData::RCStatCU& lstat = curEncData.m_cuStat[aboveLeft->m_cuAddr]; | |
2101 neighCost += lstat.avgCost[depth] * lstat.count[depth]; | |
2102 neighCount += lstat.count[depth]; | |
2103 } | |
2104 | |
2105 const CUData* aboveRight = parentCTU.m_cuAboveRight; | |
2106 if (aboveRight) | |
2107 { | |
2108 FrameData::RCStatCU& rstat = curEncData.m_cuStat[aboveRight->m_cuAddr]; | |
2109 neighCost += rstat.avgCost[depth] * rstat.count[depth]; | |
2110 neighCount += rstat.count[depth]; | |
2111 } | |
2112 } | |
2113 const CUData* left = parentCTU.m_cuLeft; | |
2114 if (left) | |
2115 { | |
2116 FrameData::RCStatCU& nstat = curEncData.m_cuStat[left->m_cuAddr]; | |
2117 neighCost += nstat.avgCost[depth] * nstat.count[depth]; | |
2118 neighCount += nstat.count[depth]; | |
2119 } | |
2120 | |
2121 // give 60% weight to all CU's and 40% weight to neighbour CU's | |
2122 if (neighCount + cuCount) | |
2123 { | |
2124 uint64_t avgCost = ((3 * cuCost) + (2 * neighCost)) / ((3 * cuCount) + (2 * neighCount)); | |
2125 uint64_t curCost = m_param->rdLevel > 1 ? bestMode.rdCost : bestMode.sa8dCost; | |
2126 if (curCost < avgCost && avgCost) | |
2127 return true; | |
2128 } | |
2129 | |
2130 return false; | |
2131 } | |
2132 | |
2133 int Analysis::calculateQpforCuSize(const CUData& ctu, const CUGeom& cuGeom) | |
2134 { | |
2135 FrameData& curEncData = *m_frame->m_encData; | |
2136 double qp = curEncData.m_cuStat[ctu.m_cuAddr].baseQp; | |
2137 | |
2138 /* Use cuTree offsets if cuTree enabled and frame is referenced, else use AQ offsets */ | |
2139 bool isReferenced = IS_REFERENCED(m_frame); | |
2140 double *qpoffs = (isReferenced && m_param->rc.cuTree) ? m_frame->m_lowres.qpCuTreeOffset : m_frame->m_lowres.qpAqOffset; | |
2141 if (qpoffs) | |
2142 { | |
2143 uint32_t width = m_frame->m_fencPic->m_picWidth; | |
2144 uint32_t height = m_frame->m_fencPic->m_picHeight; | |
2145 uint32_t block_x = ctu.m_cuPelX + g_zscanToPelX[cuGeom.absPartIdx]; | |
2146 uint32_t block_y = ctu.m_cuPelY + g_zscanToPelY[cuGeom.absPartIdx]; | |
2147 uint32_t maxCols = (m_frame->m_fencPic->m_picWidth + (16 - 1)) / 16; | |
2148 uint32_t blockSize = g_maxCUSize >> cuGeom.depth; | |
2149 double qp_offset = 0; | |
2150 uint32_t cnt = 0; | |
2151 uint32_t idx; | |
2152 | |
2153 for (uint32_t block_yy = block_y; block_yy < block_y + blockSize && block_yy < height; block_yy += 16) | |
2154 { | |
2155 for (uint32_t block_xx = block_x; block_xx < block_x + blockSize && block_xx < width; block_xx += 16) | |
2156 { | |
2157 idx = ((block_yy / 16) * (maxCols)) + (block_xx / 16); | |
2158 qp_offset += qpoffs[idx]; | |
2159 cnt++; | |
2160 } | |
2161 } | |
2162 | |
2163 qp_offset /= cnt; | |
2164 qp += qp_offset; | |
2165 } | |
2166 | |
2167 return x265_clip3(QP_MIN, QP_MAX_MAX, (int)(qp + 0.5)); | |
2168 } |