comparison x265/source/common/cudata.cpp @ 0:772086c29cc7

Initial import.
author Matti Hamalainen <ccr@tnsp.org>
date Wed, 16 Nov 2016 11:16:33 +0200
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:772086c29cc7
1 /*****************************************************************************
2 * Copyright (C) 2015 x265 project
3 *
4 * Authors: Steve Borho <steve@borho.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
19 *
20 * This program is also available under a commercial proprietary license.
21 * For more information, contact us at license @ x265.com.
22 *****************************************************************************/
23
24 #include "common.h"
25 #include "frame.h"
26 #include "framedata.h"
27 #include "picyuv.h"
28 #include "mv.h"
29 #include "cudata.h"
30
31 using namespace X265_NS;
32
33 /* for all bcast* and copy* functions, dst and src are aligned to MIN(size, 32) */
34
/* Store val into the single byte at dst */
static void bcast1(uint8_t* dst, uint8_t val)
{
    *dst = val;
}
36
/* Copy 4 bytes from src to dst.
 * A fixed-size memcpy replaces the former load/store through a casted uint32_t
 * pointer: it has no type-aliasing or alignment requirement, and optimizing
 * compilers typically lower it to the same single load/store. */
static void copy4(uint8_t* dst, uint8_t* src)
{
    memcpy(dst, src, 4);
}
/* Fill 4 bytes at dst with val.
 * A fixed-size memset replaces the former store through a casted uint32_t
 * pointer, which depended on dst being suitably aligned and on the compiler
 * tolerating the type-punned access. */
static void bcast4(uint8_t* dst, uint8_t val)
{
    memset(dst, val, 4);
}
39
/* Copy 16 bytes from src to dst.
 * A fixed-size memcpy replaces the former pair of uint64_t load/stores through
 * casted pointers, removing the type-aliasing and alignment hazards. */
static void copy16(uint8_t* dst, uint8_t* src)
{
    memcpy(dst, src, 16);
}
/* Fill 16 bytes at dst with val.
 * A fixed-size memset replaces the former uint64_t stores through a casted
 * pointer, removing the type-aliasing and alignment hazards. */
static void bcast16(uint8_t* dst, uint8_t val)
{
    memset(dst, val, 16);
}
42
/* Copy 64 bytes from src to dst.
 * A fixed-size memcpy replaces the former eight uint64_t load/stores through
 * casted pointers, removing the type-aliasing and alignment hazards. */
static void copy64(uint8_t* dst, uint8_t* src)
{
    memcpy(dst, src, 64);
}
/* Fill 64 bytes at dst with val.
 * A fixed-size memset replaces the former eight uint64_t stores through a
 * casted pointer, removing the type-aliasing and alignment hazards. */
static void bcast64(uint8_t* dst, uint8_t val)
{
    memset(dst, val, 64);
}
50
51 /* at 256 bytes, memset/memcpy will probably use SIMD more effectively than our uint64_t hack,
52 * but hand-written assembly would beat it. */
/* Copy 256 bytes from src to dst */
static void copy256(uint8_t* dst, uint8_t* src)
{
    const size_t byteCount = 256;

    memcpy(dst, src, byteCount);
}
/* Fill 256 bytes at dst with val */
static void bcast256(uint8_t* dst, uint8_t val)
{
    const size_t byteCount = 256;

    memset(dst, val, byteCount);
}
55
56 namespace {
57 // file private namespace
58
/* Return true when addrA and addrB fall in the same column of a grid that is
 * numUnits wide. The bit mask matches a modulo only for power-of-two numUnits. */
inline bool isEqualCol(int addrA, int addrB, int numUnits)
{
    // same column <=> the low (column) bits of both addresses agree
    const int columnMask = numUnits - 1;
    const int differingBits = addrA ^ addrB;

    return !(differingBits & columnMask);
}
65
/* Return true when addrA and addrB fall in the same row of a grid that is
 * numUnits wide. The bit mask matches a division only for power-of-two numUnits. */
inline bool isEqualRow(int addrA, int addrB, int numUnits)
{
    // same row <=> the bits above the column bits agree
    const int rowMask = ~(numUnits - 1);

    return (addrA & rowMask) == (addrB & rowMask);
}
72
73 /* Check whether 2 addresses point to the same row or column */
74 inline bool isEqualRowOrCol(int addrA, int addrB, int numUnits)
75 {
76 return isEqualCol(addrA, addrB, numUnits) | isEqualRow(addrA, addrB, numUnits);
77 }
78
/* Return true when addr lies in column 0 of a grid that is numUnits wide */
inline bool isZeroCol(int addr, int numUnits)
{
    // column index == addr % numUnits (power-of-two numUnits)
    const int column = addr & (numUnits - 1);

    return column == 0;
}
85
/* Return true when addr lies in row 0 of a grid that is numUnits wide */
inline bool isZeroRow(int addr, int numUnits)
{
    // row index == addr / numUnits (power-of-two numUnits); row 0 has no row bits set
    const int rowBits = addr & ~(numUnits - 1);

    return !rowBits;
}
92
/* Return true when the column index of addr is smaller than val */
inline bool lessThanCol(int addr, int val, int numUnits)
{
    // column index == addr % numUnits (power-of-two numUnits)
    const int column = addr & (numUnits - 1);

    return column < val;
}
99
/* Return true when the row index of addr is smaller than val */
inline bool lessThanRow(int addr, int val, int numUnits)
{
    // row index < val <=> addr precedes the first address of row val
    const int firstAddrOfRow = val * numUnits;

    return firstAddrOfRow > addr;
}
106
107 inline MV scaleMv(MV mv, int scale)
108 {
109 int mvx = x265_clip3(-32768, 32767, (scale * mv.x + 127 + (scale * mv.x < 0)) >> 8);
110 int mvy = x265_clip3(-32768, 32767, (scale * mv.y + 127 + (scale * mv.y < 0)) >> 8);
111
112 return MV((int16_t)mvx, (int16_t)mvy);
113 }
114
115 }
116
117 cubcast_t CUData::s_partSet[NUM_FULL_DEPTH] = { NULL, NULL, NULL, NULL, NULL };
118 uint32_t CUData::s_numPartInCUSize;
119
120 CUData::CUData()
121 {
122 memset(this, 0, sizeof(*this));
123 }
124
125 void CUData::initialize(const CUDataMemPool& dataPool, uint32_t depth, int csp, int instance)
126 {
127 m_chromaFormat = csp;
128 m_hChromaShift = CHROMA_H_SHIFT(csp);
129 m_vChromaShift = CHROMA_V_SHIFT(csp);
130 m_numPartitions = NUM_4x4_PARTITIONS >> (depth * 2);
131
132 if (!s_partSet[0])
133 {
134 s_numPartInCUSize = 1 << g_unitSizeDepth;
135 switch (g_maxLog2CUSize)
136 {
137 case 6:
138 s_partSet[0] = bcast256;
139 s_partSet[1] = bcast64;
140 s_partSet[2] = bcast16;
141 s_partSet[3] = bcast4;
142 s_partSet[4] = bcast1;
143 break;
144 case 5:
145 s_partSet[0] = bcast64;
146 s_partSet[1] = bcast16;
147 s_partSet[2] = bcast4;
148 s_partSet[3] = bcast1;
149 s_partSet[4] = NULL;
150 break;
151 case 4:
152 s_partSet[0] = bcast16;
153 s_partSet[1] = bcast4;
154 s_partSet[2] = bcast1;
155 s_partSet[3] = NULL;
156 s_partSet[4] = NULL;
157 break;
158 default:
159 X265_CHECK(0, "unexpected CTU size\n");
160 break;
161 }
162 }
163
164 switch (m_numPartitions)
165 {
166 case 256: // 64x64 CU
167 m_partCopy = copy256;
168 m_partSet = bcast256;
169 m_subPartCopy = copy64;
170 m_subPartSet = bcast64;
171 break;
172 case 64: // 32x32 CU
173 m_partCopy = copy64;
174 m_partSet = bcast64;
175 m_subPartCopy = copy16;
176 m_subPartSet = bcast16;
177 break;
178 case 16: // 16x16 CU
179 m_partCopy = copy16;
180 m_partSet = bcast16;
181 m_subPartCopy = copy4;
182 m_subPartSet = bcast4;
183 break;
184 case 4: // 8x8 CU
185 m_partCopy = copy4;
186 m_partSet = bcast4;
187 m_subPartCopy = NULL;
188 m_subPartSet = NULL;
189 break;
190 default:
191 X265_CHECK(0, "unexpected CU partition count\n");
192 break;
193 }
194
195 /* Each CU's data is layed out sequentially within the charMemBlock */
196 uint8_t *charBuf = dataPool.charMemBlock + (m_numPartitions * BytesPerPartition) * instance;
197
198 m_qp = (int8_t*)charBuf; charBuf += m_numPartitions;
199 m_log2CUSize = charBuf; charBuf += m_numPartitions;
200 m_lumaIntraDir = charBuf; charBuf += m_numPartitions;
201 m_tqBypass = charBuf; charBuf += m_numPartitions;
202 m_refIdx[0] = (int8_t*)charBuf; charBuf += m_numPartitions;
203 m_refIdx[1] = (int8_t*)charBuf; charBuf += m_numPartitions;
204 m_cuDepth = charBuf; charBuf += m_numPartitions;
205 m_predMode = charBuf; charBuf += m_numPartitions; /* the order up to here is important in initCTU() and initSubCU() */
206 m_partSize = charBuf; charBuf += m_numPartitions;
207 m_mergeFlag = charBuf; charBuf += m_numPartitions;
208 m_interDir = charBuf; charBuf += m_numPartitions;
209 m_mvpIdx[0] = charBuf; charBuf += m_numPartitions;
210 m_mvpIdx[1] = charBuf; charBuf += m_numPartitions;
211 m_tuDepth = charBuf; charBuf += m_numPartitions;
212 m_transformSkip[0] = charBuf; charBuf += m_numPartitions;
213 m_transformSkip[1] = charBuf; charBuf += m_numPartitions;
214 m_transformSkip[2] = charBuf; charBuf += m_numPartitions;
215 m_cbf[0] = charBuf; charBuf += m_numPartitions;
216 m_cbf[1] = charBuf; charBuf += m_numPartitions;
217 m_cbf[2] = charBuf; charBuf += m_numPartitions;
218 m_chromaIntraDir = charBuf; charBuf += m_numPartitions;
219
220 X265_CHECK(charBuf == dataPool.charMemBlock + (m_numPartitions * BytesPerPartition) * (instance + 1), "CU data layout is broken\n");
221
222 m_mv[0] = dataPool.mvMemBlock + (instance * 4) * m_numPartitions;
223 m_mv[1] = m_mv[0] + m_numPartitions;
224 m_mvd[0] = m_mv[1] + m_numPartitions;
225 m_mvd[1] = m_mvd[0] + m_numPartitions;
226
227 uint32_t cuSize = g_maxCUSize >> depth;
228 uint32_t sizeL = cuSize * cuSize;
229 uint32_t sizeC = sizeL >> (m_hChromaShift + m_vChromaShift);
230 m_trCoeff[0] = dataPool.trCoeffMemBlock + instance * (sizeL + sizeC * 2);
231 m_trCoeff[1] = m_trCoeff[0] + sizeL;
232 m_trCoeff[2] = m_trCoeff[0] + sizeL + sizeC;
233 }
234
235 void CUData::initCTU(const Frame& frame, uint32_t cuAddr, int qp)
236 {
237 m_encData = frame.m_encData;
238 m_slice = m_encData->m_slice;
239 m_cuAddr = cuAddr;
240 m_cuPelX = (cuAddr % m_slice->m_sps->numCuInWidth) << g_maxLog2CUSize;
241 m_cuPelY = (cuAddr / m_slice->m_sps->numCuInWidth) << g_maxLog2CUSize;
242 m_absIdxInCTU = 0;
243 m_numPartitions = NUM_4x4_PARTITIONS;
244
245 /* sequential memsets */
246 m_partSet((uint8_t*)m_qp, (uint8_t)qp);
247 m_partSet(m_log2CUSize, (uint8_t)g_maxLog2CUSize);
248 m_partSet(m_lumaIntraDir, (uint8_t)DC_IDX);
249 m_partSet(m_tqBypass, (uint8_t)frame.m_encData->m_param->bLossless);
250 if (m_slice->m_sliceType != I_SLICE)
251 {
252 m_partSet((uint8_t*)m_refIdx[0], (uint8_t)REF_NOT_VALID);
253 m_partSet((uint8_t*)m_refIdx[1], (uint8_t)REF_NOT_VALID);
254 }
255
256 X265_CHECK(!(frame.m_encData->m_param->bLossless && !m_slice->m_pps->bTransquantBypassEnabled), "lossless enabled without TQbypass in PPS\n");
257
258 /* initialize the remaining CU data in one memset */
259 memset(m_cuDepth, 0, (BytesPerPartition - 6) * m_numPartitions);
260
261 uint32_t widthInCU = m_slice->m_sps->numCuInWidth;
262 m_cuLeft = (m_cuAddr % widthInCU) ? m_encData->getPicCTU(m_cuAddr - 1) : NULL;
263 m_cuAbove = (m_cuAddr / widthInCU) ? m_encData->getPicCTU(m_cuAddr - widthInCU) : NULL;
264 m_cuAboveLeft = (m_cuLeft && m_cuAbove) ? m_encData->getPicCTU(m_cuAddr - widthInCU - 1) : NULL;
265 m_cuAboveRight = (m_cuAbove && ((m_cuAddr % widthInCU) < (widthInCU - 1))) ? m_encData->getPicCTU(m_cuAddr - widthInCU + 1) : NULL;
266 }
267
268 // initialize Sub partition
269 void CUData::initSubCU(const CUData& ctu, const CUGeom& cuGeom, int qp)
270 {
271 m_absIdxInCTU = cuGeom.absPartIdx;
272 m_encData = ctu.m_encData;
273 m_slice = ctu.m_slice;
274 m_cuAddr = ctu.m_cuAddr;
275 m_cuPelX = ctu.m_cuPelX + g_zscanToPelX[cuGeom.absPartIdx];
276 m_cuPelY = ctu.m_cuPelY + g_zscanToPelY[cuGeom.absPartIdx];
277 m_cuLeft = ctu.m_cuLeft;
278 m_cuAbove = ctu.m_cuAbove;
279 m_cuAboveLeft = ctu.m_cuAboveLeft;
280 m_cuAboveRight = ctu.m_cuAboveRight;
281 X265_CHECK(m_numPartitions == cuGeom.numPartitions, "initSubCU() size mismatch\n");
282
283 m_partSet((uint8_t*)m_qp, (uint8_t)qp);
284
285 m_partSet(m_log2CUSize, (uint8_t)cuGeom.log2CUSize);
286 m_partSet(m_lumaIntraDir, (uint8_t)DC_IDX);
287 m_partSet(m_tqBypass, (uint8_t)m_encData->m_param->bLossless);
288 m_partSet((uint8_t*)m_refIdx[0], (uint8_t)REF_NOT_VALID);
289 m_partSet((uint8_t*)m_refIdx[1], (uint8_t)REF_NOT_VALID);
290 m_partSet(m_cuDepth, (uint8_t)cuGeom.depth);
291
292 /* initialize the remaining CU data in one memset */
293 memset(m_predMode, 0, (BytesPerPartition - 7) * m_numPartitions);
294 }
295
296 /* Copy the results of a sub-part (split) CU to the parent CU */
297 void CUData::copyPartFrom(const CUData& subCU, const CUGeom& childGeom, uint32_t subPartIdx)
298 {
299 X265_CHECK(subPartIdx < 4, "part unit should be less than 4\n");
300
301 uint32_t offset = childGeom.numPartitions * subPartIdx;
302
303 m_subPartCopy((uint8_t*)m_qp + offset, (uint8_t*)subCU.m_qp);
304 m_subPartCopy(m_log2CUSize + offset, subCU.m_log2CUSize);
305 m_subPartCopy(m_lumaIntraDir + offset, subCU.m_lumaIntraDir);
306 m_subPartCopy(m_tqBypass + offset, subCU.m_tqBypass);
307 m_subPartCopy((uint8_t*)m_refIdx[0] + offset, (uint8_t*)subCU.m_refIdx[0]);
308 m_subPartCopy((uint8_t*)m_refIdx[1] + offset, (uint8_t*)subCU.m_refIdx[1]);
309 m_subPartCopy(m_cuDepth + offset, subCU.m_cuDepth);
310 m_subPartCopy(m_predMode + offset, subCU.m_predMode);
311 m_subPartCopy(m_partSize + offset, subCU.m_partSize);
312 m_subPartCopy(m_mergeFlag + offset, subCU.m_mergeFlag);
313 m_subPartCopy(m_interDir + offset, subCU.m_interDir);
314 m_subPartCopy(m_mvpIdx[0] + offset, subCU.m_mvpIdx[0]);
315 m_subPartCopy(m_mvpIdx[1] + offset, subCU.m_mvpIdx[1]);
316 m_subPartCopy(m_tuDepth + offset, subCU.m_tuDepth);
317 m_subPartCopy(m_transformSkip[0] + offset, subCU.m_transformSkip[0]);
318 m_subPartCopy(m_transformSkip[1] + offset, subCU.m_transformSkip[1]);
319 m_subPartCopy(m_transformSkip[2] + offset, subCU.m_transformSkip[2]);
320 m_subPartCopy(m_cbf[0] + offset, subCU.m_cbf[0]);
321 m_subPartCopy(m_cbf[1] + offset, subCU.m_cbf[1]);
322 m_subPartCopy(m_cbf[2] + offset, subCU.m_cbf[2]);
323 m_subPartCopy(m_chromaIntraDir + offset, subCU.m_chromaIntraDir);
324
325 memcpy(m_mv[0] + offset, subCU.m_mv[0], childGeom.numPartitions * sizeof(MV));
326 memcpy(m_mv[1] + offset, subCU.m_mv[1], childGeom.numPartitions * sizeof(MV));
327 memcpy(m_mvd[0] + offset, subCU.m_mvd[0], childGeom.numPartitions * sizeof(MV));
328 memcpy(m_mvd[1] + offset, subCU.m_mvd[1], childGeom.numPartitions * sizeof(MV));
329
330 uint32_t tmp = 1 << ((g_maxLog2CUSize - childGeom.depth) * 2);
331 uint32_t tmp2 = subPartIdx * tmp;
332 memcpy(m_trCoeff[0] + tmp2, subCU.m_trCoeff[0], sizeof(coeff_t) * tmp);
333
334 uint32_t tmpC = tmp >> (m_hChromaShift + m_vChromaShift);
335 uint32_t tmpC2 = tmp2 >> (m_hChromaShift + m_vChromaShift);
336 memcpy(m_trCoeff[1] + tmpC2, subCU.m_trCoeff[1], sizeof(coeff_t) * tmpC);
337 memcpy(m_trCoeff[2] + tmpC2, subCU.m_trCoeff[2], sizeof(coeff_t) * tmpC);
338 }
339
340 /* If a sub-CU part is not present (off the edge of the picture) its depth and
341 * log2size should still be configured */
342 void CUData::setEmptyPart(const CUGeom& childGeom, uint32_t subPartIdx)
343 {
344 uint32_t offset = childGeom.numPartitions * subPartIdx;
345 m_subPartSet(m_cuDepth + offset, (uint8_t)childGeom.depth);
346 m_subPartSet(m_log2CUSize + offset, (uint8_t)childGeom.log2CUSize);
347 }
348
349 /* Copy all CU data from one instance to the next, except set lossless flag
350 * This will only get used when --cu-lossless is enabled but --lossless is not. */
351 void CUData::initLosslessCU(const CUData& cu, const CUGeom& cuGeom)
352 {
353 /* Start by making an exact copy */
354 m_encData = cu.m_encData;
355 m_slice = cu.m_slice;
356 m_cuAddr = cu.m_cuAddr;
357 m_cuPelX = cu.m_cuPelX;
358 m_cuPelY = cu.m_cuPelY;
359 m_cuLeft = cu.m_cuLeft;
360 m_cuAbove = cu.m_cuAbove;
361 m_cuAboveLeft = cu.m_cuAboveLeft;
362 m_cuAboveRight = cu.m_cuAboveRight;
363 m_absIdxInCTU = cuGeom.absPartIdx;
364 m_numPartitions = cuGeom.numPartitions;
365 memcpy(m_qp, cu.m_qp, BytesPerPartition * m_numPartitions);
366 memcpy(m_mv[0], cu.m_mv[0], m_numPartitions * sizeof(MV));
367 memcpy(m_mv[1], cu.m_mv[1], m_numPartitions * sizeof(MV));
368 memcpy(m_mvd[0], cu.m_mvd[0], m_numPartitions * sizeof(MV));
369 memcpy(m_mvd[1], cu.m_mvd[1], m_numPartitions * sizeof(MV));
370
371 /* force TQBypass to true */
372 m_partSet(m_tqBypass, true);
373
374 /* clear residual coding flags */
375 m_partSet(m_predMode, cu.m_predMode[0] & (MODE_INTRA | MODE_INTER));
376 m_partSet(m_tuDepth, 0);
377 m_partSet(m_transformSkip[0], 0);
378 m_partSet(m_transformSkip[1], 0);
379 m_partSet(m_transformSkip[2], 0);
380 m_partSet(m_cbf[0], 0);
381 m_partSet(m_cbf[1], 0);
382 m_partSet(m_cbf[2], 0);
383 }
384
385 /* Copy completed predicted CU to CTU in picture */
386 void CUData::copyToPic(uint32_t depth) const
387 {
388 CUData& ctu = *m_encData->getPicCTU(m_cuAddr);
389
390 m_partCopy((uint8_t*)ctu.m_qp + m_absIdxInCTU, (uint8_t*)m_qp);
391 m_partCopy(ctu.m_log2CUSize + m_absIdxInCTU, m_log2CUSize);
392 m_partCopy(ctu.m_lumaIntraDir + m_absIdxInCTU, m_lumaIntraDir);
393 m_partCopy(ctu.m_tqBypass + m_absIdxInCTU, m_tqBypass);
394 m_partCopy((uint8_t*)ctu.m_refIdx[0] + m_absIdxInCTU, (uint8_t*)m_refIdx[0]);
395 m_partCopy((uint8_t*)ctu.m_refIdx[1] + m_absIdxInCTU, (uint8_t*)m_refIdx[1]);
396 m_partCopy(ctu.m_cuDepth + m_absIdxInCTU, m_cuDepth);
397 m_partCopy(ctu.m_predMode + m_absIdxInCTU, m_predMode);
398 m_partCopy(ctu.m_partSize + m_absIdxInCTU, m_partSize);
399 m_partCopy(ctu.m_mergeFlag + m_absIdxInCTU, m_mergeFlag);
400 m_partCopy(ctu.m_interDir + m_absIdxInCTU, m_interDir);
401 m_partCopy(ctu.m_mvpIdx[0] + m_absIdxInCTU, m_mvpIdx[0]);
402 m_partCopy(ctu.m_mvpIdx[1] + m_absIdxInCTU, m_mvpIdx[1]);
403 m_partCopy(ctu.m_tuDepth + m_absIdxInCTU, m_tuDepth);
404 m_partCopy(ctu.m_transformSkip[0] + m_absIdxInCTU, m_transformSkip[0]);
405 m_partCopy(ctu.m_transformSkip[1] + m_absIdxInCTU, m_transformSkip[1]);
406 m_partCopy(ctu.m_transformSkip[2] + m_absIdxInCTU, m_transformSkip[2]);
407 m_partCopy(ctu.m_cbf[0] + m_absIdxInCTU, m_cbf[0]);
408 m_partCopy(ctu.m_cbf[1] + m_absIdxInCTU, m_cbf[1]);
409 m_partCopy(ctu.m_cbf[2] + m_absIdxInCTU, m_cbf[2]);
410 m_partCopy(ctu.m_chromaIntraDir + m_absIdxInCTU, m_chromaIntraDir);
411
412 memcpy(ctu.m_mv[0] + m_absIdxInCTU, m_mv[0], m_numPartitions * sizeof(MV));
413 memcpy(ctu.m_mv[1] + m_absIdxInCTU, m_mv[1], m_numPartitions * sizeof(MV));
414 memcpy(ctu.m_mvd[0] + m_absIdxInCTU, m_mvd[0], m_numPartitions * sizeof(MV));
415 memcpy(ctu.m_mvd[1] + m_absIdxInCTU, m_mvd[1], m_numPartitions * sizeof(MV));
416
417 uint32_t tmpY = 1 << ((g_maxLog2CUSize - depth) * 2);
418 uint32_t tmpY2 = m_absIdxInCTU << (LOG2_UNIT_SIZE * 2);
419 memcpy(ctu.m_trCoeff[0] + tmpY2, m_trCoeff[0], sizeof(coeff_t) * tmpY);
420
421 uint32_t tmpC = tmpY >> (m_hChromaShift + m_vChromaShift);
422 uint32_t tmpC2 = tmpY2 >> (m_hChromaShift + m_vChromaShift);
423 memcpy(ctu.m_trCoeff[1] + tmpC2, m_trCoeff[1], sizeof(coeff_t) * tmpC);
424 memcpy(ctu.m_trCoeff[2] + tmpC2, m_trCoeff[2], sizeof(coeff_t) * tmpC);
425 }
426
427 /* The reverse of copyToPic, called only by encodeResidue */
428 void CUData::copyFromPic(const CUData& ctu, const CUGeom& cuGeom)
429 {
430 m_encData = ctu.m_encData;
431 m_slice = ctu.m_slice;
432 m_cuAddr = ctu.m_cuAddr;
433 m_cuPelX = ctu.m_cuPelX + g_zscanToPelX[cuGeom.absPartIdx];
434 m_cuPelY = ctu.m_cuPelY + g_zscanToPelY[cuGeom.absPartIdx];
435 m_absIdxInCTU = cuGeom.absPartIdx;
436 m_numPartitions = cuGeom.numPartitions;
437
438 /* copy out all prediction info for this part */
439 m_partCopy((uint8_t*)m_qp, (uint8_t*)ctu.m_qp + m_absIdxInCTU);
440 m_partCopy(m_log2CUSize, ctu.m_log2CUSize + m_absIdxInCTU);
441 m_partCopy(m_lumaIntraDir, ctu.m_lumaIntraDir + m_absIdxInCTU);
442 m_partCopy(m_tqBypass, ctu.m_tqBypass + m_absIdxInCTU);
443 m_partCopy((uint8_t*)m_refIdx[0], (uint8_t*)ctu.m_refIdx[0] + m_absIdxInCTU);
444 m_partCopy((uint8_t*)m_refIdx[1], (uint8_t*)ctu.m_refIdx[1] + m_absIdxInCTU);
445 m_partCopy(m_cuDepth, ctu.m_cuDepth + m_absIdxInCTU);
446 m_partSet(m_predMode, ctu.m_predMode[m_absIdxInCTU] & (MODE_INTRA | MODE_INTER)); /* clear skip flag */
447 m_partCopy(m_partSize, ctu.m_partSize + m_absIdxInCTU);
448 m_partCopy(m_mergeFlag, ctu.m_mergeFlag + m_absIdxInCTU);
449 m_partCopy(m_interDir, ctu.m_interDir + m_absIdxInCTU);
450 m_partCopy(m_mvpIdx[0], ctu.m_mvpIdx[0] + m_absIdxInCTU);
451 m_partCopy(m_mvpIdx[1], ctu.m_mvpIdx[1] + m_absIdxInCTU);
452 m_partCopy(m_chromaIntraDir, ctu.m_chromaIntraDir + m_absIdxInCTU);
453
454 memcpy(m_mv[0], ctu.m_mv[0] + m_absIdxInCTU, m_numPartitions * sizeof(MV));
455 memcpy(m_mv[1], ctu.m_mv[1] + m_absIdxInCTU, m_numPartitions * sizeof(MV));
456 memcpy(m_mvd[0], ctu.m_mvd[0] + m_absIdxInCTU, m_numPartitions * sizeof(MV));
457 memcpy(m_mvd[1], ctu.m_mvd[1] + m_absIdxInCTU, m_numPartitions * sizeof(MV));
458
459 /* clear residual coding flags */
460 m_partSet(m_tuDepth, 0);
461 m_partSet(m_transformSkip[0], 0);
462 m_partSet(m_transformSkip[1], 0);
463 m_partSet(m_transformSkip[2], 0);
464 m_partSet(m_cbf[0], 0);
465 m_partSet(m_cbf[1], 0);
466 m_partSet(m_cbf[2], 0);
467 }
468
469 /* Only called by encodeResidue, these fields can be modified during inter/intra coding */
470 void CUData::updatePic(uint32_t depth) const
471 {
472 CUData& ctu = *m_encData->getPicCTU(m_cuAddr);
473
474 m_partCopy((uint8_t*)ctu.m_qp + m_absIdxInCTU, (uint8_t*)m_qp);
475 m_partCopy(ctu.m_transformSkip[0] + m_absIdxInCTU, m_transformSkip[0]);
476 m_partCopy(ctu.m_transformSkip[1] + m_absIdxInCTU, m_transformSkip[1]);
477 m_partCopy(ctu.m_transformSkip[2] + m_absIdxInCTU, m_transformSkip[2]);
478 m_partCopy(ctu.m_predMode + m_absIdxInCTU, m_predMode);
479 m_partCopy(ctu.m_tuDepth + m_absIdxInCTU, m_tuDepth);
480 m_partCopy(ctu.m_cbf[0] + m_absIdxInCTU, m_cbf[0]);
481 m_partCopy(ctu.m_cbf[1] + m_absIdxInCTU, m_cbf[1]);
482 m_partCopy(ctu.m_cbf[2] + m_absIdxInCTU, m_cbf[2]);
483 m_partCopy(ctu.m_chromaIntraDir + m_absIdxInCTU, m_chromaIntraDir);
484
485 uint32_t tmpY = 1 << ((g_maxLog2CUSize - depth) * 2);
486 uint32_t tmpY2 = m_absIdxInCTU << (LOG2_UNIT_SIZE * 2);
487 memcpy(ctu.m_trCoeff[0] + tmpY2, m_trCoeff[0], sizeof(coeff_t) * tmpY);
488 tmpY >>= m_hChromaShift + m_vChromaShift;
489 tmpY2 >>= m_hChromaShift + m_vChromaShift;
490 memcpy(ctu.m_trCoeff[1] + tmpY2, m_trCoeff[1], sizeof(coeff_t) * tmpY);
491 memcpy(ctu.m_trCoeff[2] + tmpY2, m_trCoeff[2], sizeof(coeff_t) * tmpY);
492 }
493
494 const CUData* CUData::getPULeft(uint32_t& lPartUnitIdx, uint32_t curPartUnitIdx) const
495 {
496 uint32_t absPartIdx = g_zscanToRaster[curPartUnitIdx];
497
498 if (!isZeroCol(absPartIdx, s_numPartInCUSize))
499 {
500 uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInCTU];
501 lPartUnitIdx = g_rasterToZscan[absPartIdx - 1];
502 if (isEqualCol(absPartIdx, absZorderCUIdx, s_numPartInCUSize))
503 return m_encData->getPicCTU(m_cuAddr);
504 else
505 {
506 lPartUnitIdx -= m_absIdxInCTU;
507 return this;
508 }
509 }
510
511 lPartUnitIdx = g_rasterToZscan[absPartIdx + s_numPartInCUSize - 1];
512 return m_cuLeft;
513 }
514
515 const CUData* CUData::getPUAbove(uint32_t& aPartUnitIdx, uint32_t curPartUnitIdx) const
516 {
517 uint32_t absPartIdx = g_zscanToRaster[curPartUnitIdx];
518
519 if (!isZeroRow(absPartIdx, s_numPartInCUSize))
520 {
521 uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInCTU];
522 aPartUnitIdx = g_rasterToZscan[absPartIdx - s_numPartInCUSize];
523 if (isEqualRow(absPartIdx, absZorderCUIdx, s_numPartInCUSize))
524 return m_encData->getPicCTU(m_cuAddr);
525 else
526 aPartUnitIdx -= m_absIdxInCTU;
527 return this;
528 }
529
530 aPartUnitIdx = g_rasterToZscan[absPartIdx + NUM_4x4_PARTITIONS - s_numPartInCUSize];
531 return m_cuAbove;
532 }
533
534 const CUData* CUData::getPUAboveLeft(uint32_t& alPartUnitIdx, uint32_t curPartUnitIdx) const
535 {
536 uint32_t absPartIdx = g_zscanToRaster[curPartUnitIdx];
537
538 if (!isZeroCol(absPartIdx, s_numPartInCUSize))
539 {
540 if (!isZeroRow(absPartIdx, s_numPartInCUSize))
541 {
542 uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInCTU];
543 alPartUnitIdx = g_rasterToZscan[absPartIdx - s_numPartInCUSize - 1];
544 if (isEqualRowOrCol(absPartIdx, absZorderCUIdx, s_numPartInCUSize))
545 return m_encData->getPicCTU(m_cuAddr);
546 else
547 {
548 alPartUnitIdx -= m_absIdxInCTU;
549 return this;
550 }
551 }
552 alPartUnitIdx = g_rasterToZscan[absPartIdx + NUM_4x4_PARTITIONS - s_numPartInCUSize - 1];
553 return m_cuAbove;
554 }
555
556 if (!isZeroRow(absPartIdx, s_numPartInCUSize))
557 {
558 alPartUnitIdx = g_rasterToZscan[absPartIdx - 1];
559 return m_cuLeft;
560 }
561
562 alPartUnitIdx = g_rasterToZscan[NUM_4x4_PARTITIONS - 1];
563 return m_cuAboveLeft;
564 }
565
566 const CUData* CUData::getPUAboveRight(uint32_t& arPartUnitIdx, uint32_t curPartUnitIdx) const
567 {
568 if ((m_encData->getPicCTU(m_cuAddr)->m_cuPelX + g_zscanToPelX[curPartUnitIdx] + UNIT_SIZE) >= m_slice->m_sps->picWidthInLumaSamples)
569 return NULL;
570
571 uint32_t absPartIdxRT = g_zscanToRaster[curPartUnitIdx];
572
573 if (lessThanCol(absPartIdxRT, s_numPartInCUSize - 1, s_numPartInCUSize))
574 {
575 if (!isZeroRow(absPartIdxRT, s_numPartInCUSize))
576 {
577 if (curPartUnitIdx > g_rasterToZscan[absPartIdxRT - s_numPartInCUSize + 1])
578 {
579 uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInCTU] + (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1;
580 arPartUnitIdx = g_rasterToZscan[absPartIdxRT - s_numPartInCUSize + 1];
581 if (isEqualRowOrCol(absPartIdxRT, absZorderCUIdx, s_numPartInCUSize))
582 return m_encData->getPicCTU(m_cuAddr);
583 else
584 {
585 arPartUnitIdx -= m_absIdxInCTU;
586 return this;
587 }
588 }
589 return NULL;
590 }
591 arPartUnitIdx = g_rasterToZscan[absPartIdxRT + NUM_4x4_PARTITIONS - s_numPartInCUSize + 1];
592 return m_cuAbove;
593 }
594
595 if (!isZeroRow(absPartIdxRT, s_numPartInCUSize))
596 return NULL;
597
598 arPartUnitIdx = g_rasterToZscan[NUM_4x4_PARTITIONS - s_numPartInCUSize];
599 return m_cuAboveRight;
600 }
601
602 const CUData* CUData::getPUBelowLeft(uint32_t& blPartUnitIdx, uint32_t curPartUnitIdx) const
603 {
604 if ((m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[curPartUnitIdx] + UNIT_SIZE) >= m_slice->m_sps->picHeightInLumaSamples)
605 return NULL;
606
607 uint32_t absPartIdxLB = g_zscanToRaster[curPartUnitIdx];
608
609 if (lessThanRow(absPartIdxLB, s_numPartInCUSize - 1, s_numPartInCUSize))
610 {
611 if (!isZeroCol(absPartIdxLB, s_numPartInCUSize))
612 {
613 if (curPartUnitIdx > g_rasterToZscan[absPartIdxLB + s_numPartInCUSize - 1])
614 {
615 uint32_t absZorderCUIdxLB = g_zscanToRaster[m_absIdxInCTU] + ((1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1) * s_numPartInCUSize;
616 blPartUnitIdx = g_rasterToZscan[absPartIdxLB + s_numPartInCUSize - 1];
617 if (isEqualRowOrCol(absPartIdxLB, absZorderCUIdxLB, s_numPartInCUSize))
618 return m_encData->getPicCTU(m_cuAddr);
619 else
620 {
621 blPartUnitIdx -= m_absIdxInCTU;
622 return this;
623 }
624 }
625 return NULL;
626 }
627 blPartUnitIdx = g_rasterToZscan[absPartIdxLB + s_numPartInCUSize * 2 - 1];
628 return m_cuLeft;
629 }
630
631 return NULL;
632 }
633
634 const CUData* CUData::getPUBelowLeftAdi(uint32_t& blPartUnitIdx, uint32_t curPartUnitIdx, uint32_t partUnitOffset) const
635 {
636 if ((m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[curPartUnitIdx] + (partUnitOffset << LOG2_UNIT_SIZE)) >= m_slice->m_sps->picHeightInLumaSamples)
637 return NULL;
638
639 uint32_t absPartIdxLB = g_zscanToRaster[curPartUnitIdx];
640
641 if (lessThanRow(absPartIdxLB, s_numPartInCUSize - partUnitOffset, s_numPartInCUSize))
642 {
643 if (!isZeroCol(absPartIdxLB, s_numPartInCUSize))
644 {
645 if (curPartUnitIdx > g_rasterToZscan[absPartIdxLB + partUnitOffset * s_numPartInCUSize - 1])
646 {
647 uint32_t absZorderCUIdxLB = g_zscanToRaster[m_absIdxInCTU] + ((1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1) * s_numPartInCUSize;
648 blPartUnitIdx = g_rasterToZscan[absPartIdxLB + partUnitOffset * s_numPartInCUSize - 1];
649 if (isEqualRowOrCol(absPartIdxLB, absZorderCUIdxLB, s_numPartInCUSize))
650 return m_encData->getPicCTU(m_cuAddr);
651 else
652 {
653 blPartUnitIdx -= m_absIdxInCTU;
654 return this;
655 }
656 }
657 return NULL;
658 }
659 blPartUnitIdx = g_rasterToZscan[absPartIdxLB + (1 + partUnitOffset) * s_numPartInCUSize - 1];
660 return m_cuLeft;
661 }
662
663 return NULL;
664 }
665
666 const CUData* CUData::getPUAboveRightAdi(uint32_t& arPartUnitIdx, uint32_t curPartUnitIdx, uint32_t partUnitOffset) const
667 {
668 if ((m_encData->getPicCTU(m_cuAddr)->m_cuPelX + g_zscanToPelX[curPartUnitIdx] + (partUnitOffset << LOG2_UNIT_SIZE)) >= m_slice->m_sps->picWidthInLumaSamples)
669 return NULL;
670
671 uint32_t absPartIdxRT = g_zscanToRaster[curPartUnitIdx];
672
673 if (lessThanCol(absPartIdxRT, s_numPartInCUSize - partUnitOffset, s_numPartInCUSize))
674 {
675 if (!isZeroRow(absPartIdxRT, s_numPartInCUSize))
676 {
677 if (curPartUnitIdx > g_rasterToZscan[absPartIdxRT - s_numPartInCUSize + partUnitOffset])
678 {
679 uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInCTU] + (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1;
680 arPartUnitIdx = g_rasterToZscan[absPartIdxRT - s_numPartInCUSize + partUnitOffset];
681 if (isEqualRowOrCol(absPartIdxRT, absZorderCUIdx, s_numPartInCUSize))
682 return m_encData->getPicCTU(m_cuAddr);
683 else
684 {
685 arPartUnitIdx -= m_absIdxInCTU;
686 return this;
687 }
688 }
689 return NULL;
690 }
691 arPartUnitIdx = g_rasterToZscan[absPartIdxRT + NUM_4x4_PARTITIONS - s_numPartInCUSize + partUnitOffset];
692 return m_cuAbove;
693 }
694
695 if (!isZeroRow(absPartIdxRT, s_numPartInCUSize))
696 return NULL;
697
698 arPartUnitIdx = g_rasterToZscan[NUM_4x4_PARTITIONS - s_numPartInCUSize + partUnitOffset - 1];
699 return m_cuAboveRight;
700 }
701
702 /* Get left QpMinCu */
703 const CUData* CUData::getQpMinCuLeft(uint32_t& lPartUnitIdx, uint32_t curAbsIdxInCTU) const
704 {
705 uint32_t absZorderQpMinCUIdx = curAbsIdxInCTU & (0xFF << (g_unitSizeDepth - m_slice->m_pps->maxCuDQPDepth) * 2);
706 uint32_t absRorderQpMinCUIdx = g_zscanToRaster[absZorderQpMinCUIdx];
707
708 // check for left CTU boundary
709 if (isZeroCol(absRorderQpMinCUIdx, s_numPartInCUSize))
710 return NULL;
711
712 // get index of left-CU relative to top-left corner of current quantization group
713 lPartUnitIdx = g_rasterToZscan[absRorderQpMinCUIdx - 1];
714
715 // return pointer to current CTU
716 return m_encData->getPicCTU(m_cuAddr);
717 }
718
719 /* Get above QpMinCu */
720 const CUData* CUData::getQpMinCuAbove(uint32_t& aPartUnitIdx, uint32_t curAbsIdxInCTU) const
721 {
722 uint32_t absZorderQpMinCUIdx = curAbsIdxInCTU & (0xFF << (g_unitSizeDepth - m_slice->m_pps->maxCuDQPDepth) * 2);
723 uint32_t absRorderQpMinCUIdx = g_zscanToRaster[absZorderQpMinCUIdx];
724
725 // check for top CTU boundary
726 if (isZeroRow(absRorderQpMinCUIdx, s_numPartInCUSize))
727 return NULL;
728
729 // get index of top-CU relative to top-left corner of current quantization group
730 aPartUnitIdx = g_rasterToZscan[absRorderQpMinCUIdx - s_numPartInCUSize];
731
732 // return pointer to current CTU
733 return m_encData->getPicCTU(m_cuAddr);
734 }
735
736 /* Get reference QP from left QpMinCu or latest coded QP */
737 int8_t CUData::getRefQP(uint32_t curAbsIdxInCTU) const
738 {
739 uint32_t lPartIdx = 0, aPartIdx = 0;
740 const CUData* cULeft = getQpMinCuLeft(lPartIdx, m_absIdxInCTU + curAbsIdxInCTU);
741 const CUData* cUAbove = getQpMinCuAbove(aPartIdx, m_absIdxInCTU + curAbsIdxInCTU);
742
743 return ((cULeft ? cULeft->m_qp[lPartIdx] : getLastCodedQP(curAbsIdxInCTU)) + (cUAbove ? cUAbove->m_qp[aPartIdx] : getLastCodedQP(curAbsIdxInCTU)) + 1) >> 1;
744 }
745
746 int CUData::getLastValidPartIdx(int absPartIdx) const
747 {
748 int lastValidPartIdx = absPartIdx - 1;
749
750 while (lastValidPartIdx >= 0 && m_predMode[lastValidPartIdx] == MODE_NONE)
751 {
752 uint32_t depth = m_cuDepth[lastValidPartIdx];
753 lastValidPartIdx -= m_numPartitions >> (depth << 1);
754 }
755
756 return lastValidPartIdx;
757 }
758
759 int8_t CUData::getLastCodedQP(uint32_t absPartIdx) const
760 {
761 uint32_t quPartIdxMask = 0xFF << (g_unitSizeDepth - m_slice->m_pps->maxCuDQPDepth) * 2;
762 int lastValidPartIdx = getLastValidPartIdx(absPartIdx & quPartIdxMask);
763
764 if (lastValidPartIdx >= 0)
765 return m_qp[lastValidPartIdx];
766 else
767 {
768 if (m_absIdxInCTU)
769 return m_encData->getPicCTU(m_cuAddr)->getLastCodedQP(m_absIdxInCTU);
770 else if (m_cuAddr > 0 && !(m_slice->m_pps->bEntropyCodingSyncEnabled && !(m_cuAddr % m_slice->m_sps->numCuInWidth)))
771 return m_encData->getPicCTU(m_cuAddr - 1)->getLastCodedQP(NUM_4x4_PARTITIONS);
772 else
773 return (int8_t)m_slice->m_sliceQp;
774 }
775 }
776
777 /* Get allowed chroma intra modes */
778 void CUData::getAllowedChromaDir(uint32_t absPartIdx, uint32_t* modeList) const
779 {
780 modeList[0] = PLANAR_IDX;
781 modeList[1] = VER_IDX;
782 modeList[2] = HOR_IDX;
783 modeList[3] = DC_IDX;
784 modeList[4] = DM_CHROMA_IDX;
785
786 uint32_t lumaMode = m_lumaIntraDir[absPartIdx];
787
788 for (int i = 0; i < NUM_CHROMA_MODE - 1; i++)
789 {
790 if (lumaMode == modeList[i])
791 {
792 modeList[i] = 34; // VER+8 mode
793 break;
794 }
795 }
796 }
797
798 /* Get most probable intra modes */
799 int CUData::getIntraDirLumaPredictor(uint32_t absPartIdx, uint32_t* intraDirPred) const
800 {
801 const CUData* tempCU;
802 uint32_t tempPartIdx;
803 uint32_t leftIntraDir, aboveIntraDir;
804
805 // Get intra direction of left PU
806 tempCU = getPULeft(tempPartIdx, m_absIdxInCTU + absPartIdx);
807
808 leftIntraDir = (tempCU && tempCU->isIntra(tempPartIdx)) ? tempCU->m_lumaIntraDir[tempPartIdx] : DC_IDX;
809
810 // Get intra direction of above PU
811 tempCU = g_zscanToPelY[m_absIdxInCTU + absPartIdx] > 0 ? getPUAbove(tempPartIdx, m_absIdxInCTU + absPartIdx) : NULL;
812
813 aboveIntraDir = (tempCU && tempCU->isIntra(tempPartIdx)) ? tempCU->m_lumaIntraDir[tempPartIdx] : DC_IDX;
814
815 if (leftIntraDir == aboveIntraDir)
816 {
817 if (leftIntraDir >= 2) // angular modes
818 {
819 intraDirPred[0] = leftIntraDir;
820 intraDirPred[1] = ((leftIntraDir - 2 + 31) & 31) + 2;
821 intraDirPred[2] = ((leftIntraDir - 2 + 1) & 31) + 2;
822 }
823 else //non-angular
824 {
825 intraDirPred[0] = PLANAR_IDX;
826 intraDirPred[1] = DC_IDX;
827 intraDirPred[2] = VER_IDX;
828 }
829 return 1;
830 }
831 else
832 {
833 intraDirPred[0] = leftIntraDir;
834 intraDirPred[1] = aboveIntraDir;
835
836 if (leftIntraDir && aboveIntraDir) //both modes are non-planar
837 intraDirPred[2] = PLANAR_IDX;
838 else
839 intraDirPred[2] = (leftIntraDir + aboveIntraDir) < 2 ? VER_IDX : DC_IDX;
840 return 2;
841 }
842 }
843
844 uint32_t CUData::getCtxSplitFlag(uint32_t absPartIdx, uint32_t depth) const
845 {
846 const CUData* tempCU;
847 uint32_t tempPartIdx;
848 uint32_t ctx;
849
850 // Get left split flag
851 tempCU = getPULeft(tempPartIdx, m_absIdxInCTU + absPartIdx);
852 ctx = (tempCU) ? ((tempCU->m_cuDepth[tempPartIdx] > depth) ? 1 : 0) : 0;
853
854 // Get above split flag
855 tempCU = getPUAbove(tempPartIdx, m_absIdxInCTU + absPartIdx);
856 ctx += (tempCU) ? ((tempCU->m_cuDepth[tempPartIdx] > depth) ? 1 : 0) : 0;
857
858 return ctx;
859 }
860
861 void CUData::getIntraTUQtDepthRange(uint32_t tuDepthRange[2], uint32_t absPartIdx) const
862 {
863 uint32_t log2CUSize = m_log2CUSize[absPartIdx];
864 uint32_t splitFlag = m_partSize[absPartIdx] != SIZE_2Nx2N;
865
866 tuDepthRange[0] = m_slice->m_sps->quadtreeTULog2MinSize;
867 tuDepthRange[1] = m_slice->m_sps->quadtreeTULog2MaxSize;
868
869 tuDepthRange[0] = x265_clip3(tuDepthRange[0], tuDepthRange[1], log2CUSize - (m_slice->m_sps->quadtreeTUMaxDepthIntra - 1 + splitFlag));
870 }
871
872 void CUData::getInterTUQtDepthRange(uint32_t tuDepthRange[2], uint32_t absPartIdx) const
873 {
874 uint32_t log2CUSize = m_log2CUSize[absPartIdx];
875 uint32_t quadtreeTUMaxDepth = m_slice->m_sps->quadtreeTUMaxDepthInter;
876 uint32_t splitFlag = quadtreeTUMaxDepth == 1 && m_partSize[absPartIdx] != SIZE_2Nx2N;
877
878 tuDepthRange[0] = m_slice->m_sps->quadtreeTULog2MinSize;
879 tuDepthRange[1] = m_slice->m_sps->quadtreeTULog2MaxSize;
880
881 tuDepthRange[0] = x265_clip3(tuDepthRange[0], tuDepthRange[1], log2CUSize - (quadtreeTUMaxDepth - 1 + splitFlag));
882 }
883
884 uint32_t CUData::getCtxSkipFlag(uint32_t absPartIdx) const
885 {
886 const CUData* tempCU;
887 uint32_t tempPartIdx;
888 uint32_t ctx;
889
890 // Get BCBP of left PU
891 tempCU = getPULeft(tempPartIdx, m_absIdxInCTU + absPartIdx);
892 ctx = tempCU ? tempCU->isSkipped(tempPartIdx) : 0;
893
894 // Get BCBP of above PU
895 tempCU = getPUAbove(tempPartIdx, m_absIdxInCTU + absPartIdx);
896 ctx += tempCU ? tempCU->isSkipped(tempPartIdx) : 0;
897
898 return ctx;
899 }
900
901 bool CUData::setQPSubCUs(int8_t qp, uint32_t absPartIdx, uint32_t depth)
902 {
903 uint32_t curPartNumb = NUM_4x4_PARTITIONS >> (depth << 1);
904 uint32_t curPartNumQ = curPartNumb >> 2;
905
906 if (m_cuDepth[absPartIdx] > depth)
907 {
908 for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
909 if (setQPSubCUs(qp, absPartIdx + subPartIdx * curPartNumQ, depth + 1))
910 return true;
911 }
912 else
913 {
914 if (getQtRootCbf(absPartIdx))
915 return true;
916 else
917 setQPSubParts(qp, absPartIdx, depth);
918 }
919
920 return false;
921 }
922
923 void CUData::setPUInterDir(uint8_t dir, uint32_t absPartIdx, uint32_t puIdx)
924 {
925 uint32_t curPartNumQ = m_numPartitions >> 2;
926 X265_CHECK(puIdx < 2, "unexpected part unit index\n");
927
928 switch (m_partSize[absPartIdx])
929 {
930 case SIZE_2Nx2N:
931 memset(m_interDir + absPartIdx, dir, 4 * curPartNumQ);
932 break;
933 case SIZE_2NxN:
934 memset(m_interDir + absPartIdx, dir, 2 * curPartNumQ);
935 break;
936 case SIZE_Nx2N:
937 memset(m_interDir + absPartIdx, dir, curPartNumQ);
938 memset(m_interDir + absPartIdx + 2 * curPartNumQ, dir, curPartNumQ);
939 break;
940 case SIZE_NxN:
941 memset(m_interDir + absPartIdx, dir, curPartNumQ);
942 break;
943 case SIZE_2NxnU:
944 if (!puIdx)
945 {
946 memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 1));
947 memset(m_interDir + absPartIdx + curPartNumQ, dir, (curPartNumQ >> 1));
948 }
949 else
950 {
951 memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 1));
952 memset(m_interDir + absPartIdx + curPartNumQ, dir, ((curPartNumQ >> 1) + (curPartNumQ << 1)));
953 }
954 break;
955 case SIZE_2NxnD:
956 if (!puIdx)
957 {
958 memset(m_interDir + absPartIdx, dir, ((curPartNumQ << 1) + (curPartNumQ >> 1)));
959 memset(m_interDir + absPartIdx + (curPartNumQ << 1) + curPartNumQ, dir, (curPartNumQ >> 1));
960 }
961 else
962 {
963 memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 1));
964 memset(m_interDir + absPartIdx + curPartNumQ, dir, (curPartNumQ >> 1));
965 }
966 break;
967 case SIZE_nLx2N:
968 if (!puIdx)
969 {
970 memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 2));
971 memset(m_interDir + absPartIdx + (curPartNumQ >> 1), dir, (curPartNumQ >> 2));
972 memset(m_interDir + absPartIdx + (curPartNumQ << 1), dir, (curPartNumQ >> 2));
973 memset(m_interDir + absPartIdx + (curPartNumQ << 1) + (curPartNumQ >> 1), dir, (curPartNumQ >> 2));
974 }
975 else
976 {
977 memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 2));
978 memset(m_interDir + absPartIdx + (curPartNumQ >> 1), dir, (curPartNumQ + (curPartNumQ >> 2)));
979 memset(m_interDir + absPartIdx + (curPartNumQ << 1), dir, (curPartNumQ >> 2));
980 memset(m_interDir + absPartIdx + (curPartNumQ << 1) + (curPartNumQ >> 1), dir, (curPartNumQ + (curPartNumQ >> 2)));
981 }
982 break;
983 case SIZE_nRx2N:
984 if (!puIdx)
985 {
986 memset(m_interDir + absPartIdx, dir, (curPartNumQ + (curPartNumQ >> 2)));
987 memset(m_interDir + absPartIdx + curPartNumQ + (curPartNumQ >> 1), dir, (curPartNumQ >> 2));
988 memset(m_interDir + absPartIdx + (curPartNumQ << 1), dir, (curPartNumQ + (curPartNumQ >> 2)));
989 memset(m_interDir + absPartIdx + (curPartNumQ << 1) + curPartNumQ + (curPartNumQ >> 1), dir, (curPartNumQ >> 2));
990 }
991 else
992 {
993 memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 2));
994 memset(m_interDir + absPartIdx + (curPartNumQ >> 1), dir, (curPartNumQ >> 2));
995 memset(m_interDir + absPartIdx + (curPartNumQ << 1), dir, (curPartNumQ >> 2));
996 memset(m_interDir + absPartIdx + (curPartNumQ << 1) + (curPartNumQ >> 1), dir, (curPartNumQ >> 2));
997 }
998 break;
999 default:
1000 X265_CHECK(0, "unexpected part type\n");
1001 break;
1002 }
1003 }
1004
1005 template<typename T>
1006 void CUData::setAllPU(T* p, const T& val, int absPartIdx, int puIdx)
1007 {
1008 int i;
1009
1010 p += absPartIdx;
1011 int numElements = m_numPartitions;
1012
1013 switch (m_partSize[absPartIdx])
1014 {
1015 case SIZE_2Nx2N:
1016 for (i = 0; i < numElements; i++)
1017 p[i] = val;
1018 break;
1019
1020 case SIZE_2NxN:
1021 numElements >>= 1;
1022 for (i = 0; i < numElements; i++)
1023 p[i] = val;
1024 break;
1025
1026 case SIZE_Nx2N:
1027 numElements >>= 2;
1028 for (i = 0; i < numElements; i++)
1029 {
1030 p[i] = val;
1031 p[i + 2 * numElements] = val;
1032 }
1033 break;
1034
1035 case SIZE_2NxnU:
1036 {
1037 int curPartNumQ = numElements >> 2;
1038 if (!puIdx)
1039 {
1040 T *pT = p;
1041 T *pT2 = p + curPartNumQ;
1042 for (i = 0; i < (curPartNumQ >> 1); i++)
1043 {
1044 pT[i] = val;
1045 pT2[i] = val;
1046 }
1047 }
1048 else
1049 {
1050 T *pT = p;
1051 for (i = 0; i < (curPartNumQ >> 1); i++)
1052 pT[i] = val;
1053
1054 pT = p + curPartNumQ;
1055 for (i = 0; i < ((curPartNumQ >> 1) + (curPartNumQ << 1)); i++)
1056 pT[i] = val;
1057 }
1058 break;
1059 }
1060
1061 case SIZE_2NxnD:
1062 {
1063 int curPartNumQ = numElements >> 2;
1064 if (!puIdx)
1065 {
1066 T *pT = p;
1067 for (i = 0; i < ((curPartNumQ >> 1) + (curPartNumQ << 1)); i++)
1068 pT[i] = val;
1069
1070 pT = p + (numElements - curPartNumQ);
1071 for (i = 0; i < (curPartNumQ >> 1); i++)
1072 pT[i] = val;
1073 }
1074 else
1075 {
1076 T *pT = p;
1077 T *pT2 = p + curPartNumQ;
1078 for (i = 0; i < (curPartNumQ >> 1); i++)
1079 {
1080 pT[i] = val;
1081 pT2[i] = val;
1082 }
1083 }
1084 break;
1085 }
1086
1087 case SIZE_nLx2N:
1088 {
1089 int curPartNumQ = numElements >> 2;
1090 if (!puIdx)
1091 {
1092 T *pT = p;
1093 T *pT2 = p + (curPartNumQ << 1);
1094 T *pT3 = p + (curPartNumQ >> 1);
1095 T *pT4 = p + (curPartNumQ << 1) + (curPartNumQ >> 1);
1096
1097 for (i = 0; i < (curPartNumQ >> 2); i++)
1098 {
1099 pT[i] = val;
1100 pT2[i] = val;
1101 pT3[i] = val;
1102 pT4[i] = val;
1103 }
1104 }
1105 else
1106 {
1107 T *pT = p;
1108 T *pT2 = p + (curPartNumQ << 1);
1109 for (i = 0; i < (curPartNumQ >> 2); i++)
1110 {
1111 pT[i] = val;
1112 pT2[i] = val;
1113 }
1114
1115 pT = p + (curPartNumQ >> 1);
1116 pT2 = p + (curPartNumQ << 1) + (curPartNumQ >> 1);
1117 for (i = 0; i < ((curPartNumQ >> 2) + curPartNumQ); i++)
1118 {
1119 pT[i] = val;
1120 pT2[i] = val;
1121 }
1122 }
1123 break;
1124 }
1125
1126 case SIZE_nRx2N:
1127 {
1128 int curPartNumQ = numElements >> 2;
1129 if (!puIdx)
1130 {
1131 T *pT = p;
1132 T *pT2 = p + (curPartNumQ << 1);
1133 for (i = 0; i < ((curPartNumQ >> 2) + curPartNumQ); i++)
1134 {
1135 pT[i] = val;
1136 pT2[i] = val;
1137 }
1138
1139 pT = p + curPartNumQ + (curPartNumQ >> 1);
1140 pT2 = p + numElements - curPartNumQ + (curPartNumQ >> 1);
1141 for (i = 0; i < (curPartNumQ >> 2); i++)
1142 {
1143 pT[i] = val;
1144 pT2[i] = val;
1145 }
1146 }
1147 else
1148 {
1149 T *pT = p;
1150 T *pT2 = p + (curPartNumQ >> 1);
1151 T *pT3 = p + (curPartNumQ << 1);
1152 T *pT4 = p + (curPartNumQ << 1) + (curPartNumQ >> 1);
1153 for (i = 0; i < (curPartNumQ >> 2); i++)
1154 {
1155 pT[i] = val;
1156 pT2[i] = val;
1157 pT3[i] = val;
1158 pT4[i] = val;
1159 }
1160 }
1161 break;
1162 }
1163
1164 case SIZE_NxN:
1165 default:
1166 X265_CHECK(0, "unknown partition type\n");
1167 break;
1168 }
1169 }
1170
1171 void CUData::setPUMv(int list, const MV& mv, int absPartIdx, int puIdx)
1172 {
1173 setAllPU(m_mv[list], mv, absPartIdx, puIdx);
1174 }
1175
1176 void CUData::setPURefIdx(int list, int8_t refIdx, int absPartIdx, int puIdx)
1177 {
1178 setAllPU(m_refIdx[list], refIdx, absPartIdx, puIdx);
1179 }
1180
1181 void CUData::getPartIndexAndSize(uint32_t partIdx, uint32_t& outPartAddr, int& outWidth, int& outHeight) const
1182 {
1183 int cuSize = 1 << m_log2CUSize[0];
1184 int partType = m_partSize[0];
1185
1186 int tmp = partTable[partType][partIdx][0];
1187 outWidth = ((tmp >> 4) * cuSize) >> 2;
1188 outHeight = ((tmp & 0xF) * cuSize) >> 2;
1189 outPartAddr = (partAddrTable[partType][partIdx] * m_numPartitions) >> 4;
1190 }
1191
1192 void CUData::getMvField(const CUData* cu, uint32_t absPartIdx, int picList, MVField& outMvField) const
1193 {
1194 if (cu)
1195 {
1196 outMvField.mv = cu->m_mv[picList][absPartIdx];
1197 outMvField.refIdx = cu->m_refIdx[picList][absPartIdx];
1198 }
1199 else
1200 {
1201 // OUT OF BOUNDARY
1202 outMvField.mv = 0;
1203 outMvField.refIdx = REF_NOT_VALID;
1204 }
1205 }
1206
1207 void CUData::deriveLeftRightTopIdx(uint32_t partIdx, uint32_t& partIdxLT, uint32_t& partIdxRT) const
1208 {
1209 partIdxLT = m_absIdxInCTU;
1210 partIdxRT = g_rasterToZscan[g_zscanToRaster[partIdxLT] + (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1];
1211
1212 switch (m_partSize[0])
1213 {
1214 case SIZE_2Nx2N: break;
1215 case SIZE_2NxN:
1216 partIdxLT += (partIdx == 0) ? 0 : m_numPartitions >> 1;
1217 partIdxRT += (partIdx == 0) ? 0 : m_numPartitions >> 1;
1218 break;
1219 case SIZE_Nx2N:
1220 partIdxLT += (partIdx == 0) ? 0 : m_numPartitions >> 2;
1221 partIdxRT -= (partIdx == 1) ? 0 : m_numPartitions >> 2;
1222 break;
1223 case SIZE_NxN:
1224 partIdxLT += (m_numPartitions >> 2) * partIdx;
1225 partIdxRT += (m_numPartitions >> 2) * (partIdx - 1);
1226 break;
1227 case SIZE_2NxnU:
1228 partIdxLT += (partIdx == 0) ? 0 : m_numPartitions >> 3;
1229 partIdxRT += (partIdx == 0) ? 0 : m_numPartitions >> 3;
1230 break;
1231 case SIZE_2NxnD:
1232 partIdxLT += (partIdx == 0) ? 0 : (m_numPartitions >> 1) + (m_numPartitions >> 3);
1233 partIdxRT += (partIdx == 0) ? 0 : (m_numPartitions >> 1) + (m_numPartitions >> 3);
1234 break;
1235 case SIZE_nLx2N:
1236 partIdxLT += (partIdx == 0) ? 0 : m_numPartitions >> 4;
1237 partIdxRT -= (partIdx == 1) ? 0 : (m_numPartitions >> 2) + (m_numPartitions >> 4);
1238 break;
1239 case SIZE_nRx2N:
1240 partIdxLT += (partIdx == 0) ? 0 : (m_numPartitions >> 2) + (m_numPartitions >> 4);
1241 partIdxRT -= (partIdx == 1) ? 0 : m_numPartitions >> 4;
1242 break;
1243 default:
1244 X265_CHECK(0, "unexpected part index\n");
1245 break;
1246 }
1247 }
1248
1249 uint32_t CUData::deriveLeftBottomIdx(uint32_t puIdx) const
1250 {
1251 uint32_t outPartIdxLB;
1252 outPartIdxLB = g_rasterToZscan[g_zscanToRaster[m_absIdxInCTU] + ((1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE - 1)) - 1) * s_numPartInCUSize];
1253
1254 switch (m_partSize[0])
1255 {
1256 case SIZE_2Nx2N:
1257 outPartIdxLB += m_numPartitions >> 1;
1258 break;
1259 case SIZE_2NxN:
1260 outPartIdxLB += puIdx ? m_numPartitions >> 1 : 0;
1261 break;
1262 case SIZE_Nx2N:
1263 outPartIdxLB += puIdx ? (m_numPartitions >> 2) * 3 : m_numPartitions >> 1;
1264 break;
1265 case SIZE_NxN:
1266 outPartIdxLB += (m_numPartitions >> 2) * puIdx;
1267 break;
1268 case SIZE_2NxnU:
1269 outPartIdxLB += puIdx ? m_numPartitions >> 1 : -((int)m_numPartitions >> 3);
1270 break;
1271 case SIZE_2NxnD:
1272 outPartIdxLB += puIdx ? m_numPartitions >> 1 : (m_numPartitions >> 2) + (m_numPartitions >> 3);
1273 break;
1274 case SIZE_nLx2N:
1275 outPartIdxLB += puIdx ? (m_numPartitions >> 1) + (m_numPartitions >> 4) : m_numPartitions >> 1;
1276 break;
1277 case SIZE_nRx2N:
1278 outPartIdxLB += puIdx ? (m_numPartitions >> 1) + (m_numPartitions >> 2) + (m_numPartitions >> 4) : m_numPartitions >> 1;
1279 break;
1280 default:
1281 X265_CHECK(0, "unexpected part index\n");
1282 break;
1283 }
1284 return outPartIdxLB;
1285 }
1286
1287 /* Derives the partition index of neighboring bottom right block */
1288 uint32_t CUData::deriveRightBottomIdx(uint32_t puIdx) const
1289 {
1290 uint32_t outPartIdxRB;
1291 outPartIdxRB = g_rasterToZscan[g_zscanToRaster[m_absIdxInCTU] +
1292 ((1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE - 1)) - 1) * s_numPartInCUSize +
1293 (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1];
1294
1295 switch (m_partSize[0])
1296 {
1297 case SIZE_2Nx2N:
1298 outPartIdxRB += m_numPartitions >> 1;
1299 break;
1300 case SIZE_2NxN:
1301 outPartIdxRB += puIdx ? m_numPartitions >> 1 : 0;
1302 break;
1303 case SIZE_Nx2N:
1304 outPartIdxRB += puIdx ? m_numPartitions >> 1 : m_numPartitions >> 2;
1305 break;
1306 case SIZE_NxN:
1307 outPartIdxRB += (m_numPartitions >> 2) * (puIdx - 1);
1308 break;
1309 case SIZE_2NxnU:
1310 outPartIdxRB += puIdx ? m_numPartitions >> 1 : -((int)m_numPartitions >> 3);
1311 break;
1312 case SIZE_2NxnD:
1313 outPartIdxRB += puIdx ? m_numPartitions >> 1 : (m_numPartitions >> 2) + (m_numPartitions >> 3);
1314 break;
1315 case SIZE_nLx2N:
1316 outPartIdxRB += puIdx ? m_numPartitions >> 1 : (m_numPartitions >> 3) + (m_numPartitions >> 4);
1317 break;
1318 case SIZE_nRx2N:
1319 outPartIdxRB += puIdx ? m_numPartitions >> 1 : (m_numPartitions >> 2) + (m_numPartitions >> 3) + (m_numPartitions >> 4);
1320 break;
1321 default:
1322 X265_CHECK(0, "unexpected part index\n");
1323 break;
1324 }
1325 return outPartIdxRB;
1326 }
1327
1328 bool CUData::hasEqualMotion(uint32_t absPartIdx, const CUData& candCU, uint32_t candAbsPartIdx) const
1329 {
1330 if (m_interDir[absPartIdx] != candCU.m_interDir[candAbsPartIdx])
1331 return false;
1332
1333 for (uint32_t refListIdx = 0; refListIdx < 2; refListIdx++)
1334 {
1335 if (m_interDir[absPartIdx] & (1 << refListIdx))
1336 {
1337 if (m_mv[refListIdx][absPartIdx] != candCU.m_mv[refListIdx][candAbsPartIdx] ||
1338 m_refIdx[refListIdx][absPartIdx] != candCU.m_refIdx[refListIdx][candAbsPartIdx])
1339 return false;
1340 }
1341 }
1342
1343 return true;
1344 }
1345
1346 /* Construct list of merging candidates, returns count.
 *
 * Candidates are appended in this order, and the function returns as soon as
 * maxNumMergeCand (taken from the slice) entries have been written:
 *   1. spatial neighbours: left, above, above right, left bottom, above left
 *      (above left is only tried while fewer than four candidates exist),
 *   2. one temporal candidate, when temporal MVP is enabled in the SPS,
 *   3. for B slices, combinations of the list 0 motion of one existing
 *      candidate with the list 1 motion of another,
 *   4. zero motion vectors with cycling reference indices.
 *
 * absPartIdx   partition offset of the PU inside this CU
 * puIdx        index of the PU inside this CU
 * candMvField  output, [candidate][list] motion fields; the first
 *              maxNumMergeCand entries are reset on entry
 * candDir      output, per candidate direction mask (bit 0 = list 0,
 *              bit 1 = list 1) */
1347 uint32_t CUData::getInterMergeCandidates(uint32_t absPartIdx, uint32_t puIdx, MVField(*candMvField)[2], uint8_t* candDir) const
1348 {
1349 uint32_t absPartAddr = m_absIdxInCTU + absPartIdx;
1350 const bool isInterB = m_slice->isInterB();
1351
1352 const uint32_t maxNumMergeCand = m_slice->m_maxNumMergeCand;
1353
// start with every candidate slot marked as "no motion"
1354 for (uint32_t i = 0; i < maxNumMergeCand; ++i)
1355 {
1356 candMvField[i][0].mv = 0;
1357 candMvField[i][1].mv = 0;
1358 candMvField[i][0].refIdx = REF_NOT_VALID;
1359 candMvField[i][1].refIdx = REF_NOT_VALID;
1360 }
1361
1362 /* calculate the location of upper-left corner pixel and size of the current PU */
1363 int xP, yP, nPSW, nPSH;
1364
1365 int cuSize = 1 << m_log2CUSize[0];
1366 int partMode = m_partSize[0];
1367
// partTable entry [0] packs the PU size and entry [1] the PU offset; each is
// two nibbles (high = horizontal, low = vertical) in quarters of the CU size
1368 int tmp = partTable[partMode][puIdx][0];
1369 nPSW = ((tmp >> 4) * cuSize) >> 2;
1370 nPSH = ((tmp & 0xF) * cuSize) >> 2;
1371
1372 tmp = partTable[partMode][puIdx][1];
1373 xP = ((tmp >> 4) * cuSize) >> 2;
1374 yP = ((tmp & 0xF) * cuSize) >> 2;
1375
1376 uint32_t count = 0;
1377
1378 uint32_t partIdxLT, partIdxRT, partIdxLB = deriveLeftBottomIdx(puIdx);
1379 PartSize curPS = (PartSize)m_partSize[absPartIdx];
1380
1381 // left (A1); not used for the second PU of a vertically split CU
1382 uint32_t leftPartIdx = 0;
1383 const CUData* cuLeft = getPULeft(leftPartIdx, partIdxLB);
1384 bool isAvailableA1 = cuLeft &&
1385 cuLeft->isDiffMER(xP - 1, yP + nPSH - 1, xP, yP) &&
1386 !(puIdx == 1 && (curPS == SIZE_Nx2N || curPS == SIZE_nLx2N || curPS == SIZE_nRx2N)) &&
1387 cuLeft->isInter(leftPartIdx);
1388 if (isAvailableA1)
1389 {
1390 // get Inter Dir
1391 candDir[count] = cuLeft->m_interDir[leftPartIdx];
1392 // get Mv from Left
1393 cuLeft->getMvField(cuLeft, leftPartIdx, 0, candMvField[count][0]);
1394 if (isInterB)
1395 cuLeft->getMvField(cuLeft, leftPartIdx, 1, candMvField[count][1]);
1396
1397 if (++count == maxNumMergeCand)
1398 return maxNumMergeCand;
1399 }
1400
1401 deriveLeftRightTopIdx(puIdx, partIdxLT, partIdxRT);
1402
1403 // above (B1); not used for the second PU of a horizontally split CU,
// and dropped when its motion equals the left candidate
1404 uint32_t abovePartIdx = 0;
1405 const CUData* cuAbove = getPUAbove(abovePartIdx, partIdxRT);
1406 bool isAvailableB1 = cuAbove &&
1407 cuAbove->isDiffMER(xP + nPSW - 1, yP - 1, xP, yP) &&
1408 !(puIdx == 1 && (curPS == SIZE_2NxN || curPS == SIZE_2NxnU || curPS == SIZE_2NxnD)) &&
1409 cuAbove->isInter(abovePartIdx);
1410 if (isAvailableB1 && (!isAvailableA1 || !cuLeft->hasEqualMotion(leftPartIdx, *cuAbove, abovePartIdx)))
1411 {
1412 // get Inter Dir
1413 candDir[count] = cuAbove->m_interDir[abovePartIdx];
1414 // get Mv from Above
1415 cuAbove->getMvField(cuAbove, abovePartIdx, 0, candMvField[count][0]);
1416 if (isInterB)
1417 cuAbove->getMvField(cuAbove, abovePartIdx, 1, candMvField[count][1]);
1418
1419 if (++count == maxNumMergeCand)
1420 return maxNumMergeCand;
1421 }
1422
1423 // above right (B0); dropped when its motion equals the above candidate
1424 uint32_t aboveRightPartIdx = 0;
1425 const CUData* cuAboveRight = getPUAboveRight(aboveRightPartIdx, partIdxRT);
1426 bool isAvailableB0 = cuAboveRight &&
1427 cuAboveRight->isDiffMER(xP + nPSW, yP - 1, xP, yP) &&
1428 cuAboveRight->isInter(aboveRightPartIdx);
1429 if (isAvailableB0 && (!isAvailableB1 || !cuAbove->hasEqualMotion(abovePartIdx, *cuAboveRight, aboveRightPartIdx)))
1430 {
1431 // get Inter Dir
1432 candDir[count] = cuAboveRight->m_interDir[aboveRightPartIdx];
1433 // get Mv from Above Right
1434 cuAboveRight->getMvField(cuAboveRight, aboveRightPartIdx, 0, candMvField[count][0]);
1435 if (isInterB)
1436 cuAboveRight->getMvField(cuAboveRight, aboveRightPartIdx, 1, candMvField[count][1]);
1437
1438 if (++count == maxNumMergeCand)
1439 return maxNumMergeCand;
1440 }
1441
1442 // left bottom (A0); dropped when its motion equals the left candidate
1443 uint32_t leftBottomPartIdx = 0;
1444 const CUData* cuLeftBottom = this->getPUBelowLeft(leftBottomPartIdx, partIdxLB);
1445 bool isAvailableA0 = cuLeftBottom &&
1446 cuLeftBottom->isDiffMER(xP - 1, yP + nPSH, xP, yP) &&
1447 cuLeftBottom->isInter(leftBottomPartIdx);
1448 if (isAvailableA0 && (!isAvailableA1 || !cuLeft->hasEqualMotion(leftPartIdx, *cuLeftBottom, leftBottomPartIdx)))
1449 {
1450 // get Inter Dir
1451 candDir[count] = cuLeftBottom->m_interDir[leftBottomPartIdx];
1452 // get Mv from Left Bottom
1453 cuLeftBottom->getMvField(cuLeftBottom, leftBottomPartIdx, 0, candMvField[count][0]);
1454 if (isInterB)
1455 cuLeftBottom->getMvField(cuLeftBottom, leftBottomPartIdx, 1, candMvField[count][1]);
1456
1457 if (++count == maxNumMergeCand)
1458 return maxNumMergeCand;
1459 }
1460
1461 // above left (B2); only tried while fewer than four candidates were found,
// and dropped when its motion equals the left or the above candidate
1462 if (count < 4)
1463 {
1464 uint32_t aboveLeftPartIdx = 0;
1465 const CUData* cuAboveLeft = getPUAboveLeft(aboveLeftPartIdx, absPartAddr);
1466 bool isAvailableB2 = cuAboveLeft &&
1467 cuAboveLeft->isDiffMER(xP - 1, yP - 1, xP, yP) &&
1468 cuAboveLeft->isInter(aboveLeftPartIdx);
1469 if (isAvailableB2 && (!isAvailableA1 || !cuLeft->hasEqualMotion(leftPartIdx, *cuAboveLeft, aboveLeftPartIdx))
1470 && (!isAvailableB1 || !cuAbove->hasEqualMotion(abovePartIdx, *cuAboveLeft, aboveLeftPartIdx)))
1471 {
1472 // get Inter Dir
1473 candDir[count] = cuAboveLeft->m_interDir[aboveLeftPartIdx];
1474 // get Mv from Above Left
1475 cuAboveLeft->getMvField(cuAboveLeft, aboveLeftPartIdx, 0, candMvField[count][0]);
1476 if (isInterB)
1477 cuAboveLeft->getMvField(cuAboveLeft, aboveLeftPartIdx, 1, candMvField[count][1]);
1478
1479 if (++count == maxNumMergeCand)
1480 return maxNumMergeCand;
1481 }
1482 }
// temporal candidate: try the block diagonally below-right of the PU first,
// then fall back to the block at the PU centre
1483 if (m_slice->m_sps->bTemporalMVPEnabled)
1484 {
1485 uint32_t partIdxRB = deriveRightBottomIdx(puIdx);
1486 MV colmv;
// ctuIdx stays -1 whenever the below-right block must not be used
1487 int ctuIdx = -1;
1488
1489 // image boundary check
1490 if (m_encData->getPicCTU(m_cuAddr)->m_cuPelX + g_zscanToPelX[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picWidthInLumaSamples &&
1491 m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picHeightInLumaSamples)
1492 {
1493 uint32_t absPartIdxRB = g_zscanToRaster[partIdxRB];
1494 uint32_t numUnits = s_numPartInCUSize;
1495 bool bNotLastCol = lessThanCol(absPartIdxRB, numUnits - 1, numUnits); // is not at the last column of CTU
1496 bool bNotLastRow = lessThanRow(absPartIdxRB, numUnits - 1, numUnits); // is not at the last row of CTU
1497
1498 if (bNotLastCol && bNotLastRow)
1499 {
1500 absPartAddr = g_rasterToZscan[absPartIdxRB + numUnits + 1];
1501 ctuIdx = m_cuAddr;
1502 }
1503 else if (bNotLastCol)
// last row of the CTU: ctuIdx is left at -1, so the centre block is used instead
1504 absPartAddr = g_rasterToZscan[(absPartIdxRB + numUnits + 1) & (numUnits - 1)];
1505 else if (bNotLastRow)
1506 {
1507 absPartAddr = g_rasterToZscan[absPartIdxRB + 1];
1508 ctuIdx = m_cuAddr + 1;
1509 }
1510 else // is the right bottom corner of CTU
1511 absPartAddr = 0;
1512 }
1513
1514 int maxList = isInterB ? 2 : 1;
1515 int dir = 0, refIdx = 0;
1516 for (int list = 0; list < maxList; list++)
1517 {
1518 bool bExistMV = ctuIdx >= 0 && getColMVP(colmv, refIdx, list, ctuIdx, absPartAddr);
1519 if (!bExistMV)
1520 {
1521 uint32_t partIdxCenter = deriveCenterIdx(puIdx);
1522 bExistMV = getColMVP(colmv, refIdx, list, m_cuAddr, partIdxCenter);
1523 }
1524 if (bExistMV)
1525 {
1526 dir |= (1 << list);
1527 candMvField[count][list].mv = colmv;
1528 candMvField[count][list].refIdx = refIdx;
1529 }
1530 }
1531
// the temporal candidate only counts when at least one list produced a vector
1532 if (dir != 0)
1533 {
1534 candDir[count] = (uint8_t)dir;
1535
1536 if (++count == maxNumMergeCand)
1537 return maxNumMergeCand;
1538 }
1539 }
1540
// combined bi-predictive candidates: pair the list 0 motion of candidate i
// with the list 1 motion of candidate j
1541 if (isInterB)
1542 {
// number of ordered (i, j) pairs over the candidates found so far
1543 const uint32_t cutoff = count * (count - 1);
// the index sequences are packed two bits per entry, lowest bits first
1544 uint32_t priorityList0 = 0xEDC984; // { 0, 1, 0, 2, 1, 2, 0, 3, 1, 3, 2, 3 }
1545 uint32_t priorityList1 = 0xB73621; // { 1, 0, 2, 0, 2, 1, 3, 0, 3, 1, 3, 2 }
1546
1547 for (uint32_t idx = 0; idx < cutoff; idx++, priorityList0 >>= 2, priorityList1 >>= 2)
1548 {
1549 int i = priorityList0 & 3;
1550 int j = priorityList1 & 3;
1551
1552 if ((candDir[i] & 0x1) && (candDir[j] & 0x2))
1553 {
1554 // get Mv from cand[i] and cand[j]
1555 int refIdxL0 = candMvField[i][0].refIdx;
1556 int refIdxL1 = candMvField[j][1].refIdx;
1557 int refPOCL0 = m_slice->m_refPOCList[0][refIdxL0];
1558 int refPOCL1 = m_slice->m_refPOCList[1][refIdxL1];
// skip the pair when both halves reference the same picture with the same vector
1559 if (!(refPOCL0 == refPOCL1 && candMvField[i][0].mv == candMvField[j][1].mv))
1560 {
1561 candMvField[count][0].mv = candMvField[i][0].mv;
1562 candMvField[count][0].refIdx = refIdxL0;
1563 candMvField[count][1].mv = candMvField[j][1].mv;
1564 candMvField[count][1].refIdx = refIdxL1;
1565 candDir[count] = 3;
1566
1567 if (++count == maxNumMergeCand)
1568 return maxNumMergeCand;
1569 }
1570 }
1571 }
1572 }
// fill the remaining slots with zero vectors; the reference index counts up
// to numRefIdx - 1 once and is 0 for every slot after that
1573 int numRefIdx = (isInterB) ? X265_MIN(m_slice->m_numRefIdx[0], m_slice->m_numRefIdx[1]) : m_slice->m_numRefIdx[0];
1574 int r = 0;
1575 int refcnt = 0;
1576 while (count < maxNumMergeCand)
1577 {
1578 candDir[count] = 1;
1579 candMvField[count][0].mv.word = 0;
1580 candMvField[count][0].refIdx = r;
1581
1582 if (isInterB)
1583 {
1584 candDir[count] = 3;
1585 candMvField[count][1].mv.word = 0;
1586 candMvField[count][1].refIdx = r;
1587 }
1588
1589 count++;
1590
1591 if (refcnt == numRefIdx - 1)
1592 r = 0;
1593 else
1594 {
1595 ++r;
1596 ++refcnt;
1597 }
1598 }
1599
1600 return count;
1601 }
1602
1603 // Create the PMV list. Called for each reference index.
1604 int CUData::getPMV(InterNeighbourMV *neighbours, uint32_t picList, uint32_t refIdx, MV* amvpCand, MV* pmv) const
1605 {
1606 MV directMV[MD_ABOVE_LEFT + 1];
1607 MV indirectMV[MD_ABOVE_LEFT + 1];
1608 bool validDirect[MD_ABOVE_LEFT + 1];
1609 bool validIndirect[MD_ABOVE_LEFT + 1];
1610
1611 // Left candidate.
1612 validDirect[MD_BELOW_LEFT] = getDirectPMV(directMV[MD_BELOW_LEFT], neighbours + MD_BELOW_LEFT, picList, refIdx);
1613 validDirect[MD_LEFT] = getDirectPMV(directMV[MD_LEFT], neighbours + MD_LEFT, picList, refIdx);
1614 // Top candidate.
1615 validDirect[MD_ABOVE_RIGHT] = getDirectPMV(directMV[MD_ABOVE_RIGHT], neighbours + MD_ABOVE_RIGHT, picList, refIdx);
1616 validDirect[MD_ABOVE] = getDirectPMV(directMV[MD_ABOVE], neighbours + MD_ABOVE, picList, refIdx);
1617 validDirect[MD_ABOVE_LEFT] = getDirectPMV(directMV[MD_ABOVE_LEFT], neighbours + MD_ABOVE_LEFT, picList, refIdx);
1618
1619 // Left candidate.
1620 validIndirect[MD_BELOW_LEFT] = getIndirectPMV(indirectMV[MD_BELOW_LEFT], neighbours + MD_BELOW_LEFT, picList, refIdx);
1621 validIndirect[MD_LEFT] = getIndirectPMV(indirectMV[MD_LEFT], neighbours + MD_LEFT, picList, refIdx);
1622 // Top candidate.
1623 validIndirect[MD_ABOVE_RIGHT] = getIndirectPMV(indirectMV[MD_ABOVE_RIGHT], neighbours + MD_ABOVE_RIGHT, picList, refIdx);
1624 validIndirect[MD_ABOVE] = getIndirectPMV(indirectMV[MD_ABOVE], neighbours + MD_ABOVE, picList, refIdx);
1625 validIndirect[MD_ABOVE_LEFT] = getIndirectPMV(indirectMV[MD_ABOVE_LEFT], neighbours + MD_ABOVE_LEFT, picList, refIdx);
1626
1627 int num = 0;
1628 // Left predictor search
1629 if (validDirect[MD_BELOW_LEFT])
1630 amvpCand[num++] = directMV[MD_BELOW_LEFT];
1631 else if (validDirect[MD_LEFT])
1632 amvpCand[num++] = directMV[MD_LEFT];
1633 else if (validIndirect[MD_BELOW_LEFT])
1634 amvpCand[num++] = indirectMV[MD_BELOW_LEFT];
1635 else if (validIndirect[MD_LEFT])
1636 amvpCand[num++] = indirectMV[MD_LEFT];
1637
1638 bool bAddedSmvp = num > 0;
1639
1640 // Above predictor search
1641 if (validDirect[MD_ABOVE_RIGHT])
1642 amvpCand[num++] = directMV[MD_ABOVE_RIGHT];
1643 else if (validDirect[MD_ABOVE])
1644 amvpCand[num++] = directMV[MD_ABOVE];
1645 else if (validDirect[MD_ABOVE_LEFT])
1646 amvpCand[num++] = directMV[MD_ABOVE_LEFT];
1647
1648 if (!bAddedSmvp)
1649 {
1650 if (validIndirect[MD_ABOVE_RIGHT])
1651 amvpCand[num++] = indirectMV[MD_ABOVE_RIGHT];
1652 else if (validIndirect[MD_ABOVE])
1653 amvpCand[num++] = indirectMV[MD_ABOVE];
1654 else if (validIndirect[MD_ABOVE_LEFT])
1655 amvpCand[num++] = indirectMV[MD_ABOVE_LEFT];
1656 }
1657
1658 int numMvc = 0;
1659 for (int dir = MD_LEFT; dir <= MD_ABOVE_LEFT; dir++)
1660 {
1661 if (validDirect[dir] && directMV[dir].notZero())
1662 pmv[numMvc++] = directMV[dir];
1663
1664 if (validIndirect[dir] && indirectMV[dir].notZero())
1665 pmv[numMvc++] = indirectMV[dir];
1666 }
1667
1668 if (num == 2)
1669 num -= amvpCand[0] == amvpCand[1];
1670
1671 // Get the collocated candidate. At this step, either the first candidate
1672 // was found or its value is 0.
1673 if (m_slice->m_sps->bTemporalMVPEnabled && num < 2)
1674 {
1675 int tempRefIdx = neighbours[MD_COLLOCATED].refIdx[picList];
1676 if (tempRefIdx != -1)
1677 {
1678 uint32_t cuAddr = neighbours[MD_COLLOCATED].cuAddr[picList];
1679 const Frame* colPic = m_slice->m_refFrameList[m_slice->isInterB() && !m_slice->m_colFromL0Flag][m_slice->m_colRefIdx];
1680 const CUData* colCU = colPic->m_encData->getPicCTU(cuAddr);
1681
1682 // Scale the vector
1683 int colRefPOC = colCU->m_slice->m_refPOCList[tempRefIdx >> 4][tempRefIdx & 0xf];
1684 int colPOC = colCU->m_slice->m_poc;
1685
1686 int curRefPOC = m_slice->m_refPOCList[picList][refIdx];
1687 int curPOC = m_slice->m_poc;
1688
1689 pmv[numMvc++] = amvpCand[num++] = scaleMvByPOCDist(neighbours[MD_COLLOCATED].mv[picList], curPOC, curRefPOC, colPOC, colRefPOC);
1690 }
1691 }
1692
1693 while (num < AMVP_NUM_CANDS)
1694 amvpCand[num++] = 0;
1695
1696 return numMvc;
1697 }
1698
1699 /* Constructs a list of candidates for AMVP, and a larger list of motion candidates */
1700 void CUData::getNeighbourMV(uint32_t puIdx, uint32_t absPartIdx, InterNeighbourMV* neighbours) const
1701 {
1702 // Set the temporal neighbour to unavailable by default.
1703 neighbours[MD_COLLOCATED].unifiedRef = -1;
1704
1705 uint32_t partIdxLT, partIdxRT, partIdxLB = deriveLeftBottomIdx(puIdx);
1706 deriveLeftRightTopIdx(puIdx, partIdxLT, partIdxRT);
1707
1708 // Load the spatial MVs.
1709 getInterNeighbourMV(neighbours + MD_BELOW_LEFT, partIdxLB, MD_BELOW_LEFT);
1710 getInterNeighbourMV(neighbours + MD_LEFT, partIdxLB, MD_LEFT);
1711 getInterNeighbourMV(neighbours + MD_ABOVE_RIGHT,partIdxRT, MD_ABOVE_RIGHT);
1712 getInterNeighbourMV(neighbours + MD_ABOVE, partIdxRT, MD_ABOVE);
1713 getInterNeighbourMV(neighbours + MD_ABOVE_LEFT, partIdxLT, MD_ABOVE_LEFT);
1714
1715 if (m_slice->m_sps->bTemporalMVPEnabled)
1716 {
1717 uint32_t absPartAddr = m_absIdxInCTU + absPartIdx;
1718 uint32_t partIdxRB = deriveRightBottomIdx(puIdx);
1719
1720 // co-located RightBottom temporal predictor (H)
1721 int ctuIdx = -1;
1722
1723 // image boundary check
1724 if (m_encData->getPicCTU(m_cuAddr)->m_cuPelX + g_zscanToPelX[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picWidthInLumaSamples &&
1725 m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picHeightInLumaSamples)
1726 {
1727 uint32_t absPartIdxRB = g_zscanToRaster[partIdxRB];
1728 uint32_t numUnits = s_numPartInCUSize;
1729 bool bNotLastCol = lessThanCol(absPartIdxRB, numUnits - 1, numUnits); // is not at the last column of CTU
1730 bool bNotLastRow = lessThanRow(absPartIdxRB, numUnits - 1, numUnits); // is not at the last row of CTU
1731
1732 if (bNotLastCol && bNotLastRow)
1733 {
1734 absPartAddr = g_rasterToZscan[absPartIdxRB + numUnits + 1];
1735 ctuIdx = m_cuAddr;
1736 }
1737 else if (bNotLastCol)
1738 absPartAddr = g_rasterToZscan[(absPartIdxRB + numUnits + 1) & (numUnits - 1)];
1739 else if (bNotLastRow)
1740 {
1741 absPartAddr = g_rasterToZscan[absPartIdxRB + 1];
1742 ctuIdx = m_cuAddr + 1;
1743 }
1744 else // is the right bottom corner of CTU
1745 absPartAddr = 0;
1746 }
1747
1748 if (!(ctuIdx >= 0 && getCollocatedMV(ctuIdx, absPartAddr, neighbours + MD_COLLOCATED)))
1749 {
1750 uint32_t partIdxCenter = deriveCenterIdx(puIdx);
1751 uint32_t curCTUIdx = m_cuAddr;
1752 getCollocatedMV(curCTUIdx, partIdxCenter, neighbours + MD_COLLOCATED);
1753 }
1754 }
1755 }
1756
1757 void CUData::getInterNeighbourMV(InterNeighbourMV *neighbour, uint32_t partUnitIdx, MVP_DIR dir) const
1758 {
1759 const CUData* tmpCU = NULL;
1760 uint32_t idx = 0;
1761
1762 switch (dir)
1763 {
1764 case MD_LEFT:
1765 tmpCU = getPULeft(idx, partUnitIdx);
1766 break;
1767 case MD_ABOVE:
1768 tmpCU = getPUAbove(idx, partUnitIdx);
1769 break;
1770 case MD_ABOVE_RIGHT:
1771 tmpCU = getPUAboveRight(idx, partUnitIdx);
1772 break;
1773 case MD_BELOW_LEFT:
1774 tmpCU = getPUBelowLeft(idx, partUnitIdx);
1775 break;
1776 case MD_ABOVE_LEFT:
1777 tmpCU = getPUAboveLeft(idx, partUnitIdx);
1778 break;
1779 default:
1780 break;
1781 }
1782
1783 if (!tmpCU)
1784 {
1785 // Mark the PMV as unavailable.
1786 for (int i = 0; i < 2; i++)
1787 neighbour->refIdx[i] = -1;
1788 return;
1789 }
1790
1791 for (int i = 0; i < 2; i++)
1792 {
1793 // Get the MV.
1794 neighbour->mv[i] = tmpCU->m_mv[i][idx];
1795
1796 // Get the reference idx.
1797 neighbour->refIdx[i] = tmpCU->m_refIdx[i][idx];
1798 }
1799 }
1800
1801 /* Clip motion vector to within slightly padded boundary of picture (the
1802 * MV may reference a block that is completely within the padded area).
1803 * Note this function is unaware of how much of this picture is actually
1804 * available for use (re: frame parallelism) */
1805 void CUData::clipMv(MV& outMV) const
1806 {
1807 const uint32_t mvshift = 2;
1808 uint32_t offset = 8;
1809
1810 int16_t xmax = (int16_t)((m_slice->m_sps->picWidthInLumaSamples + offset - m_cuPelX - 1) << mvshift);
1811 int16_t xmin = -(int16_t)((g_maxCUSize + offset + m_cuPelX - 1) << mvshift);
1812
1813 int16_t ymax = (int16_t)((m_slice->m_sps->picHeightInLumaSamples + offset - m_cuPelY - 1) << mvshift);
1814 int16_t ymin = -(int16_t)((g_maxCUSize + offset + m_cuPelY - 1) << mvshift);
1815
1816 outMV.x = X265_MIN(xmax, X265_MAX(xmin, outMV.x));
1817 outMV.y = X265_MIN(ymax, X265_MAX(ymin, outMV.y));
1818 }
1819
1820 // Load direct spatial MV if available.
1821 bool CUData::getDirectPMV(MV& pmv, InterNeighbourMV *neighbours, uint32_t picList, uint32_t refIdx) const
1822 {
1823 int curRefPOC = m_slice->m_refPOCList[picList][refIdx];
1824 for (int i = 0; i < 2; i++, picList = !picList)
1825 {
1826 int partRefIdx = neighbours->refIdx[picList];
1827 if (partRefIdx >= 0 && curRefPOC == m_slice->m_refPOCList[picList][partRefIdx])
1828 {
1829 pmv = neighbours->mv[picList];
1830 return true;
1831 }
1832 }
1833 return false;
1834 }
1835
1836 // Load indirect spatial MV if available. An indirect MV has to be scaled.
1837 bool CUData::getIndirectPMV(MV& outMV, InterNeighbourMV *neighbours, uint32_t picList, uint32_t refIdx) const
1838 {
1839 int curPOC = m_slice->m_poc;
1840 int neibPOC = curPOC;
1841 int curRefPOC = m_slice->m_refPOCList[picList][refIdx];
1842
1843 for (int i = 0; i < 2; i++, picList = !picList)
1844 {
1845 int partRefIdx = neighbours->refIdx[picList];
1846 if (partRefIdx >= 0)
1847 {
1848 int neibRefPOC = m_slice->m_refPOCList[picList][partRefIdx];
1849 MV mvp = neighbours->mv[picList];
1850
1851 outMV = scaleMvByPOCDist(mvp, curPOC, curRefPOC, neibPOC, neibRefPOC);
1852 return true;
1853 }
1854 }
1855 return false;
1856 }
1857
1858 bool CUData::getColMVP(MV& outMV, int& outRefIdx, int picList, int cuAddr, int partUnitIdx) const
1859 {
1860 const Frame* colPic = m_slice->m_refFrameList[m_slice->isInterB() && !m_slice->m_colFromL0Flag][m_slice->m_colRefIdx];
1861 const CUData* colCU = colPic->m_encData->getPicCTU(cuAddr);
1862
1863 uint32_t absPartAddr = partUnitIdx & TMVP_UNIT_MASK;
1864 if (colCU->m_predMode[partUnitIdx] == MODE_NONE || colCU->isIntra(absPartAddr))
1865 return false;
1866
1867 int colRefPicList = m_slice->m_bCheckLDC ? picList : m_slice->m_colFromL0Flag;
1868
1869 int colRefIdx = colCU->m_refIdx[colRefPicList][absPartAddr];
1870
1871 if (colRefIdx < 0)
1872 {
1873 colRefPicList = !colRefPicList;
1874 colRefIdx = colCU->m_refIdx[colRefPicList][absPartAddr];
1875
1876 if (colRefIdx < 0)
1877 return false;
1878 }
1879
1880 // Scale the vector
1881 int colRefPOC = colCU->m_slice->m_refPOCList[colRefPicList][colRefIdx];
1882 int colPOC = colCU->m_slice->m_poc;
1883 MV colmv = colCU->m_mv[colRefPicList][absPartAddr];
1884
1885 int curRefPOC = m_slice->m_refPOCList[picList][outRefIdx];
1886 int curPOC = m_slice->m_poc;
1887
1888 outMV = scaleMvByPOCDist(colmv, curPOC, curRefPOC, colPOC, colRefPOC);
1889 return true;
1890 }
1891
1892 // Cache the collocated MV.
1893 bool CUData::getCollocatedMV(int cuAddr, int partUnitIdx, InterNeighbourMV *neighbour) const
1894 {
1895 const Frame* colPic = m_slice->m_refFrameList[m_slice->isInterB() && !m_slice->m_colFromL0Flag][m_slice->m_colRefIdx];
1896 const CUData* colCU = colPic->m_encData->getPicCTU(cuAddr);
1897
1898 uint32_t absPartAddr = partUnitIdx & TMVP_UNIT_MASK;
1899 if (colCU->m_predMode[partUnitIdx] == MODE_NONE || colCU->isIntra(absPartAddr))
1900 return false;
1901
1902 for (int list = 0; list < 2; list++)
1903 {
1904 neighbour->cuAddr[list] = cuAddr;
1905 int colRefPicList = m_slice->m_bCheckLDC ? list : m_slice->m_colFromL0Flag;
1906 int colRefIdx = colCU->m_refIdx[colRefPicList][absPartAddr];
1907
1908 if (colRefIdx < 0)
1909 colRefPicList = !colRefPicList;
1910
1911 neighbour->refIdx[list] = colCU->m_refIdx[colRefPicList][absPartAddr];
1912 neighbour->refIdx[list] |= colRefPicList << 4;
1913
1914 neighbour->mv[list] = colCU->m_mv[colRefPicList][absPartAddr];
1915 }
1916
1917 return neighbour->unifiedRef != -1;
1918 }
1919
1920 MV CUData::scaleMvByPOCDist(const MV& inMV, int curPOC, int curRefPOC, int colPOC, int colRefPOC) const
1921 {
1922 int diffPocD = colPOC - colRefPOC;
1923 int diffPocB = curPOC - curRefPOC;
1924
1925 if (diffPocD == diffPocB)
1926 return inMV;
1927 else
1928 {
1929 int tdb = x265_clip3(-128, 127, diffPocB);
1930 int tdd = x265_clip3(-128, 127, diffPocD);
1931 int x = (0x4000 + abs(tdd / 2)) / tdd;
1932 int scale = x265_clip3(-4096, 4095, (tdb * x + 32) >> 6);
1933 return scaleMv(inMV, scale);
1934 }
1935 }
1936
1937 uint32_t CUData::deriveCenterIdx(uint32_t puIdx) const
1938 {
1939 uint32_t absPartIdx;
1940 int puWidth, puHeight;
1941
1942 getPartIndexAndSize(puIdx, absPartIdx, puWidth, puHeight);
1943
1944 return g_rasterToZscan[g_zscanToRaster[m_absIdxInCTU + absPartIdx]
1945 + (puHeight >> (LOG2_UNIT_SIZE + 1)) * s_numPartInCUSize
1946 + (puWidth >> (LOG2_UNIT_SIZE + 1))];
1947 }
1948
1949 void CUData::getTUEntropyCodingParameters(TUEntropyCodingParameters &result, uint32_t absPartIdx, uint32_t log2TrSize, bool bIsLuma) const
1950 {
1951 bool bIsIntra = isIntra(absPartIdx);
1952
1953 // set the group layout
1954 result.log2TrSizeCG = log2TrSize - 2;
1955
1956 // set the scan orders
1957 if (bIsIntra)
1958 {
1959 uint32_t dirMode;
1960
1961 if (bIsLuma)
1962 dirMode = m_lumaIntraDir[absPartIdx];
1963 else
1964 {
1965 dirMode = m_chromaIntraDir[absPartIdx];
1966 if (dirMode == DM_CHROMA_IDX)
1967 {
1968 dirMode = m_lumaIntraDir[(m_chromaFormat == X265_CSP_I444) ? absPartIdx : absPartIdx & 0xFC];
1969 dirMode = (m_chromaFormat == X265_CSP_I422) ? g_chroma422IntraAngleMappingTable[dirMode] : dirMode;
1970 }
1971 }
1972
1973 if (log2TrSize <= (MDCS_LOG2_MAX_SIZE - m_hChromaShift) || (bIsLuma && log2TrSize == MDCS_LOG2_MAX_SIZE))
1974 result.scanType = dirMode >= 22 && dirMode <= 30 ? SCAN_HOR : dirMode >= 6 && dirMode <= 14 ? SCAN_VER : SCAN_DIAG;
1975 else
1976 result.scanType = SCAN_DIAG;
1977 }
1978 else
1979 result.scanType = SCAN_DIAG;
1980
1981 result.scan = g_scanOrder[result.scanType][log2TrSize - 2];
1982 result.scanCG = g_scanOrderCG[result.scanType][result.log2TrSizeCG];
1983
1984 if (log2TrSize == 2)
1985 result.firstSignificanceMapContext = 0;
1986 else if (log2TrSize == 3)
1987 result.firstSignificanceMapContext = (result.scanType != SCAN_DIAG && bIsLuma) ? 15 : 9;
1988 else
1989 result.firstSignificanceMapContext = bIsLuma ? 21 : 12;
1990 }
1991
/* Branch-free flag update: with value == 1 the bits of 'flag' are set in
 * 'bitfield', with value == 0 they are cleared.  ~((value) - 1) expands to
 * all ones for 1 and to zero for 0; other values are not meaningful. */
#define CU_SET_FLAG(bitfield, flag, value) \
    (bitfield) = ((flag) & (~((value) - 1))) | ((bitfield) & (~(flag)))
1993
1994 void CUData::calcCTUGeoms(uint32_t ctuWidth, uint32_t ctuHeight, uint32_t maxCUSize, uint32_t minCUSize, CUGeom cuDataArray[CUGeom::MAX_GEOMS])
1995 {
1996 // Initialize the coding blocks inside the CTB
1997 for (uint32_t log2CUSize = g_log2Size[maxCUSize], rangeCUIdx = 0; log2CUSize >= g_log2Size[minCUSize]; log2CUSize--)
1998 {
1999 uint32_t blockSize = 1 << log2CUSize;
2000 uint32_t sbWidth = 1 << (g_log2Size[maxCUSize] - log2CUSize);
2001 int32_t lastLevelFlag = log2CUSize == g_log2Size[minCUSize];
2002
2003 for (uint32_t sbY = 0; sbY < sbWidth; sbY++)
2004 {
2005 for (uint32_t sbX = 0; sbX < sbWidth; sbX++)
2006 {
2007 uint32_t depthIdx = g_depthScanIdx[sbY][sbX];
2008 uint32_t cuIdx = rangeCUIdx + depthIdx;
2009 uint32_t childIdx = rangeCUIdx + sbWidth * sbWidth + (depthIdx << 2);
2010 uint32_t px = sbX * blockSize;
2011 uint32_t py = sbY * blockSize;
2012 int32_t presentFlag = px < ctuWidth && py < ctuHeight;
2013 int32_t splitMandatoryFlag = presentFlag && !lastLevelFlag && (px + blockSize > ctuWidth || py + blockSize > ctuHeight);
2014
2015 /* Offset of the luma CU in the X, Y direction in terms of pixels from the CTU origin */
2016 uint32_t xOffset = (sbX * blockSize) >> 3;
2017 uint32_t yOffset = (sbY * blockSize) >> 3;
2018 X265_CHECK(cuIdx < CUGeom::MAX_GEOMS, "CU geom index bug\n");
2019
2020 CUGeom *cu = cuDataArray + cuIdx;
2021 cu->log2CUSize = log2CUSize;
2022 cu->childOffset = childIdx - cuIdx;
2023 cu->absPartIdx = g_depthScanIdx[yOffset][xOffset] * 4;
2024 cu->numPartitions = (NUM_4x4_PARTITIONS >> ((g_maxLog2CUSize - cu->log2CUSize) * 2));
2025 cu->depth = g_log2Size[maxCUSize] - log2CUSize;
2026
2027 cu->flags = 0;
2028 CU_SET_FLAG(cu->flags, CUGeom::PRESENT, presentFlag);
2029 CU_SET_FLAG(cu->flags, CUGeom::SPLIT_MANDATORY | CUGeom::SPLIT, splitMandatoryFlag);
2030 CU_SET_FLAG(cu->flags, CUGeom::LEAF, lastLevelFlag);
2031 }
2032 }
2033 rangeCUIdx += sbWidth * sbWidth;
2034 }
2035 }