Mercurial > hg > forks > libbpg
comparison x265/source/common/cudata.cpp @ 0:772086c29cc7
Initial import.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Wed, 16 Nov 2016 11:16:33 +0200 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:772086c29cc7 |
---|---|
1 /***************************************************************************** | |
2 * Copyright (C) 2015 x265 project | |
3 * | |
4 * Authors: Steve Borho <steve@borho.org> | |
5 * | |
6 * This program is free software; you can redistribute it and/or modify | |
7 * it under the terms of the GNU General Public License as published by | |
8 * the Free Software Foundation; either version 2 of the License, or | |
9 * (at your option) any later version. | |
10 * | |
11 * This program is distributed in the hope that it will be useful, | |
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 * GNU General Public License for more details. | |
15 * | |
16 * You should have received a copy of the GNU General Public License | |
17 * along with this program; if not, write to the Free Software | |
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. | |
19 * | |
20 * This program is also available under a commercial proprietary license. | |
21 * For more information, contact us at license @ x265.com. | |
22 *****************************************************************************/ | |
23 | |
24 #include "common.h" | |
25 #include "frame.h" | |
26 #include "framedata.h" | |
27 #include "picyuv.h" | |
28 #include "mv.h" | |
29 #include "cudata.h" | |
30 | |
31 using namespace X265_NS; | |
32 | |
33 /* for all bcast* and copy* functions, dst and src are aligned to MIN(size, 32) */ | |
34 | |
35 static void bcast1(uint8_t* dst, uint8_t val) { dst[0] = val; } | |
36 | |
37 static void copy4(uint8_t* dst, uint8_t* src) { ((uint32_t*)dst)[0] = ((uint32_t*)src)[0]; } | |
38 static void bcast4(uint8_t* dst, uint8_t val) { ((uint32_t*)dst)[0] = 0x01010101u * val; } | |
39 | |
40 static void copy16(uint8_t* dst, uint8_t* src) { ((uint64_t*)dst)[0] = ((uint64_t*)src)[0]; ((uint64_t*)dst)[1] = ((uint64_t*)src)[1]; } | |
41 static void bcast16(uint8_t* dst, uint8_t val) { uint64_t bval = 0x0101010101010101ULL * val; ((uint64_t*)dst)[0] = bval; ((uint64_t*)dst)[1] = bval; } | |
42 | |
43 static void copy64(uint8_t* dst, uint8_t* src) { ((uint64_t*)dst)[0] = ((uint64_t*)src)[0]; ((uint64_t*)dst)[1] = ((uint64_t*)src)[1]; | |
44 ((uint64_t*)dst)[2] = ((uint64_t*)src)[2]; ((uint64_t*)dst)[3] = ((uint64_t*)src)[3]; | |
45 ((uint64_t*)dst)[4] = ((uint64_t*)src)[4]; ((uint64_t*)dst)[5] = ((uint64_t*)src)[5]; | |
46 ((uint64_t*)dst)[6] = ((uint64_t*)src)[6]; ((uint64_t*)dst)[7] = ((uint64_t*)src)[7]; } | |
47 static void bcast64(uint8_t* dst, uint8_t val) { uint64_t bval = 0x0101010101010101ULL * val; | |
48 ((uint64_t*)dst)[0] = bval; ((uint64_t*)dst)[1] = bval; ((uint64_t*)dst)[2] = bval; ((uint64_t*)dst)[3] = bval; | |
49 ((uint64_t*)dst)[4] = bval; ((uint64_t*)dst)[5] = bval; ((uint64_t*)dst)[6] = bval; ((uint64_t*)dst)[7] = bval; } | |
50 | |
51 /* at 256 bytes, memset/memcpy will probably use SIMD more effectively than our uint64_t hack, | |
52 * but hand-written assembly would beat it. */ | |
53 static void copy256(uint8_t* dst, uint8_t* src) { memcpy(dst, src, 256); } | |
54 static void bcast256(uint8_t* dst, uint8_t val) { memset(dst, val, 256); } | |
55 | |
56 namespace { | |
57 // file private namespace | |
58 | |
/* True when both addresses fall in the same column, i.e.
 * addrA % numUnits == addrB % numUnits. The mask form used here matches the
 * modulo only when numUnits is a power of two. */
inline bool isEqualCol(int addrA, int addrB, int numUnits)
{
    const int colMask = numUnits - 1;
    return (addrA & colMask) == (addrB & colMask);
}
65 | |
/* True when both addresses fall in the same row, i.e.
 * addrA / numUnits == addrB / numUnits. The mask form used here matches the
 * division only when numUnits is a power of two. */
inline bool isEqualRow(int addrA, int addrB, int numUnits)
{
    const int rowMask = ~(numUnits - 1);
    return (addrA & rowMask) == (addrB & rowMask);
}
72 | |
/* True when the two addresses share a row or share a column (or both).
 * Both tests are always evaluated and combined with a bitwise OR. */
inline bool isEqualRowOrCol(int addrA, int addrB, int numUnits)
{
    const int diff    = addrA ^ addrB;
    const int colMask = numUnits - 1;

    const bool sameCol = (diff & colMask) == 0;
    const bool sameRow = (diff & ~colMask) == 0;
    return sameCol | sameRow;
}
78 | |
/* True when addr lies in the first column (addr % numUnits == 0) */
inline bool isZeroCol(int addr, int numUnits)
{
    const int colBits = addr & (numUnits - 1);
    return !colBits;
}
85 | |
/* True when addr lies in the first row (addr / numUnits == 0) */
inline bool isZeroRow(int addr, int numUnits)
{
    const int rowBits = addr & ~(numUnits - 1);
    return !rowBits;
}
92 | |
/* True when the column index of addr (addr % numUnits) is smaller than val */
inline bool lessThanCol(int addr, int val, int numUnits)
{
    const int col = addr & (numUnits - 1);
    return col < val;
}
99 | |
/* True when the row index of addr (addr / numUnits) is smaller than val;
 * evaluated as a comparison against the first address of row val */
inline bool lessThanRow(int addr, int val, int numUnits)
{
    const int firstAddrOfRowVal = val * numUnits;
    return firstAddrOfRowVal > addr;
}
106 | |
107 inline MV scaleMv(MV mv, int scale) | |
108 { | |
109 int mvx = x265_clip3(-32768, 32767, (scale * mv.x + 127 + (scale * mv.x < 0)) >> 8); | |
110 int mvy = x265_clip3(-32768, 32767, (scale * mv.y + 127 + (scale * mv.y < 0)) >> 8); | |
111 | |
112 return MV((int16_t)mvx, (int16_t)mvy); | |
113 } | |
114 | |
115 } | |
116 | |
117 cubcast_t CUData::s_partSet[NUM_FULL_DEPTH] = { NULL, NULL, NULL, NULL, NULL }; | |
118 uint32_t CUData::s_numPartInCUSize; | |
119 | |
120 CUData::CUData() | |
121 { | |
122 memset(this, 0, sizeof(*this)); | |
123 } | |
124 | |
125 void CUData::initialize(const CUDataMemPool& dataPool, uint32_t depth, int csp, int instance) | |
126 { | |
127 m_chromaFormat = csp; | |
128 m_hChromaShift = CHROMA_H_SHIFT(csp); | |
129 m_vChromaShift = CHROMA_V_SHIFT(csp); | |
130 m_numPartitions = NUM_4x4_PARTITIONS >> (depth * 2); | |
131 | |
132 if (!s_partSet[0]) | |
133 { | |
134 s_numPartInCUSize = 1 << g_unitSizeDepth; | |
135 switch (g_maxLog2CUSize) | |
136 { | |
137 case 6: | |
138 s_partSet[0] = bcast256; | |
139 s_partSet[1] = bcast64; | |
140 s_partSet[2] = bcast16; | |
141 s_partSet[3] = bcast4; | |
142 s_partSet[4] = bcast1; | |
143 break; | |
144 case 5: | |
145 s_partSet[0] = bcast64; | |
146 s_partSet[1] = bcast16; | |
147 s_partSet[2] = bcast4; | |
148 s_partSet[3] = bcast1; | |
149 s_partSet[4] = NULL; | |
150 break; | |
151 case 4: | |
152 s_partSet[0] = bcast16; | |
153 s_partSet[1] = bcast4; | |
154 s_partSet[2] = bcast1; | |
155 s_partSet[3] = NULL; | |
156 s_partSet[4] = NULL; | |
157 break; | |
158 default: | |
159 X265_CHECK(0, "unexpected CTU size\n"); | |
160 break; | |
161 } | |
162 } | |
163 | |
164 switch (m_numPartitions) | |
165 { | |
166 case 256: // 64x64 CU | |
167 m_partCopy = copy256; | |
168 m_partSet = bcast256; | |
169 m_subPartCopy = copy64; | |
170 m_subPartSet = bcast64; | |
171 break; | |
172 case 64: // 32x32 CU | |
173 m_partCopy = copy64; | |
174 m_partSet = bcast64; | |
175 m_subPartCopy = copy16; | |
176 m_subPartSet = bcast16; | |
177 break; | |
178 case 16: // 16x16 CU | |
179 m_partCopy = copy16; | |
180 m_partSet = bcast16; | |
181 m_subPartCopy = copy4; | |
182 m_subPartSet = bcast4; | |
183 break; | |
184 case 4: // 8x8 CU | |
185 m_partCopy = copy4; | |
186 m_partSet = bcast4; | |
187 m_subPartCopy = NULL; | |
188 m_subPartSet = NULL; | |
189 break; | |
190 default: | |
191 X265_CHECK(0, "unexpected CU partition count\n"); | |
192 break; | |
193 } | |
194 | |
195 /* Each CU's data is layed out sequentially within the charMemBlock */ | |
196 uint8_t *charBuf = dataPool.charMemBlock + (m_numPartitions * BytesPerPartition) * instance; | |
197 | |
198 m_qp = (int8_t*)charBuf; charBuf += m_numPartitions; | |
199 m_log2CUSize = charBuf; charBuf += m_numPartitions; | |
200 m_lumaIntraDir = charBuf; charBuf += m_numPartitions; | |
201 m_tqBypass = charBuf; charBuf += m_numPartitions; | |
202 m_refIdx[0] = (int8_t*)charBuf; charBuf += m_numPartitions; | |
203 m_refIdx[1] = (int8_t*)charBuf; charBuf += m_numPartitions; | |
204 m_cuDepth = charBuf; charBuf += m_numPartitions; | |
205 m_predMode = charBuf; charBuf += m_numPartitions; /* the order up to here is important in initCTU() and initSubCU() */ | |
206 m_partSize = charBuf; charBuf += m_numPartitions; | |
207 m_mergeFlag = charBuf; charBuf += m_numPartitions; | |
208 m_interDir = charBuf; charBuf += m_numPartitions; | |
209 m_mvpIdx[0] = charBuf; charBuf += m_numPartitions; | |
210 m_mvpIdx[1] = charBuf; charBuf += m_numPartitions; | |
211 m_tuDepth = charBuf; charBuf += m_numPartitions; | |
212 m_transformSkip[0] = charBuf; charBuf += m_numPartitions; | |
213 m_transformSkip[1] = charBuf; charBuf += m_numPartitions; | |
214 m_transformSkip[2] = charBuf; charBuf += m_numPartitions; | |
215 m_cbf[0] = charBuf; charBuf += m_numPartitions; | |
216 m_cbf[1] = charBuf; charBuf += m_numPartitions; | |
217 m_cbf[2] = charBuf; charBuf += m_numPartitions; | |
218 m_chromaIntraDir = charBuf; charBuf += m_numPartitions; | |
219 | |
220 X265_CHECK(charBuf == dataPool.charMemBlock + (m_numPartitions * BytesPerPartition) * (instance + 1), "CU data layout is broken\n"); | |
221 | |
222 m_mv[0] = dataPool.mvMemBlock + (instance * 4) * m_numPartitions; | |
223 m_mv[1] = m_mv[0] + m_numPartitions; | |
224 m_mvd[0] = m_mv[1] + m_numPartitions; | |
225 m_mvd[1] = m_mvd[0] + m_numPartitions; | |
226 | |
227 uint32_t cuSize = g_maxCUSize >> depth; | |
228 uint32_t sizeL = cuSize * cuSize; | |
229 uint32_t sizeC = sizeL >> (m_hChromaShift + m_vChromaShift); | |
230 m_trCoeff[0] = dataPool.trCoeffMemBlock + instance * (sizeL + sizeC * 2); | |
231 m_trCoeff[1] = m_trCoeff[0] + sizeL; | |
232 m_trCoeff[2] = m_trCoeff[0] + sizeL + sizeC; | |
233 } | |
234 | |
235 void CUData::initCTU(const Frame& frame, uint32_t cuAddr, int qp) | |
236 { | |
237 m_encData = frame.m_encData; | |
238 m_slice = m_encData->m_slice; | |
239 m_cuAddr = cuAddr; | |
240 m_cuPelX = (cuAddr % m_slice->m_sps->numCuInWidth) << g_maxLog2CUSize; | |
241 m_cuPelY = (cuAddr / m_slice->m_sps->numCuInWidth) << g_maxLog2CUSize; | |
242 m_absIdxInCTU = 0; | |
243 m_numPartitions = NUM_4x4_PARTITIONS; | |
244 | |
245 /* sequential memsets */ | |
246 m_partSet((uint8_t*)m_qp, (uint8_t)qp); | |
247 m_partSet(m_log2CUSize, (uint8_t)g_maxLog2CUSize); | |
248 m_partSet(m_lumaIntraDir, (uint8_t)DC_IDX); | |
249 m_partSet(m_tqBypass, (uint8_t)frame.m_encData->m_param->bLossless); | |
250 if (m_slice->m_sliceType != I_SLICE) | |
251 { | |
252 m_partSet((uint8_t*)m_refIdx[0], (uint8_t)REF_NOT_VALID); | |
253 m_partSet((uint8_t*)m_refIdx[1], (uint8_t)REF_NOT_VALID); | |
254 } | |
255 | |
256 X265_CHECK(!(frame.m_encData->m_param->bLossless && !m_slice->m_pps->bTransquantBypassEnabled), "lossless enabled without TQbypass in PPS\n"); | |
257 | |
258 /* initialize the remaining CU data in one memset */ | |
259 memset(m_cuDepth, 0, (BytesPerPartition - 6) * m_numPartitions); | |
260 | |
261 uint32_t widthInCU = m_slice->m_sps->numCuInWidth; | |
262 m_cuLeft = (m_cuAddr % widthInCU) ? m_encData->getPicCTU(m_cuAddr - 1) : NULL; | |
263 m_cuAbove = (m_cuAddr / widthInCU) ? m_encData->getPicCTU(m_cuAddr - widthInCU) : NULL; | |
264 m_cuAboveLeft = (m_cuLeft && m_cuAbove) ? m_encData->getPicCTU(m_cuAddr - widthInCU - 1) : NULL; | |
265 m_cuAboveRight = (m_cuAbove && ((m_cuAddr % widthInCU) < (widthInCU - 1))) ? m_encData->getPicCTU(m_cuAddr - widthInCU + 1) : NULL; | |
266 } | |
267 | |
268 // initialize Sub partition | |
269 void CUData::initSubCU(const CUData& ctu, const CUGeom& cuGeom, int qp) | |
270 { | |
271 m_absIdxInCTU = cuGeom.absPartIdx; | |
272 m_encData = ctu.m_encData; | |
273 m_slice = ctu.m_slice; | |
274 m_cuAddr = ctu.m_cuAddr; | |
275 m_cuPelX = ctu.m_cuPelX + g_zscanToPelX[cuGeom.absPartIdx]; | |
276 m_cuPelY = ctu.m_cuPelY + g_zscanToPelY[cuGeom.absPartIdx]; | |
277 m_cuLeft = ctu.m_cuLeft; | |
278 m_cuAbove = ctu.m_cuAbove; | |
279 m_cuAboveLeft = ctu.m_cuAboveLeft; | |
280 m_cuAboveRight = ctu.m_cuAboveRight; | |
281 X265_CHECK(m_numPartitions == cuGeom.numPartitions, "initSubCU() size mismatch\n"); | |
282 | |
283 m_partSet((uint8_t*)m_qp, (uint8_t)qp); | |
284 | |
285 m_partSet(m_log2CUSize, (uint8_t)cuGeom.log2CUSize); | |
286 m_partSet(m_lumaIntraDir, (uint8_t)DC_IDX); | |
287 m_partSet(m_tqBypass, (uint8_t)m_encData->m_param->bLossless); | |
288 m_partSet((uint8_t*)m_refIdx[0], (uint8_t)REF_NOT_VALID); | |
289 m_partSet((uint8_t*)m_refIdx[1], (uint8_t)REF_NOT_VALID); | |
290 m_partSet(m_cuDepth, (uint8_t)cuGeom.depth); | |
291 | |
292 /* initialize the remaining CU data in one memset */ | |
293 memset(m_predMode, 0, (BytesPerPartition - 7) * m_numPartitions); | |
294 } | |
295 | |
296 /* Copy the results of a sub-part (split) CU to the parent CU */ | |
297 void CUData::copyPartFrom(const CUData& subCU, const CUGeom& childGeom, uint32_t subPartIdx) | |
298 { | |
299 X265_CHECK(subPartIdx < 4, "part unit should be less than 4\n"); | |
300 | |
301 uint32_t offset = childGeom.numPartitions * subPartIdx; | |
302 | |
303 m_subPartCopy((uint8_t*)m_qp + offset, (uint8_t*)subCU.m_qp); | |
304 m_subPartCopy(m_log2CUSize + offset, subCU.m_log2CUSize); | |
305 m_subPartCopy(m_lumaIntraDir + offset, subCU.m_lumaIntraDir); | |
306 m_subPartCopy(m_tqBypass + offset, subCU.m_tqBypass); | |
307 m_subPartCopy((uint8_t*)m_refIdx[0] + offset, (uint8_t*)subCU.m_refIdx[0]); | |
308 m_subPartCopy((uint8_t*)m_refIdx[1] + offset, (uint8_t*)subCU.m_refIdx[1]); | |
309 m_subPartCopy(m_cuDepth + offset, subCU.m_cuDepth); | |
310 m_subPartCopy(m_predMode + offset, subCU.m_predMode); | |
311 m_subPartCopy(m_partSize + offset, subCU.m_partSize); | |
312 m_subPartCopy(m_mergeFlag + offset, subCU.m_mergeFlag); | |
313 m_subPartCopy(m_interDir + offset, subCU.m_interDir); | |
314 m_subPartCopy(m_mvpIdx[0] + offset, subCU.m_mvpIdx[0]); | |
315 m_subPartCopy(m_mvpIdx[1] + offset, subCU.m_mvpIdx[1]); | |
316 m_subPartCopy(m_tuDepth + offset, subCU.m_tuDepth); | |
317 m_subPartCopy(m_transformSkip[0] + offset, subCU.m_transformSkip[0]); | |
318 m_subPartCopy(m_transformSkip[1] + offset, subCU.m_transformSkip[1]); | |
319 m_subPartCopy(m_transformSkip[2] + offset, subCU.m_transformSkip[2]); | |
320 m_subPartCopy(m_cbf[0] + offset, subCU.m_cbf[0]); | |
321 m_subPartCopy(m_cbf[1] + offset, subCU.m_cbf[1]); | |
322 m_subPartCopy(m_cbf[2] + offset, subCU.m_cbf[2]); | |
323 m_subPartCopy(m_chromaIntraDir + offset, subCU.m_chromaIntraDir); | |
324 | |
325 memcpy(m_mv[0] + offset, subCU.m_mv[0], childGeom.numPartitions * sizeof(MV)); | |
326 memcpy(m_mv[1] + offset, subCU.m_mv[1], childGeom.numPartitions * sizeof(MV)); | |
327 memcpy(m_mvd[0] + offset, subCU.m_mvd[0], childGeom.numPartitions * sizeof(MV)); | |
328 memcpy(m_mvd[1] + offset, subCU.m_mvd[1], childGeom.numPartitions * sizeof(MV)); | |
329 | |
330 uint32_t tmp = 1 << ((g_maxLog2CUSize - childGeom.depth) * 2); | |
331 uint32_t tmp2 = subPartIdx * tmp; | |
332 memcpy(m_trCoeff[0] + tmp2, subCU.m_trCoeff[0], sizeof(coeff_t) * tmp); | |
333 | |
334 uint32_t tmpC = tmp >> (m_hChromaShift + m_vChromaShift); | |
335 uint32_t tmpC2 = tmp2 >> (m_hChromaShift + m_vChromaShift); | |
336 memcpy(m_trCoeff[1] + tmpC2, subCU.m_trCoeff[1], sizeof(coeff_t) * tmpC); | |
337 memcpy(m_trCoeff[2] + tmpC2, subCU.m_trCoeff[2], sizeof(coeff_t) * tmpC); | |
338 } | |
339 | |
340 /* If a sub-CU part is not present (off the edge of the picture) its depth and | |
341 * log2size should still be configured */ | |
342 void CUData::setEmptyPart(const CUGeom& childGeom, uint32_t subPartIdx) | |
343 { | |
344 uint32_t offset = childGeom.numPartitions * subPartIdx; | |
345 m_subPartSet(m_cuDepth + offset, (uint8_t)childGeom.depth); | |
346 m_subPartSet(m_log2CUSize + offset, (uint8_t)childGeom.log2CUSize); | |
347 } | |
348 | |
349 /* Copy all CU data from one instance to the next, except set lossless flag | |
350 * This will only get used when --cu-lossless is enabled but --lossless is not. */ | |
351 void CUData::initLosslessCU(const CUData& cu, const CUGeom& cuGeom) | |
352 { | |
353 /* Start by making an exact copy */ | |
354 m_encData = cu.m_encData; | |
355 m_slice = cu.m_slice; | |
356 m_cuAddr = cu.m_cuAddr; | |
357 m_cuPelX = cu.m_cuPelX; | |
358 m_cuPelY = cu.m_cuPelY; | |
359 m_cuLeft = cu.m_cuLeft; | |
360 m_cuAbove = cu.m_cuAbove; | |
361 m_cuAboveLeft = cu.m_cuAboveLeft; | |
362 m_cuAboveRight = cu.m_cuAboveRight; | |
363 m_absIdxInCTU = cuGeom.absPartIdx; | |
364 m_numPartitions = cuGeom.numPartitions; | |
365 memcpy(m_qp, cu.m_qp, BytesPerPartition * m_numPartitions); | |
366 memcpy(m_mv[0], cu.m_mv[0], m_numPartitions * sizeof(MV)); | |
367 memcpy(m_mv[1], cu.m_mv[1], m_numPartitions * sizeof(MV)); | |
368 memcpy(m_mvd[0], cu.m_mvd[0], m_numPartitions * sizeof(MV)); | |
369 memcpy(m_mvd[1], cu.m_mvd[1], m_numPartitions * sizeof(MV)); | |
370 | |
371 /* force TQBypass to true */ | |
372 m_partSet(m_tqBypass, true); | |
373 | |
374 /* clear residual coding flags */ | |
375 m_partSet(m_predMode, cu.m_predMode[0] & (MODE_INTRA | MODE_INTER)); | |
376 m_partSet(m_tuDepth, 0); | |
377 m_partSet(m_transformSkip[0], 0); | |
378 m_partSet(m_transformSkip[1], 0); | |
379 m_partSet(m_transformSkip[2], 0); | |
380 m_partSet(m_cbf[0], 0); | |
381 m_partSet(m_cbf[1], 0); | |
382 m_partSet(m_cbf[2], 0); | |
383 } | |
384 | |
385 /* Copy completed predicted CU to CTU in picture */ | |
386 void CUData::copyToPic(uint32_t depth) const | |
387 { | |
388 CUData& ctu = *m_encData->getPicCTU(m_cuAddr); | |
389 | |
390 m_partCopy((uint8_t*)ctu.m_qp + m_absIdxInCTU, (uint8_t*)m_qp); | |
391 m_partCopy(ctu.m_log2CUSize + m_absIdxInCTU, m_log2CUSize); | |
392 m_partCopy(ctu.m_lumaIntraDir + m_absIdxInCTU, m_lumaIntraDir); | |
393 m_partCopy(ctu.m_tqBypass + m_absIdxInCTU, m_tqBypass); | |
394 m_partCopy((uint8_t*)ctu.m_refIdx[0] + m_absIdxInCTU, (uint8_t*)m_refIdx[0]); | |
395 m_partCopy((uint8_t*)ctu.m_refIdx[1] + m_absIdxInCTU, (uint8_t*)m_refIdx[1]); | |
396 m_partCopy(ctu.m_cuDepth + m_absIdxInCTU, m_cuDepth); | |
397 m_partCopy(ctu.m_predMode + m_absIdxInCTU, m_predMode); | |
398 m_partCopy(ctu.m_partSize + m_absIdxInCTU, m_partSize); | |
399 m_partCopy(ctu.m_mergeFlag + m_absIdxInCTU, m_mergeFlag); | |
400 m_partCopy(ctu.m_interDir + m_absIdxInCTU, m_interDir); | |
401 m_partCopy(ctu.m_mvpIdx[0] + m_absIdxInCTU, m_mvpIdx[0]); | |
402 m_partCopy(ctu.m_mvpIdx[1] + m_absIdxInCTU, m_mvpIdx[1]); | |
403 m_partCopy(ctu.m_tuDepth + m_absIdxInCTU, m_tuDepth); | |
404 m_partCopy(ctu.m_transformSkip[0] + m_absIdxInCTU, m_transformSkip[0]); | |
405 m_partCopy(ctu.m_transformSkip[1] + m_absIdxInCTU, m_transformSkip[1]); | |
406 m_partCopy(ctu.m_transformSkip[2] + m_absIdxInCTU, m_transformSkip[2]); | |
407 m_partCopy(ctu.m_cbf[0] + m_absIdxInCTU, m_cbf[0]); | |
408 m_partCopy(ctu.m_cbf[1] + m_absIdxInCTU, m_cbf[1]); | |
409 m_partCopy(ctu.m_cbf[2] + m_absIdxInCTU, m_cbf[2]); | |
410 m_partCopy(ctu.m_chromaIntraDir + m_absIdxInCTU, m_chromaIntraDir); | |
411 | |
412 memcpy(ctu.m_mv[0] + m_absIdxInCTU, m_mv[0], m_numPartitions * sizeof(MV)); | |
413 memcpy(ctu.m_mv[1] + m_absIdxInCTU, m_mv[1], m_numPartitions * sizeof(MV)); | |
414 memcpy(ctu.m_mvd[0] + m_absIdxInCTU, m_mvd[0], m_numPartitions * sizeof(MV)); | |
415 memcpy(ctu.m_mvd[1] + m_absIdxInCTU, m_mvd[1], m_numPartitions * sizeof(MV)); | |
416 | |
417 uint32_t tmpY = 1 << ((g_maxLog2CUSize - depth) * 2); | |
418 uint32_t tmpY2 = m_absIdxInCTU << (LOG2_UNIT_SIZE * 2); | |
419 memcpy(ctu.m_trCoeff[0] + tmpY2, m_trCoeff[0], sizeof(coeff_t) * tmpY); | |
420 | |
421 uint32_t tmpC = tmpY >> (m_hChromaShift + m_vChromaShift); | |
422 uint32_t tmpC2 = tmpY2 >> (m_hChromaShift + m_vChromaShift); | |
423 memcpy(ctu.m_trCoeff[1] + tmpC2, m_trCoeff[1], sizeof(coeff_t) * tmpC); | |
424 memcpy(ctu.m_trCoeff[2] + tmpC2, m_trCoeff[2], sizeof(coeff_t) * tmpC); | |
425 } | |
426 | |
427 /* The reverse of copyToPic, called only by encodeResidue */ | |
428 void CUData::copyFromPic(const CUData& ctu, const CUGeom& cuGeom) | |
429 { | |
430 m_encData = ctu.m_encData; | |
431 m_slice = ctu.m_slice; | |
432 m_cuAddr = ctu.m_cuAddr; | |
433 m_cuPelX = ctu.m_cuPelX + g_zscanToPelX[cuGeom.absPartIdx]; | |
434 m_cuPelY = ctu.m_cuPelY + g_zscanToPelY[cuGeom.absPartIdx]; | |
435 m_absIdxInCTU = cuGeom.absPartIdx; | |
436 m_numPartitions = cuGeom.numPartitions; | |
437 | |
438 /* copy out all prediction info for this part */ | |
439 m_partCopy((uint8_t*)m_qp, (uint8_t*)ctu.m_qp + m_absIdxInCTU); | |
440 m_partCopy(m_log2CUSize, ctu.m_log2CUSize + m_absIdxInCTU); | |
441 m_partCopy(m_lumaIntraDir, ctu.m_lumaIntraDir + m_absIdxInCTU); | |
442 m_partCopy(m_tqBypass, ctu.m_tqBypass + m_absIdxInCTU); | |
443 m_partCopy((uint8_t*)m_refIdx[0], (uint8_t*)ctu.m_refIdx[0] + m_absIdxInCTU); | |
444 m_partCopy((uint8_t*)m_refIdx[1], (uint8_t*)ctu.m_refIdx[1] + m_absIdxInCTU); | |
445 m_partCopy(m_cuDepth, ctu.m_cuDepth + m_absIdxInCTU); | |
446 m_partSet(m_predMode, ctu.m_predMode[m_absIdxInCTU] & (MODE_INTRA | MODE_INTER)); /* clear skip flag */ | |
447 m_partCopy(m_partSize, ctu.m_partSize + m_absIdxInCTU); | |
448 m_partCopy(m_mergeFlag, ctu.m_mergeFlag + m_absIdxInCTU); | |
449 m_partCopy(m_interDir, ctu.m_interDir + m_absIdxInCTU); | |
450 m_partCopy(m_mvpIdx[0], ctu.m_mvpIdx[0] + m_absIdxInCTU); | |
451 m_partCopy(m_mvpIdx[1], ctu.m_mvpIdx[1] + m_absIdxInCTU); | |
452 m_partCopy(m_chromaIntraDir, ctu.m_chromaIntraDir + m_absIdxInCTU); | |
453 | |
454 memcpy(m_mv[0], ctu.m_mv[0] + m_absIdxInCTU, m_numPartitions * sizeof(MV)); | |
455 memcpy(m_mv[1], ctu.m_mv[1] + m_absIdxInCTU, m_numPartitions * sizeof(MV)); | |
456 memcpy(m_mvd[0], ctu.m_mvd[0] + m_absIdxInCTU, m_numPartitions * sizeof(MV)); | |
457 memcpy(m_mvd[1], ctu.m_mvd[1] + m_absIdxInCTU, m_numPartitions * sizeof(MV)); | |
458 | |
459 /* clear residual coding flags */ | |
460 m_partSet(m_tuDepth, 0); | |
461 m_partSet(m_transformSkip[0], 0); | |
462 m_partSet(m_transformSkip[1], 0); | |
463 m_partSet(m_transformSkip[2], 0); | |
464 m_partSet(m_cbf[0], 0); | |
465 m_partSet(m_cbf[1], 0); | |
466 m_partSet(m_cbf[2], 0); | |
467 } | |
468 | |
469 /* Only called by encodeResidue, these fields can be modified during inter/intra coding */ | |
470 void CUData::updatePic(uint32_t depth) const | |
471 { | |
472 CUData& ctu = *m_encData->getPicCTU(m_cuAddr); | |
473 | |
474 m_partCopy((uint8_t*)ctu.m_qp + m_absIdxInCTU, (uint8_t*)m_qp); | |
475 m_partCopy(ctu.m_transformSkip[0] + m_absIdxInCTU, m_transformSkip[0]); | |
476 m_partCopy(ctu.m_transformSkip[1] + m_absIdxInCTU, m_transformSkip[1]); | |
477 m_partCopy(ctu.m_transformSkip[2] + m_absIdxInCTU, m_transformSkip[2]); | |
478 m_partCopy(ctu.m_predMode + m_absIdxInCTU, m_predMode); | |
479 m_partCopy(ctu.m_tuDepth + m_absIdxInCTU, m_tuDepth); | |
480 m_partCopy(ctu.m_cbf[0] + m_absIdxInCTU, m_cbf[0]); | |
481 m_partCopy(ctu.m_cbf[1] + m_absIdxInCTU, m_cbf[1]); | |
482 m_partCopy(ctu.m_cbf[2] + m_absIdxInCTU, m_cbf[2]); | |
483 m_partCopy(ctu.m_chromaIntraDir + m_absIdxInCTU, m_chromaIntraDir); | |
484 | |
485 uint32_t tmpY = 1 << ((g_maxLog2CUSize - depth) * 2); | |
486 uint32_t tmpY2 = m_absIdxInCTU << (LOG2_UNIT_SIZE * 2); | |
487 memcpy(ctu.m_trCoeff[0] + tmpY2, m_trCoeff[0], sizeof(coeff_t) * tmpY); | |
488 tmpY >>= m_hChromaShift + m_vChromaShift; | |
489 tmpY2 >>= m_hChromaShift + m_vChromaShift; | |
490 memcpy(ctu.m_trCoeff[1] + tmpY2, m_trCoeff[1], sizeof(coeff_t) * tmpY); | |
491 memcpy(ctu.m_trCoeff[2] + tmpY2, m_trCoeff[2], sizeof(coeff_t) * tmpY); | |
492 } | |
493 | |
494 const CUData* CUData::getPULeft(uint32_t& lPartUnitIdx, uint32_t curPartUnitIdx) const | |
495 { | |
496 uint32_t absPartIdx = g_zscanToRaster[curPartUnitIdx]; | |
497 | |
498 if (!isZeroCol(absPartIdx, s_numPartInCUSize)) | |
499 { | |
500 uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInCTU]; | |
501 lPartUnitIdx = g_rasterToZscan[absPartIdx - 1]; | |
502 if (isEqualCol(absPartIdx, absZorderCUIdx, s_numPartInCUSize)) | |
503 return m_encData->getPicCTU(m_cuAddr); | |
504 else | |
505 { | |
506 lPartUnitIdx -= m_absIdxInCTU; | |
507 return this; | |
508 } | |
509 } | |
510 | |
511 lPartUnitIdx = g_rasterToZscan[absPartIdx + s_numPartInCUSize - 1]; | |
512 return m_cuLeft; | |
513 } | |
514 | |
515 const CUData* CUData::getPUAbove(uint32_t& aPartUnitIdx, uint32_t curPartUnitIdx) const | |
516 { | |
517 uint32_t absPartIdx = g_zscanToRaster[curPartUnitIdx]; | |
518 | |
519 if (!isZeroRow(absPartIdx, s_numPartInCUSize)) | |
520 { | |
521 uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInCTU]; | |
522 aPartUnitIdx = g_rasterToZscan[absPartIdx - s_numPartInCUSize]; | |
523 if (isEqualRow(absPartIdx, absZorderCUIdx, s_numPartInCUSize)) | |
524 return m_encData->getPicCTU(m_cuAddr); | |
525 else | |
526 aPartUnitIdx -= m_absIdxInCTU; | |
527 return this; | |
528 } | |
529 | |
530 aPartUnitIdx = g_rasterToZscan[absPartIdx + NUM_4x4_PARTITIONS - s_numPartInCUSize]; | |
531 return m_cuAbove; | |
532 } | |
533 | |
534 const CUData* CUData::getPUAboveLeft(uint32_t& alPartUnitIdx, uint32_t curPartUnitIdx) const | |
535 { | |
536 uint32_t absPartIdx = g_zscanToRaster[curPartUnitIdx]; | |
537 | |
538 if (!isZeroCol(absPartIdx, s_numPartInCUSize)) | |
539 { | |
540 if (!isZeroRow(absPartIdx, s_numPartInCUSize)) | |
541 { | |
542 uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInCTU]; | |
543 alPartUnitIdx = g_rasterToZscan[absPartIdx - s_numPartInCUSize - 1]; | |
544 if (isEqualRowOrCol(absPartIdx, absZorderCUIdx, s_numPartInCUSize)) | |
545 return m_encData->getPicCTU(m_cuAddr); | |
546 else | |
547 { | |
548 alPartUnitIdx -= m_absIdxInCTU; | |
549 return this; | |
550 } | |
551 } | |
552 alPartUnitIdx = g_rasterToZscan[absPartIdx + NUM_4x4_PARTITIONS - s_numPartInCUSize - 1]; | |
553 return m_cuAbove; | |
554 } | |
555 | |
556 if (!isZeroRow(absPartIdx, s_numPartInCUSize)) | |
557 { | |
558 alPartUnitIdx = g_rasterToZscan[absPartIdx - 1]; | |
559 return m_cuLeft; | |
560 } | |
561 | |
562 alPartUnitIdx = g_rasterToZscan[NUM_4x4_PARTITIONS - 1]; | |
563 return m_cuAboveLeft; | |
564 } | |
565 | |
566 const CUData* CUData::getPUAboveRight(uint32_t& arPartUnitIdx, uint32_t curPartUnitIdx) const | |
567 { | |
568 if ((m_encData->getPicCTU(m_cuAddr)->m_cuPelX + g_zscanToPelX[curPartUnitIdx] + UNIT_SIZE) >= m_slice->m_sps->picWidthInLumaSamples) | |
569 return NULL; | |
570 | |
571 uint32_t absPartIdxRT = g_zscanToRaster[curPartUnitIdx]; | |
572 | |
573 if (lessThanCol(absPartIdxRT, s_numPartInCUSize - 1, s_numPartInCUSize)) | |
574 { | |
575 if (!isZeroRow(absPartIdxRT, s_numPartInCUSize)) | |
576 { | |
577 if (curPartUnitIdx > g_rasterToZscan[absPartIdxRT - s_numPartInCUSize + 1]) | |
578 { | |
579 uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInCTU] + (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1; | |
580 arPartUnitIdx = g_rasterToZscan[absPartIdxRT - s_numPartInCUSize + 1]; | |
581 if (isEqualRowOrCol(absPartIdxRT, absZorderCUIdx, s_numPartInCUSize)) | |
582 return m_encData->getPicCTU(m_cuAddr); | |
583 else | |
584 { | |
585 arPartUnitIdx -= m_absIdxInCTU; | |
586 return this; | |
587 } | |
588 } | |
589 return NULL; | |
590 } | |
591 arPartUnitIdx = g_rasterToZscan[absPartIdxRT + NUM_4x4_PARTITIONS - s_numPartInCUSize + 1]; | |
592 return m_cuAbove; | |
593 } | |
594 | |
595 if (!isZeroRow(absPartIdxRT, s_numPartInCUSize)) | |
596 return NULL; | |
597 | |
598 arPartUnitIdx = g_rasterToZscan[NUM_4x4_PARTITIONS - s_numPartInCUSize]; | |
599 return m_cuAboveRight; | |
600 } | |
601 | |
602 const CUData* CUData::getPUBelowLeft(uint32_t& blPartUnitIdx, uint32_t curPartUnitIdx) const | |
603 { | |
604 if ((m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[curPartUnitIdx] + UNIT_SIZE) >= m_slice->m_sps->picHeightInLumaSamples) | |
605 return NULL; | |
606 | |
607 uint32_t absPartIdxLB = g_zscanToRaster[curPartUnitIdx]; | |
608 | |
609 if (lessThanRow(absPartIdxLB, s_numPartInCUSize - 1, s_numPartInCUSize)) | |
610 { | |
611 if (!isZeroCol(absPartIdxLB, s_numPartInCUSize)) | |
612 { | |
613 if (curPartUnitIdx > g_rasterToZscan[absPartIdxLB + s_numPartInCUSize - 1]) | |
614 { | |
615 uint32_t absZorderCUIdxLB = g_zscanToRaster[m_absIdxInCTU] + ((1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1) * s_numPartInCUSize; | |
616 blPartUnitIdx = g_rasterToZscan[absPartIdxLB + s_numPartInCUSize - 1]; | |
617 if (isEqualRowOrCol(absPartIdxLB, absZorderCUIdxLB, s_numPartInCUSize)) | |
618 return m_encData->getPicCTU(m_cuAddr); | |
619 else | |
620 { | |
621 blPartUnitIdx -= m_absIdxInCTU; | |
622 return this; | |
623 } | |
624 } | |
625 return NULL; | |
626 } | |
627 blPartUnitIdx = g_rasterToZscan[absPartIdxLB + s_numPartInCUSize * 2 - 1]; | |
628 return m_cuLeft; | |
629 } | |
630 | |
631 return NULL; | |
632 } | |
633 | |
634 const CUData* CUData::getPUBelowLeftAdi(uint32_t& blPartUnitIdx, uint32_t curPartUnitIdx, uint32_t partUnitOffset) const | |
635 { | |
636 if ((m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[curPartUnitIdx] + (partUnitOffset << LOG2_UNIT_SIZE)) >= m_slice->m_sps->picHeightInLumaSamples) | |
637 return NULL; | |
638 | |
639 uint32_t absPartIdxLB = g_zscanToRaster[curPartUnitIdx]; | |
640 | |
641 if (lessThanRow(absPartIdxLB, s_numPartInCUSize - partUnitOffset, s_numPartInCUSize)) | |
642 { | |
643 if (!isZeroCol(absPartIdxLB, s_numPartInCUSize)) | |
644 { | |
645 if (curPartUnitIdx > g_rasterToZscan[absPartIdxLB + partUnitOffset * s_numPartInCUSize - 1]) | |
646 { | |
647 uint32_t absZorderCUIdxLB = g_zscanToRaster[m_absIdxInCTU] + ((1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1) * s_numPartInCUSize; | |
648 blPartUnitIdx = g_rasterToZscan[absPartIdxLB + partUnitOffset * s_numPartInCUSize - 1]; | |
649 if (isEqualRowOrCol(absPartIdxLB, absZorderCUIdxLB, s_numPartInCUSize)) | |
650 return m_encData->getPicCTU(m_cuAddr); | |
651 else | |
652 { | |
653 blPartUnitIdx -= m_absIdxInCTU; | |
654 return this; | |
655 } | |
656 } | |
657 return NULL; | |
658 } | |
659 blPartUnitIdx = g_rasterToZscan[absPartIdxLB + (1 + partUnitOffset) * s_numPartInCUSize - 1]; | |
660 return m_cuLeft; | |
661 } | |
662 | |
663 return NULL; | |
664 } | |
665 | |
666 const CUData* CUData::getPUAboveRightAdi(uint32_t& arPartUnitIdx, uint32_t curPartUnitIdx, uint32_t partUnitOffset) const | |
667 { | |
668 if ((m_encData->getPicCTU(m_cuAddr)->m_cuPelX + g_zscanToPelX[curPartUnitIdx] + (partUnitOffset << LOG2_UNIT_SIZE)) >= m_slice->m_sps->picWidthInLumaSamples) | |
669 return NULL; | |
670 | |
671 uint32_t absPartIdxRT = g_zscanToRaster[curPartUnitIdx]; | |
672 | |
673 if (lessThanCol(absPartIdxRT, s_numPartInCUSize - partUnitOffset, s_numPartInCUSize)) | |
674 { | |
675 if (!isZeroRow(absPartIdxRT, s_numPartInCUSize)) | |
676 { | |
677 if (curPartUnitIdx > g_rasterToZscan[absPartIdxRT - s_numPartInCUSize + partUnitOffset]) | |
678 { | |
679 uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInCTU] + (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1; | |
680 arPartUnitIdx = g_rasterToZscan[absPartIdxRT - s_numPartInCUSize + partUnitOffset]; | |
681 if (isEqualRowOrCol(absPartIdxRT, absZorderCUIdx, s_numPartInCUSize)) | |
682 return m_encData->getPicCTU(m_cuAddr); | |
683 else | |
684 { | |
685 arPartUnitIdx -= m_absIdxInCTU; | |
686 return this; | |
687 } | |
688 } | |
689 return NULL; | |
690 } | |
691 arPartUnitIdx = g_rasterToZscan[absPartIdxRT + NUM_4x4_PARTITIONS - s_numPartInCUSize + partUnitOffset]; | |
692 return m_cuAbove; | |
693 } | |
694 | |
695 if (!isZeroRow(absPartIdxRT, s_numPartInCUSize)) | |
696 return NULL; | |
697 | |
698 arPartUnitIdx = g_rasterToZscan[NUM_4x4_PARTITIONS - s_numPartInCUSize + partUnitOffset - 1]; | |
699 return m_cuAboveRight; | |
700 } | |
701 | |
702 /* Get left QpMinCu */ | |
703 const CUData* CUData::getQpMinCuLeft(uint32_t& lPartUnitIdx, uint32_t curAbsIdxInCTU) const | |
704 { | |
705 uint32_t absZorderQpMinCUIdx = curAbsIdxInCTU & (0xFF << (g_unitSizeDepth - m_slice->m_pps->maxCuDQPDepth) * 2); | |
706 uint32_t absRorderQpMinCUIdx = g_zscanToRaster[absZorderQpMinCUIdx]; | |
707 | |
708 // check for left CTU boundary | |
709 if (isZeroCol(absRorderQpMinCUIdx, s_numPartInCUSize)) | |
710 return NULL; | |
711 | |
712 // get index of left-CU relative to top-left corner of current quantization group | |
713 lPartUnitIdx = g_rasterToZscan[absRorderQpMinCUIdx - 1]; | |
714 | |
715 // return pointer to current CTU | |
716 return m_encData->getPicCTU(m_cuAddr); | |
717 } | |
718 | |
719 /* Get above QpMinCu */ | |
720 const CUData* CUData::getQpMinCuAbove(uint32_t& aPartUnitIdx, uint32_t curAbsIdxInCTU) const | |
721 { | |
722 uint32_t absZorderQpMinCUIdx = curAbsIdxInCTU & (0xFF << (g_unitSizeDepth - m_slice->m_pps->maxCuDQPDepth) * 2); | |
723 uint32_t absRorderQpMinCUIdx = g_zscanToRaster[absZorderQpMinCUIdx]; | |
724 | |
725 // check for top CTU boundary | |
726 if (isZeroRow(absRorderQpMinCUIdx, s_numPartInCUSize)) | |
727 return NULL; | |
728 | |
729 // get index of top-CU relative to top-left corner of current quantization group | |
730 aPartUnitIdx = g_rasterToZscan[absRorderQpMinCUIdx - s_numPartInCUSize]; | |
731 | |
732 // return pointer to current CTU | |
733 return m_encData->getPicCTU(m_cuAddr); | |
734 } | |
735 | |
736 /* Get reference QP from left QpMinCu or latest coded QP */ | |
737 int8_t CUData::getRefQP(uint32_t curAbsIdxInCTU) const | |
738 { | |
739 uint32_t lPartIdx = 0, aPartIdx = 0; | |
740 const CUData* cULeft = getQpMinCuLeft(lPartIdx, m_absIdxInCTU + curAbsIdxInCTU); | |
741 const CUData* cUAbove = getQpMinCuAbove(aPartIdx, m_absIdxInCTU + curAbsIdxInCTU); | |
742 | |
743 return ((cULeft ? cULeft->m_qp[lPartIdx] : getLastCodedQP(curAbsIdxInCTU)) + (cUAbove ? cUAbove->m_qp[aPartIdx] : getLastCodedQP(curAbsIdxInCTU)) + 1) >> 1; | |
744 } | |
745 | |
746 int CUData::getLastValidPartIdx(int absPartIdx) const | |
747 { | |
748 int lastValidPartIdx = absPartIdx - 1; | |
749 | |
750 while (lastValidPartIdx >= 0 && m_predMode[lastValidPartIdx] == MODE_NONE) | |
751 { | |
752 uint32_t depth = m_cuDepth[lastValidPartIdx]; | |
753 lastValidPartIdx -= m_numPartitions >> (depth << 1); | |
754 } | |
755 | |
756 return lastValidPartIdx; | |
757 } | |
758 | |
759 int8_t CUData::getLastCodedQP(uint32_t absPartIdx) const | |
760 { | |
761 uint32_t quPartIdxMask = 0xFF << (g_unitSizeDepth - m_slice->m_pps->maxCuDQPDepth) * 2; | |
762 int lastValidPartIdx = getLastValidPartIdx(absPartIdx & quPartIdxMask); | |
763 | |
764 if (lastValidPartIdx >= 0) | |
765 return m_qp[lastValidPartIdx]; | |
766 else | |
767 { | |
768 if (m_absIdxInCTU) | |
769 return m_encData->getPicCTU(m_cuAddr)->getLastCodedQP(m_absIdxInCTU); | |
770 else if (m_cuAddr > 0 && !(m_slice->m_pps->bEntropyCodingSyncEnabled && !(m_cuAddr % m_slice->m_sps->numCuInWidth))) | |
771 return m_encData->getPicCTU(m_cuAddr - 1)->getLastCodedQP(NUM_4x4_PARTITIONS); | |
772 else | |
773 return (int8_t)m_slice->m_sliceQp; | |
774 } | |
775 } | |
776 | |
777 /* Get allowed chroma intra modes */ | |
778 void CUData::getAllowedChromaDir(uint32_t absPartIdx, uint32_t* modeList) const | |
779 { | |
780 modeList[0] = PLANAR_IDX; | |
781 modeList[1] = VER_IDX; | |
782 modeList[2] = HOR_IDX; | |
783 modeList[3] = DC_IDX; | |
784 modeList[4] = DM_CHROMA_IDX; | |
785 | |
786 uint32_t lumaMode = m_lumaIntraDir[absPartIdx]; | |
787 | |
788 for (int i = 0; i < NUM_CHROMA_MODE - 1; i++) | |
789 { | |
790 if (lumaMode == modeList[i]) | |
791 { | |
792 modeList[i] = 34; // VER+8 mode | |
793 break; | |
794 } | |
795 } | |
796 } | |
797 | |
798 /* Get most probable intra modes */ | |
799 int CUData::getIntraDirLumaPredictor(uint32_t absPartIdx, uint32_t* intraDirPred) const | |
800 { | |
801 const CUData* tempCU; | |
802 uint32_t tempPartIdx; | |
803 uint32_t leftIntraDir, aboveIntraDir; | |
804 | |
805 // Get intra direction of left PU | |
806 tempCU = getPULeft(tempPartIdx, m_absIdxInCTU + absPartIdx); | |
807 | |
808 leftIntraDir = (tempCU && tempCU->isIntra(tempPartIdx)) ? tempCU->m_lumaIntraDir[tempPartIdx] : DC_IDX; | |
809 | |
810 // Get intra direction of above PU | |
811 tempCU = g_zscanToPelY[m_absIdxInCTU + absPartIdx] > 0 ? getPUAbove(tempPartIdx, m_absIdxInCTU + absPartIdx) : NULL; | |
812 | |
813 aboveIntraDir = (tempCU && tempCU->isIntra(tempPartIdx)) ? tempCU->m_lumaIntraDir[tempPartIdx] : DC_IDX; | |
814 | |
815 if (leftIntraDir == aboveIntraDir) | |
816 { | |
817 if (leftIntraDir >= 2) // angular modes | |
818 { | |
819 intraDirPred[0] = leftIntraDir; | |
820 intraDirPred[1] = ((leftIntraDir - 2 + 31) & 31) + 2; | |
821 intraDirPred[2] = ((leftIntraDir - 2 + 1) & 31) + 2; | |
822 } | |
823 else //non-angular | |
824 { | |
825 intraDirPred[0] = PLANAR_IDX; | |
826 intraDirPred[1] = DC_IDX; | |
827 intraDirPred[2] = VER_IDX; | |
828 } | |
829 return 1; | |
830 } | |
831 else | |
832 { | |
833 intraDirPred[0] = leftIntraDir; | |
834 intraDirPred[1] = aboveIntraDir; | |
835 | |
836 if (leftIntraDir && aboveIntraDir) //both modes are non-planar | |
837 intraDirPred[2] = PLANAR_IDX; | |
838 else | |
839 intraDirPred[2] = (leftIntraDir + aboveIntraDir) < 2 ? VER_IDX : DC_IDX; | |
840 return 2; | |
841 } | |
842 } | |
843 | |
844 uint32_t CUData::getCtxSplitFlag(uint32_t absPartIdx, uint32_t depth) const | |
845 { | |
846 const CUData* tempCU; | |
847 uint32_t tempPartIdx; | |
848 uint32_t ctx; | |
849 | |
850 // Get left split flag | |
851 tempCU = getPULeft(tempPartIdx, m_absIdxInCTU + absPartIdx); | |
852 ctx = (tempCU) ? ((tempCU->m_cuDepth[tempPartIdx] > depth) ? 1 : 0) : 0; | |
853 | |
854 // Get above split flag | |
855 tempCU = getPUAbove(tempPartIdx, m_absIdxInCTU + absPartIdx); | |
856 ctx += (tempCU) ? ((tempCU->m_cuDepth[tempPartIdx] > depth) ? 1 : 0) : 0; | |
857 | |
858 return ctx; | |
859 } | |
860 | |
861 void CUData::getIntraTUQtDepthRange(uint32_t tuDepthRange[2], uint32_t absPartIdx) const | |
862 { | |
863 uint32_t log2CUSize = m_log2CUSize[absPartIdx]; | |
864 uint32_t splitFlag = m_partSize[absPartIdx] != SIZE_2Nx2N; | |
865 | |
866 tuDepthRange[0] = m_slice->m_sps->quadtreeTULog2MinSize; | |
867 tuDepthRange[1] = m_slice->m_sps->quadtreeTULog2MaxSize; | |
868 | |
869 tuDepthRange[0] = x265_clip3(tuDepthRange[0], tuDepthRange[1], log2CUSize - (m_slice->m_sps->quadtreeTUMaxDepthIntra - 1 + splitFlag)); | |
870 } | |
871 | |
872 void CUData::getInterTUQtDepthRange(uint32_t tuDepthRange[2], uint32_t absPartIdx) const | |
873 { | |
874 uint32_t log2CUSize = m_log2CUSize[absPartIdx]; | |
875 uint32_t quadtreeTUMaxDepth = m_slice->m_sps->quadtreeTUMaxDepthInter; | |
876 uint32_t splitFlag = quadtreeTUMaxDepth == 1 && m_partSize[absPartIdx] != SIZE_2Nx2N; | |
877 | |
878 tuDepthRange[0] = m_slice->m_sps->quadtreeTULog2MinSize; | |
879 tuDepthRange[1] = m_slice->m_sps->quadtreeTULog2MaxSize; | |
880 | |
881 tuDepthRange[0] = x265_clip3(tuDepthRange[0], tuDepthRange[1], log2CUSize - (quadtreeTUMaxDepth - 1 + splitFlag)); | |
882 } | |
883 | |
884 uint32_t CUData::getCtxSkipFlag(uint32_t absPartIdx) const | |
885 { | |
886 const CUData* tempCU; | |
887 uint32_t tempPartIdx; | |
888 uint32_t ctx; | |
889 | |
890 // Get BCBP of left PU | |
891 tempCU = getPULeft(tempPartIdx, m_absIdxInCTU + absPartIdx); | |
892 ctx = tempCU ? tempCU->isSkipped(tempPartIdx) : 0; | |
893 | |
894 // Get BCBP of above PU | |
895 tempCU = getPUAbove(tempPartIdx, m_absIdxInCTU + absPartIdx); | |
896 ctx += tempCU ? tempCU->isSkipped(tempPartIdx) : 0; | |
897 | |
898 return ctx; | |
899 } | |
900 | |
901 bool CUData::setQPSubCUs(int8_t qp, uint32_t absPartIdx, uint32_t depth) | |
902 { | |
903 uint32_t curPartNumb = NUM_4x4_PARTITIONS >> (depth << 1); | |
904 uint32_t curPartNumQ = curPartNumb >> 2; | |
905 | |
906 if (m_cuDepth[absPartIdx] > depth) | |
907 { | |
908 for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++) | |
909 if (setQPSubCUs(qp, absPartIdx + subPartIdx * curPartNumQ, depth + 1)) | |
910 return true; | |
911 } | |
912 else | |
913 { | |
914 if (getQtRootCbf(absPartIdx)) | |
915 return true; | |
916 else | |
917 setQPSubParts(qp, absPartIdx, depth); | |
918 } | |
919 | |
920 return false; | |
921 } | |
922 | |
923 void CUData::setPUInterDir(uint8_t dir, uint32_t absPartIdx, uint32_t puIdx) | |
924 { | |
925 uint32_t curPartNumQ = m_numPartitions >> 2; | |
926 X265_CHECK(puIdx < 2, "unexpected part unit index\n"); | |
927 | |
928 switch (m_partSize[absPartIdx]) | |
929 { | |
930 case SIZE_2Nx2N: | |
931 memset(m_interDir + absPartIdx, dir, 4 * curPartNumQ); | |
932 break; | |
933 case SIZE_2NxN: | |
934 memset(m_interDir + absPartIdx, dir, 2 * curPartNumQ); | |
935 break; | |
936 case SIZE_Nx2N: | |
937 memset(m_interDir + absPartIdx, dir, curPartNumQ); | |
938 memset(m_interDir + absPartIdx + 2 * curPartNumQ, dir, curPartNumQ); | |
939 break; | |
940 case SIZE_NxN: | |
941 memset(m_interDir + absPartIdx, dir, curPartNumQ); | |
942 break; | |
943 case SIZE_2NxnU: | |
944 if (!puIdx) | |
945 { | |
946 memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 1)); | |
947 memset(m_interDir + absPartIdx + curPartNumQ, dir, (curPartNumQ >> 1)); | |
948 } | |
949 else | |
950 { | |
951 memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 1)); | |
952 memset(m_interDir + absPartIdx + curPartNumQ, dir, ((curPartNumQ >> 1) + (curPartNumQ << 1))); | |
953 } | |
954 break; | |
955 case SIZE_2NxnD: | |
956 if (!puIdx) | |
957 { | |
958 memset(m_interDir + absPartIdx, dir, ((curPartNumQ << 1) + (curPartNumQ >> 1))); | |
959 memset(m_interDir + absPartIdx + (curPartNumQ << 1) + curPartNumQ, dir, (curPartNumQ >> 1)); | |
960 } | |
961 else | |
962 { | |
963 memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 1)); | |
964 memset(m_interDir + absPartIdx + curPartNumQ, dir, (curPartNumQ >> 1)); | |
965 } | |
966 break; | |
967 case SIZE_nLx2N: | |
968 if (!puIdx) | |
969 { | |
970 memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 2)); | |
971 memset(m_interDir + absPartIdx + (curPartNumQ >> 1), dir, (curPartNumQ >> 2)); | |
972 memset(m_interDir + absPartIdx + (curPartNumQ << 1), dir, (curPartNumQ >> 2)); | |
973 memset(m_interDir + absPartIdx + (curPartNumQ << 1) + (curPartNumQ >> 1), dir, (curPartNumQ >> 2)); | |
974 } | |
975 else | |
976 { | |
977 memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 2)); | |
978 memset(m_interDir + absPartIdx + (curPartNumQ >> 1), dir, (curPartNumQ + (curPartNumQ >> 2))); | |
979 memset(m_interDir + absPartIdx + (curPartNumQ << 1), dir, (curPartNumQ >> 2)); | |
980 memset(m_interDir + absPartIdx + (curPartNumQ << 1) + (curPartNumQ >> 1), dir, (curPartNumQ + (curPartNumQ >> 2))); | |
981 } | |
982 break; | |
983 case SIZE_nRx2N: | |
984 if (!puIdx) | |
985 { | |
986 memset(m_interDir + absPartIdx, dir, (curPartNumQ + (curPartNumQ >> 2))); | |
987 memset(m_interDir + absPartIdx + curPartNumQ + (curPartNumQ >> 1), dir, (curPartNumQ >> 2)); | |
988 memset(m_interDir + absPartIdx + (curPartNumQ << 1), dir, (curPartNumQ + (curPartNumQ >> 2))); | |
989 memset(m_interDir + absPartIdx + (curPartNumQ << 1) + curPartNumQ + (curPartNumQ >> 1), dir, (curPartNumQ >> 2)); | |
990 } | |
991 else | |
992 { | |
993 memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 2)); | |
994 memset(m_interDir + absPartIdx + (curPartNumQ >> 1), dir, (curPartNumQ >> 2)); | |
995 memset(m_interDir + absPartIdx + (curPartNumQ << 1), dir, (curPartNumQ >> 2)); | |
996 memset(m_interDir + absPartIdx + (curPartNumQ << 1) + (curPartNumQ >> 1), dir, (curPartNumQ >> 2)); | |
997 } | |
998 break; | |
999 default: | |
1000 X265_CHECK(0, "unexpected part type\n"); | |
1001 break; | |
1002 } | |
1003 } | |
1004 | |
1005 template<typename T> | |
1006 void CUData::setAllPU(T* p, const T& val, int absPartIdx, int puIdx) | |
1007 { | |
1008 int i; | |
1009 | |
1010 p += absPartIdx; | |
1011 int numElements = m_numPartitions; | |
1012 | |
1013 switch (m_partSize[absPartIdx]) | |
1014 { | |
1015 case SIZE_2Nx2N: | |
1016 for (i = 0; i < numElements; i++) | |
1017 p[i] = val; | |
1018 break; | |
1019 | |
1020 case SIZE_2NxN: | |
1021 numElements >>= 1; | |
1022 for (i = 0; i < numElements; i++) | |
1023 p[i] = val; | |
1024 break; | |
1025 | |
1026 case SIZE_Nx2N: | |
1027 numElements >>= 2; | |
1028 for (i = 0; i < numElements; i++) | |
1029 { | |
1030 p[i] = val; | |
1031 p[i + 2 * numElements] = val; | |
1032 } | |
1033 break; | |
1034 | |
1035 case SIZE_2NxnU: | |
1036 { | |
1037 int curPartNumQ = numElements >> 2; | |
1038 if (!puIdx) | |
1039 { | |
1040 T *pT = p; | |
1041 T *pT2 = p + curPartNumQ; | |
1042 for (i = 0; i < (curPartNumQ >> 1); i++) | |
1043 { | |
1044 pT[i] = val; | |
1045 pT2[i] = val; | |
1046 } | |
1047 } | |
1048 else | |
1049 { | |
1050 T *pT = p; | |
1051 for (i = 0; i < (curPartNumQ >> 1); i++) | |
1052 pT[i] = val; | |
1053 | |
1054 pT = p + curPartNumQ; | |
1055 for (i = 0; i < ((curPartNumQ >> 1) + (curPartNumQ << 1)); i++) | |
1056 pT[i] = val; | |
1057 } | |
1058 break; | |
1059 } | |
1060 | |
1061 case SIZE_2NxnD: | |
1062 { | |
1063 int curPartNumQ = numElements >> 2; | |
1064 if (!puIdx) | |
1065 { | |
1066 T *pT = p; | |
1067 for (i = 0; i < ((curPartNumQ >> 1) + (curPartNumQ << 1)); i++) | |
1068 pT[i] = val; | |
1069 | |
1070 pT = p + (numElements - curPartNumQ); | |
1071 for (i = 0; i < (curPartNumQ >> 1); i++) | |
1072 pT[i] = val; | |
1073 } | |
1074 else | |
1075 { | |
1076 T *pT = p; | |
1077 T *pT2 = p + curPartNumQ; | |
1078 for (i = 0; i < (curPartNumQ >> 1); i++) | |
1079 { | |
1080 pT[i] = val; | |
1081 pT2[i] = val; | |
1082 } | |
1083 } | |
1084 break; | |
1085 } | |
1086 | |
1087 case SIZE_nLx2N: | |
1088 { | |
1089 int curPartNumQ = numElements >> 2; | |
1090 if (!puIdx) | |
1091 { | |
1092 T *pT = p; | |
1093 T *pT2 = p + (curPartNumQ << 1); | |
1094 T *pT3 = p + (curPartNumQ >> 1); | |
1095 T *pT4 = p + (curPartNumQ << 1) + (curPartNumQ >> 1); | |
1096 | |
1097 for (i = 0; i < (curPartNumQ >> 2); i++) | |
1098 { | |
1099 pT[i] = val; | |
1100 pT2[i] = val; | |
1101 pT3[i] = val; | |
1102 pT4[i] = val; | |
1103 } | |
1104 } | |
1105 else | |
1106 { | |
1107 T *pT = p; | |
1108 T *pT2 = p + (curPartNumQ << 1); | |
1109 for (i = 0; i < (curPartNumQ >> 2); i++) | |
1110 { | |
1111 pT[i] = val; | |
1112 pT2[i] = val; | |
1113 } | |
1114 | |
1115 pT = p + (curPartNumQ >> 1); | |
1116 pT2 = p + (curPartNumQ << 1) + (curPartNumQ >> 1); | |
1117 for (i = 0; i < ((curPartNumQ >> 2) + curPartNumQ); i++) | |
1118 { | |
1119 pT[i] = val; | |
1120 pT2[i] = val; | |
1121 } | |
1122 } | |
1123 break; | |
1124 } | |
1125 | |
1126 case SIZE_nRx2N: | |
1127 { | |
1128 int curPartNumQ = numElements >> 2; | |
1129 if (!puIdx) | |
1130 { | |
1131 T *pT = p; | |
1132 T *pT2 = p + (curPartNumQ << 1); | |
1133 for (i = 0; i < ((curPartNumQ >> 2) + curPartNumQ); i++) | |
1134 { | |
1135 pT[i] = val; | |
1136 pT2[i] = val; | |
1137 } | |
1138 | |
1139 pT = p + curPartNumQ + (curPartNumQ >> 1); | |
1140 pT2 = p + numElements - curPartNumQ + (curPartNumQ >> 1); | |
1141 for (i = 0; i < (curPartNumQ >> 2); i++) | |
1142 { | |
1143 pT[i] = val; | |
1144 pT2[i] = val; | |
1145 } | |
1146 } | |
1147 else | |
1148 { | |
1149 T *pT = p; | |
1150 T *pT2 = p + (curPartNumQ >> 1); | |
1151 T *pT3 = p + (curPartNumQ << 1); | |
1152 T *pT4 = p + (curPartNumQ << 1) + (curPartNumQ >> 1); | |
1153 for (i = 0; i < (curPartNumQ >> 2); i++) | |
1154 { | |
1155 pT[i] = val; | |
1156 pT2[i] = val; | |
1157 pT3[i] = val; | |
1158 pT4[i] = val; | |
1159 } | |
1160 } | |
1161 break; | |
1162 } | |
1163 | |
1164 case SIZE_NxN: | |
1165 default: | |
1166 X265_CHECK(0, "unknown partition type\n"); | |
1167 break; | |
1168 } | |
1169 } | |
1170 | |
1171 void CUData::setPUMv(int list, const MV& mv, int absPartIdx, int puIdx) | |
1172 { | |
1173 setAllPU(m_mv[list], mv, absPartIdx, puIdx); | |
1174 } | |
1175 | |
1176 void CUData::setPURefIdx(int list, int8_t refIdx, int absPartIdx, int puIdx) | |
1177 { | |
1178 setAllPU(m_refIdx[list], refIdx, absPartIdx, puIdx); | |
1179 } | |
1180 | |
1181 void CUData::getPartIndexAndSize(uint32_t partIdx, uint32_t& outPartAddr, int& outWidth, int& outHeight) const | |
1182 { | |
1183 int cuSize = 1 << m_log2CUSize[0]; | |
1184 int partType = m_partSize[0]; | |
1185 | |
1186 int tmp = partTable[partType][partIdx][0]; | |
1187 outWidth = ((tmp >> 4) * cuSize) >> 2; | |
1188 outHeight = ((tmp & 0xF) * cuSize) >> 2; | |
1189 outPartAddr = (partAddrTable[partType][partIdx] * m_numPartitions) >> 4; | |
1190 } | |
1191 | |
1192 void CUData::getMvField(const CUData* cu, uint32_t absPartIdx, int picList, MVField& outMvField) const | |
1193 { | |
1194 if (cu) | |
1195 { | |
1196 outMvField.mv = cu->m_mv[picList][absPartIdx]; | |
1197 outMvField.refIdx = cu->m_refIdx[picList][absPartIdx]; | |
1198 } | |
1199 else | |
1200 { | |
1201 // OUT OF BOUNDARY | |
1202 outMvField.mv = 0; | |
1203 outMvField.refIdx = REF_NOT_VALID; | |
1204 } | |
1205 } | |
1206 | |
1207 void CUData::deriveLeftRightTopIdx(uint32_t partIdx, uint32_t& partIdxLT, uint32_t& partIdxRT) const | |
1208 { | |
1209 partIdxLT = m_absIdxInCTU; | |
1210 partIdxRT = g_rasterToZscan[g_zscanToRaster[partIdxLT] + (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1]; | |
1211 | |
1212 switch (m_partSize[0]) | |
1213 { | |
1214 case SIZE_2Nx2N: break; | |
1215 case SIZE_2NxN: | |
1216 partIdxLT += (partIdx == 0) ? 0 : m_numPartitions >> 1; | |
1217 partIdxRT += (partIdx == 0) ? 0 : m_numPartitions >> 1; | |
1218 break; | |
1219 case SIZE_Nx2N: | |
1220 partIdxLT += (partIdx == 0) ? 0 : m_numPartitions >> 2; | |
1221 partIdxRT -= (partIdx == 1) ? 0 : m_numPartitions >> 2; | |
1222 break; | |
1223 case SIZE_NxN: | |
1224 partIdxLT += (m_numPartitions >> 2) * partIdx; | |
1225 partIdxRT += (m_numPartitions >> 2) * (partIdx - 1); | |
1226 break; | |
1227 case SIZE_2NxnU: | |
1228 partIdxLT += (partIdx == 0) ? 0 : m_numPartitions >> 3; | |
1229 partIdxRT += (partIdx == 0) ? 0 : m_numPartitions >> 3; | |
1230 break; | |
1231 case SIZE_2NxnD: | |
1232 partIdxLT += (partIdx == 0) ? 0 : (m_numPartitions >> 1) + (m_numPartitions >> 3); | |
1233 partIdxRT += (partIdx == 0) ? 0 : (m_numPartitions >> 1) + (m_numPartitions >> 3); | |
1234 break; | |
1235 case SIZE_nLx2N: | |
1236 partIdxLT += (partIdx == 0) ? 0 : m_numPartitions >> 4; | |
1237 partIdxRT -= (partIdx == 1) ? 0 : (m_numPartitions >> 2) + (m_numPartitions >> 4); | |
1238 break; | |
1239 case SIZE_nRx2N: | |
1240 partIdxLT += (partIdx == 0) ? 0 : (m_numPartitions >> 2) + (m_numPartitions >> 4); | |
1241 partIdxRT -= (partIdx == 1) ? 0 : m_numPartitions >> 4; | |
1242 break; | |
1243 default: | |
1244 X265_CHECK(0, "unexpected part index\n"); | |
1245 break; | |
1246 } | |
1247 } | |
1248 | |
1249 uint32_t CUData::deriveLeftBottomIdx(uint32_t puIdx) const | |
1250 { | |
1251 uint32_t outPartIdxLB; | |
1252 outPartIdxLB = g_rasterToZscan[g_zscanToRaster[m_absIdxInCTU] + ((1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE - 1)) - 1) * s_numPartInCUSize]; | |
1253 | |
1254 switch (m_partSize[0]) | |
1255 { | |
1256 case SIZE_2Nx2N: | |
1257 outPartIdxLB += m_numPartitions >> 1; | |
1258 break; | |
1259 case SIZE_2NxN: | |
1260 outPartIdxLB += puIdx ? m_numPartitions >> 1 : 0; | |
1261 break; | |
1262 case SIZE_Nx2N: | |
1263 outPartIdxLB += puIdx ? (m_numPartitions >> 2) * 3 : m_numPartitions >> 1; | |
1264 break; | |
1265 case SIZE_NxN: | |
1266 outPartIdxLB += (m_numPartitions >> 2) * puIdx; | |
1267 break; | |
1268 case SIZE_2NxnU: | |
1269 outPartIdxLB += puIdx ? m_numPartitions >> 1 : -((int)m_numPartitions >> 3); | |
1270 break; | |
1271 case SIZE_2NxnD: | |
1272 outPartIdxLB += puIdx ? m_numPartitions >> 1 : (m_numPartitions >> 2) + (m_numPartitions >> 3); | |
1273 break; | |
1274 case SIZE_nLx2N: | |
1275 outPartIdxLB += puIdx ? (m_numPartitions >> 1) + (m_numPartitions >> 4) : m_numPartitions >> 1; | |
1276 break; | |
1277 case SIZE_nRx2N: | |
1278 outPartIdxLB += puIdx ? (m_numPartitions >> 1) + (m_numPartitions >> 2) + (m_numPartitions >> 4) : m_numPartitions >> 1; | |
1279 break; | |
1280 default: | |
1281 X265_CHECK(0, "unexpected part index\n"); | |
1282 break; | |
1283 } | |
1284 return outPartIdxLB; | |
1285 } | |
1286 | |
1287 /* Derives the partition index of neighboring bottom right block */ | |
1288 uint32_t CUData::deriveRightBottomIdx(uint32_t puIdx) const | |
1289 { | |
1290 uint32_t outPartIdxRB; | |
1291 outPartIdxRB = g_rasterToZscan[g_zscanToRaster[m_absIdxInCTU] + | |
1292 ((1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE - 1)) - 1) * s_numPartInCUSize + | |
1293 (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1]; | |
1294 | |
1295 switch (m_partSize[0]) | |
1296 { | |
1297 case SIZE_2Nx2N: | |
1298 outPartIdxRB += m_numPartitions >> 1; | |
1299 break; | |
1300 case SIZE_2NxN: | |
1301 outPartIdxRB += puIdx ? m_numPartitions >> 1 : 0; | |
1302 break; | |
1303 case SIZE_Nx2N: | |
1304 outPartIdxRB += puIdx ? m_numPartitions >> 1 : m_numPartitions >> 2; | |
1305 break; | |
1306 case SIZE_NxN: | |
1307 outPartIdxRB += (m_numPartitions >> 2) * (puIdx - 1); | |
1308 break; | |
1309 case SIZE_2NxnU: | |
1310 outPartIdxRB += puIdx ? m_numPartitions >> 1 : -((int)m_numPartitions >> 3); | |
1311 break; | |
1312 case SIZE_2NxnD: | |
1313 outPartIdxRB += puIdx ? m_numPartitions >> 1 : (m_numPartitions >> 2) + (m_numPartitions >> 3); | |
1314 break; | |
1315 case SIZE_nLx2N: | |
1316 outPartIdxRB += puIdx ? m_numPartitions >> 1 : (m_numPartitions >> 3) + (m_numPartitions >> 4); | |
1317 break; | |
1318 case SIZE_nRx2N: | |
1319 outPartIdxRB += puIdx ? m_numPartitions >> 1 : (m_numPartitions >> 2) + (m_numPartitions >> 3) + (m_numPartitions >> 4); | |
1320 break; | |
1321 default: | |
1322 X265_CHECK(0, "unexpected part index\n"); | |
1323 break; | |
1324 } | |
1325 return outPartIdxRB; | |
1326 } | |
1327 | |
1328 bool CUData::hasEqualMotion(uint32_t absPartIdx, const CUData& candCU, uint32_t candAbsPartIdx) const | |
1329 { | |
1330 if (m_interDir[absPartIdx] != candCU.m_interDir[candAbsPartIdx]) | |
1331 return false; | |
1332 | |
1333 for (uint32_t refListIdx = 0; refListIdx < 2; refListIdx++) | |
1334 { | |
1335 if (m_interDir[absPartIdx] & (1 << refListIdx)) | |
1336 { | |
1337 if (m_mv[refListIdx][absPartIdx] != candCU.m_mv[refListIdx][candAbsPartIdx] || | |
1338 m_refIdx[refListIdx][absPartIdx] != candCU.m_refIdx[refListIdx][candAbsPartIdx]) | |
1339 return false; | |
1340 } | |
1341 } | |
1342 | |
1343 return true; | |
1344 } | |
1345 | |
1346 /* Construct list of merging candidates, returns count */ | |
1347 uint32_t CUData::getInterMergeCandidates(uint32_t absPartIdx, uint32_t puIdx, MVField(*candMvField)[2], uint8_t* candDir) const | |
1348 { | |
1349 uint32_t absPartAddr = m_absIdxInCTU + absPartIdx; | |
1350 const bool isInterB = m_slice->isInterB(); | |
1351 | |
1352 const uint32_t maxNumMergeCand = m_slice->m_maxNumMergeCand; | |
1353 | |
1354 for (uint32_t i = 0; i < maxNumMergeCand; ++i) | |
1355 { | |
1356 candMvField[i][0].mv = 0; | |
1357 candMvField[i][1].mv = 0; | |
1358 candMvField[i][0].refIdx = REF_NOT_VALID; | |
1359 candMvField[i][1].refIdx = REF_NOT_VALID; | |
1360 } | |
1361 | |
1362 /* calculate the location of upper-left corner pixel and size of the current PU */ | |
1363 int xP, yP, nPSW, nPSH; | |
1364 | |
1365 int cuSize = 1 << m_log2CUSize[0]; | |
1366 int partMode = m_partSize[0]; | |
1367 | |
1368 int tmp = partTable[partMode][puIdx][0]; | |
1369 nPSW = ((tmp >> 4) * cuSize) >> 2; | |
1370 nPSH = ((tmp & 0xF) * cuSize) >> 2; | |
1371 | |
1372 tmp = partTable[partMode][puIdx][1]; | |
1373 xP = ((tmp >> 4) * cuSize) >> 2; | |
1374 yP = ((tmp & 0xF) * cuSize) >> 2; | |
1375 | |
1376 uint32_t count = 0; | |
1377 | |
1378 uint32_t partIdxLT, partIdxRT, partIdxLB = deriveLeftBottomIdx(puIdx); | |
1379 PartSize curPS = (PartSize)m_partSize[absPartIdx]; | |
1380 | |
1381 // left | |
1382 uint32_t leftPartIdx = 0; | |
1383 const CUData* cuLeft = getPULeft(leftPartIdx, partIdxLB); | |
1384 bool isAvailableA1 = cuLeft && | |
1385 cuLeft->isDiffMER(xP - 1, yP + nPSH - 1, xP, yP) && | |
1386 !(puIdx == 1 && (curPS == SIZE_Nx2N || curPS == SIZE_nLx2N || curPS == SIZE_nRx2N)) && | |
1387 cuLeft->isInter(leftPartIdx); | |
1388 if (isAvailableA1) | |
1389 { | |
1390 // get Inter Dir | |
1391 candDir[count] = cuLeft->m_interDir[leftPartIdx]; | |
1392 // get Mv from Left | |
1393 cuLeft->getMvField(cuLeft, leftPartIdx, 0, candMvField[count][0]); | |
1394 if (isInterB) | |
1395 cuLeft->getMvField(cuLeft, leftPartIdx, 1, candMvField[count][1]); | |
1396 | |
1397 if (++count == maxNumMergeCand) | |
1398 return maxNumMergeCand; | |
1399 } | |
1400 | |
1401 deriveLeftRightTopIdx(puIdx, partIdxLT, partIdxRT); | |
1402 | |
1403 // above | |
1404 uint32_t abovePartIdx = 0; | |
1405 const CUData* cuAbove = getPUAbove(abovePartIdx, partIdxRT); | |
1406 bool isAvailableB1 = cuAbove && | |
1407 cuAbove->isDiffMER(xP + nPSW - 1, yP - 1, xP, yP) && | |
1408 !(puIdx == 1 && (curPS == SIZE_2NxN || curPS == SIZE_2NxnU || curPS == SIZE_2NxnD)) && | |
1409 cuAbove->isInter(abovePartIdx); | |
1410 if (isAvailableB1 && (!isAvailableA1 || !cuLeft->hasEqualMotion(leftPartIdx, *cuAbove, abovePartIdx))) | |
1411 { | |
1412 // get Inter Dir | |
1413 candDir[count] = cuAbove->m_interDir[abovePartIdx]; | |
1414 // get Mv from Left | |
1415 cuAbove->getMvField(cuAbove, abovePartIdx, 0, candMvField[count][0]); | |
1416 if (isInterB) | |
1417 cuAbove->getMvField(cuAbove, abovePartIdx, 1, candMvField[count][1]); | |
1418 | |
1419 if (++count == maxNumMergeCand) | |
1420 return maxNumMergeCand; | |
1421 } | |
1422 | |
1423 // above right | |
1424 uint32_t aboveRightPartIdx = 0; | |
1425 const CUData* cuAboveRight = getPUAboveRight(aboveRightPartIdx, partIdxRT); | |
1426 bool isAvailableB0 = cuAboveRight && | |
1427 cuAboveRight->isDiffMER(xP + nPSW, yP - 1, xP, yP) && | |
1428 cuAboveRight->isInter(aboveRightPartIdx); | |
1429 if (isAvailableB0 && (!isAvailableB1 || !cuAbove->hasEqualMotion(abovePartIdx, *cuAboveRight, aboveRightPartIdx))) | |
1430 { | |
1431 // get Inter Dir | |
1432 candDir[count] = cuAboveRight->m_interDir[aboveRightPartIdx]; | |
1433 // get Mv from Left | |
1434 cuAboveRight->getMvField(cuAboveRight, aboveRightPartIdx, 0, candMvField[count][0]); | |
1435 if (isInterB) | |
1436 cuAboveRight->getMvField(cuAboveRight, aboveRightPartIdx, 1, candMvField[count][1]); | |
1437 | |
1438 if (++count == maxNumMergeCand) | |
1439 return maxNumMergeCand; | |
1440 } | |
1441 | |
1442 // left bottom | |
1443 uint32_t leftBottomPartIdx = 0; | |
1444 const CUData* cuLeftBottom = this->getPUBelowLeft(leftBottomPartIdx, partIdxLB); | |
1445 bool isAvailableA0 = cuLeftBottom && | |
1446 cuLeftBottom->isDiffMER(xP - 1, yP + nPSH, xP, yP) && | |
1447 cuLeftBottom->isInter(leftBottomPartIdx); | |
1448 if (isAvailableA0 && (!isAvailableA1 || !cuLeft->hasEqualMotion(leftPartIdx, *cuLeftBottom, leftBottomPartIdx))) | |
1449 { | |
1450 // get Inter Dir | |
1451 candDir[count] = cuLeftBottom->m_interDir[leftBottomPartIdx]; | |
1452 // get Mv from Left | |
1453 cuLeftBottom->getMvField(cuLeftBottom, leftBottomPartIdx, 0, candMvField[count][0]); | |
1454 if (isInterB) | |
1455 cuLeftBottom->getMvField(cuLeftBottom, leftBottomPartIdx, 1, candMvField[count][1]); | |
1456 | |
1457 if (++count == maxNumMergeCand) | |
1458 return maxNumMergeCand; | |
1459 } | |
1460 | |
1461 // above left | |
1462 if (count < 4) | |
1463 { | |
1464 uint32_t aboveLeftPartIdx = 0; | |
1465 const CUData* cuAboveLeft = getPUAboveLeft(aboveLeftPartIdx, absPartAddr); | |
1466 bool isAvailableB2 = cuAboveLeft && | |
1467 cuAboveLeft->isDiffMER(xP - 1, yP - 1, xP, yP) && | |
1468 cuAboveLeft->isInter(aboveLeftPartIdx); | |
1469 if (isAvailableB2 && (!isAvailableA1 || !cuLeft->hasEqualMotion(leftPartIdx, *cuAboveLeft, aboveLeftPartIdx)) | |
1470 && (!isAvailableB1 || !cuAbove->hasEqualMotion(abovePartIdx, *cuAboveLeft, aboveLeftPartIdx))) | |
1471 { | |
1472 // get Inter Dir | |
1473 candDir[count] = cuAboveLeft->m_interDir[aboveLeftPartIdx]; | |
1474 // get Mv from Left | |
1475 cuAboveLeft->getMvField(cuAboveLeft, aboveLeftPartIdx, 0, candMvField[count][0]); | |
1476 if (isInterB) | |
1477 cuAboveLeft->getMvField(cuAboveLeft, aboveLeftPartIdx, 1, candMvField[count][1]); | |
1478 | |
1479 if (++count == maxNumMergeCand) | |
1480 return maxNumMergeCand; | |
1481 } | |
1482 } | |
1483 if (m_slice->m_sps->bTemporalMVPEnabled) | |
1484 { | |
1485 uint32_t partIdxRB = deriveRightBottomIdx(puIdx); | |
1486 MV colmv; | |
1487 int ctuIdx = -1; | |
1488 | |
1489 // image boundary check | |
1490 if (m_encData->getPicCTU(m_cuAddr)->m_cuPelX + g_zscanToPelX[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picWidthInLumaSamples && | |
1491 m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picHeightInLumaSamples) | |
1492 { | |
1493 uint32_t absPartIdxRB = g_zscanToRaster[partIdxRB]; | |
1494 uint32_t numUnits = s_numPartInCUSize; | |
1495 bool bNotLastCol = lessThanCol(absPartIdxRB, numUnits - 1, numUnits); // is not at the last column of CTU | |
1496 bool bNotLastRow = lessThanRow(absPartIdxRB, numUnits - 1, numUnits); // is not at the last row of CTU | |
1497 | |
1498 if (bNotLastCol && bNotLastRow) | |
1499 { | |
1500 absPartAddr = g_rasterToZscan[absPartIdxRB + numUnits + 1]; | |
1501 ctuIdx = m_cuAddr; | |
1502 } | |
1503 else if (bNotLastCol) | |
1504 absPartAddr = g_rasterToZscan[(absPartIdxRB + numUnits + 1) & (numUnits - 1)]; | |
1505 else if (bNotLastRow) | |
1506 { | |
1507 absPartAddr = g_rasterToZscan[absPartIdxRB + 1]; | |
1508 ctuIdx = m_cuAddr + 1; | |
1509 } | |
1510 else // is the right bottom corner of CTU | |
1511 absPartAddr = 0; | |
1512 } | |
1513 | |
1514 int maxList = isInterB ? 2 : 1; | |
1515 int dir = 0, refIdx = 0; | |
1516 for (int list = 0; list < maxList; list++) | |
1517 { | |
1518 bool bExistMV = ctuIdx >= 0 && getColMVP(colmv, refIdx, list, ctuIdx, absPartAddr); | |
1519 if (!bExistMV) | |
1520 { | |
1521 uint32_t partIdxCenter = deriveCenterIdx(puIdx); | |
1522 bExistMV = getColMVP(colmv, refIdx, list, m_cuAddr, partIdxCenter); | |
1523 } | |
1524 if (bExistMV) | |
1525 { | |
1526 dir |= (1 << list); | |
1527 candMvField[count][list].mv = colmv; | |
1528 candMvField[count][list].refIdx = refIdx; | |
1529 } | |
1530 } | |
1531 | |
1532 if (dir != 0) | |
1533 { | |
1534 candDir[count] = (uint8_t)dir; | |
1535 | |
1536 if (++count == maxNumMergeCand) | |
1537 return maxNumMergeCand; | |
1538 } | |
1539 } | |
1540 | |
1541 if (isInterB) | |
1542 { | |
1543 const uint32_t cutoff = count * (count - 1); | |
1544 uint32_t priorityList0 = 0xEDC984; // { 0, 1, 0, 2, 1, 2, 0, 3, 1, 3, 2, 3 } | |
1545 uint32_t priorityList1 = 0xB73621; // { 1, 0, 2, 0, 2, 1, 3, 0, 3, 1, 3, 2 } | |
1546 | |
1547 for (uint32_t idx = 0; idx < cutoff; idx++, priorityList0 >>= 2, priorityList1 >>= 2) | |
1548 { | |
1549 int i = priorityList0 & 3; | |
1550 int j = priorityList1 & 3; | |
1551 | |
1552 if ((candDir[i] & 0x1) && (candDir[j] & 0x2)) | |
1553 { | |
1554 // get Mv from cand[i] and cand[j] | |
1555 int refIdxL0 = candMvField[i][0].refIdx; | |
1556 int refIdxL1 = candMvField[j][1].refIdx; | |
1557 int refPOCL0 = m_slice->m_refPOCList[0][refIdxL0]; | |
1558 int refPOCL1 = m_slice->m_refPOCList[1][refIdxL1]; | |
1559 if (!(refPOCL0 == refPOCL1 && candMvField[i][0].mv == candMvField[j][1].mv)) | |
1560 { | |
1561 candMvField[count][0].mv = candMvField[i][0].mv; | |
1562 candMvField[count][0].refIdx = refIdxL0; | |
1563 candMvField[count][1].mv = candMvField[j][1].mv; | |
1564 candMvField[count][1].refIdx = refIdxL1; | |
1565 candDir[count] = 3; | |
1566 | |
1567 if (++count == maxNumMergeCand) | |
1568 return maxNumMergeCand; | |
1569 } | |
1570 } | |
1571 } | |
1572 } | |
1573 int numRefIdx = (isInterB) ? X265_MIN(m_slice->m_numRefIdx[0], m_slice->m_numRefIdx[1]) : m_slice->m_numRefIdx[0]; | |
1574 int r = 0; | |
1575 int refcnt = 0; | |
1576 while (count < maxNumMergeCand) | |
1577 { | |
1578 candDir[count] = 1; | |
1579 candMvField[count][0].mv.word = 0; | |
1580 candMvField[count][0].refIdx = r; | |
1581 | |
1582 if (isInterB) | |
1583 { | |
1584 candDir[count] = 3; | |
1585 candMvField[count][1].mv.word = 0; | |
1586 candMvField[count][1].refIdx = r; | |
1587 } | |
1588 | |
1589 count++; | |
1590 | |
1591 if (refcnt == numRefIdx - 1) | |
1592 r = 0; | |
1593 else | |
1594 { | |
1595 ++r; | |
1596 ++refcnt; | |
1597 } | |
1598 } | |
1599 | |
1600 return count; | |
1601 } | |
1602 | |
1603 // Create the PMV list. Called for each reference index. | |
1604 int CUData::getPMV(InterNeighbourMV *neighbours, uint32_t picList, uint32_t refIdx, MV* amvpCand, MV* pmv) const | |
1605 { | |
1606 MV directMV[MD_ABOVE_LEFT + 1]; | |
1607 MV indirectMV[MD_ABOVE_LEFT + 1]; | |
1608 bool validDirect[MD_ABOVE_LEFT + 1]; | |
1609 bool validIndirect[MD_ABOVE_LEFT + 1]; | |
1610 | |
1611 // Left candidate. | |
1612 validDirect[MD_BELOW_LEFT] = getDirectPMV(directMV[MD_BELOW_LEFT], neighbours + MD_BELOW_LEFT, picList, refIdx); | |
1613 validDirect[MD_LEFT] = getDirectPMV(directMV[MD_LEFT], neighbours + MD_LEFT, picList, refIdx); | |
1614 // Top candidate. | |
1615 validDirect[MD_ABOVE_RIGHT] = getDirectPMV(directMV[MD_ABOVE_RIGHT], neighbours + MD_ABOVE_RIGHT, picList, refIdx); | |
1616 validDirect[MD_ABOVE] = getDirectPMV(directMV[MD_ABOVE], neighbours + MD_ABOVE, picList, refIdx); | |
1617 validDirect[MD_ABOVE_LEFT] = getDirectPMV(directMV[MD_ABOVE_LEFT], neighbours + MD_ABOVE_LEFT, picList, refIdx); | |
1618 | |
1619 // Left candidate. | |
1620 validIndirect[MD_BELOW_LEFT] = getIndirectPMV(indirectMV[MD_BELOW_LEFT], neighbours + MD_BELOW_LEFT, picList, refIdx); | |
1621 validIndirect[MD_LEFT] = getIndirectPMV(indirectMV[MD_LEFT], neighbours + MD_LEFT, picList, refIdx); | |
1622 // Top candidate. | |
1623 validIndirect[MD_ABOVE_RIGHT] = getIndirectPMV(indirectMV[MD_ABOVE_RIGHT], neighbours + MD_ABOVE_RIGHT, picList, refIdx); | |
1624 validIndirect[MD_ABOVE] = getIndirectPMV(indirectMV[MD_ABOVE], neighbours + MD_ABOVE, picList, refIdx); | |
1625 validIndirect[MD_ABOVE_LEFT] = getIndirectPMV(indirectMV[MD_ABOVE_LEFT], neighbours + MD_ABOVE_LEFT, picList, refIdx); | |
1626 | |
1627 int num = 0; | |
1628 // Left predictor search | |
1629 if (validDirect[MD_BELOW_LEFT]) | |
1630 amvpCand[num++] = directMV[MD_BELOW_LEFT]; | |
1631 else if (validDirect[MD_LEFT]) | |
1632 amvpCand[num++] = directMV[MD_LEFT]; | |
1633 else if (validIndirect[MD_BELOW_LEFT]) | |
1634 amvpCand[num++] = indirectMV[MD_BELOW_LEFT]; | |
1635 else if (validIndirect[MD_LEFT]) | |
1636 amvpCand[num++] = indirectMV[MD_LEFT]; | |
1637 | |
1638 bool bAddedSmvp = num > 0; | |
1639 | |
1640 // Above predictor search | |
1641 if (validDirect[MD_ABOVE_RIGHT]) | |
1642 amvpCand[num++] = directMV[MD_ABOVE_RIGHT]; | |
1643 else if (validDirect[MD_ABOVE]) | |
1644 amvpCand[num++] = directMV[MD_ABOVE]; | |
1645 else if (validDirect[MD_ABOVE_LEFT]) | |
1646 amvpCand[num++] = directMV[MD_ABOVE_LEFT]; | |
1647 | |
1648 if (!bAddedSmvp) | |
1649 { | |
1650 if (validIndirect[MD_ABOVE_RIGHT]) | |
1651 amvpCand[num++] = indirectMV[MD_ABOVE_RIGHT]; | |
1652 else if (validIndirect[MD_ABOVE]) | |
1653 amvpCand[num++] = indirectMV[MD_ABOVE]; | |
1654 else if (validIndirect[MD_ABOVE_LEFT]) | |
1655 amvpCand[num++] = indirectMV[MD_ABOVE_LEFT]; | |
1656 } | |
1657 | |
1658 int numMvc = 0; | |
1659 for (int dir = MD_LEFT; dir <= MD_ABOVE_LEFT; dir++) | |
1660 { | |
1661 if (validDirect[dir] && directMV[dir].notZero()) | |
1662 pmv[numMvc++] = directMV[dir]; | |
1663 | |
1664 if (validIndirect[dir] && indirectMV[dir].notZero()) | |
1665 pmv[numMvc++] = indirectMV[dir]; | |
1666 } | |
1667 | |
1668 if (num == 2) | |
1669 num -= amvpCand[0] == amvpCand[1]; | |
1670 | |
1671 // Get the collocated candidate. At this step, either the first candidate | |
1672 // was found or its value is 0. | |
1673 if (m_slice->m_sps->bTemporalMVPEnabled && num < 2) | |
1674 { | |
1675 int tempRefIdx = neighbours[MD_COLLOCATED].refIdx[picList]; | |
1676 if (tempRefIdx != -1) | |
1677 { | |
1678 uint32_t cuAddr = neighbours[MD_COLLOCATED].cuAddr[picList]; | |
1679 const Frame* colPic = m_slice->m_refFrameList[m_slice->isInterB() && !m_slice->m_colFromL0Flag][m_slice->m_colRefIdx]; | |
1680 const CUData* colCU = colPic->m_encData->getPicCTU(cuAddr); | |
1681 | |
1682 // Scale the vector | |
1683 int colRefPOC = colCU->m_slice->m_refPOCList[tempRefIdx >> 4][tempRefIdx & 0xf]; | |
1684 int colPOC = colCU->m_slice->m_poc; | |
1685 | |
1686 int curRefPOC = m_slice->m_refPOCList[picList][refIdx]; | |
1687 int curPOC = m_slice->m_poc; | |
1688 | |
1689 pmv[numMvc++] = amvpCand[num++] = scaleMvByPOCDist(neighbours[MD_COLLOCATED].mv[picList], curPOC, curRefPOC, colPOC, colRefPOC); | |
1690 } | |
1691 } | |
1692 | |
1693 while (num < AMVP_NUM_CANDS) | |
1694 amvpCand[num++] = 0; | |
1695 | |
1696 return numMvc; | |
1697 } | |
1698 | |
1699 /* Constructs a list of candidates for AMVP, and a larger list of motion candidates */ | |
1700 void CUData::getNeighbourMV(uint32_t puIdx, uint32_t absPartIdx, InterNeighbourMV* neighbours) const | |
1701 { | |
1702 // Set the temporal neighbour to unavailable by default. | |
1703 neighbours[MD_COLLOCATED].unifiedRef = -1; | |
1704 | |
1705 uint32_t partIdxLT, partIdxRT, partIdxLB = deriveLeftBottomIdx(puIdx); | |
1706 deriveLeftRightTopIdx(puIdx, partIdxLT, partIdxRT); | |
1707 | |
1708 // Load the spatial MVs. | |
1709 getInterNeighbourMV(neighbours + MD_BELOW_LEFT, partIdxLB, MD_BELOW_LEFT); | |
1710 getInterNeighbourMV(neighbours + MD_LEFT, partIdxLB, MD_LEFT); | |
1711 getInterNeighbourMV(neighbours + MD_ABOVE_RIGHT,partIdxRT, MD_ABOVE_RIGHT); | |
1712 getInterNeighbourMV(neighbours + MD_ABOVE, partIdxRT, MD_ABOVE); | |
1713 getInterNeighbourMV(neighbours + MD_ABOVE_LEFT, partIdxLT, MD_ABOVE_LEFT); | |
1714 | |
1715 if (m_slice->m_sps->bTemporalMVPEnabled) | |
1716 { | |
1717 uint32_t absPartAddr = m_absIdxInCTU + absPartIdx; | |
1718 uint32_t partIdxRB = deriveRightBottomIdx(puIdx); | |
1719 | |
1720 // co-located RightBottom temporal predictor (H) | |
1721 int ctuIdx = -1; | |
1722 | |
1723 // image boundary check | |
1724 if (m_encData->getPicCTU(m_cuAddr)->m_cuPelX + g_zscanToPelX[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picWidthInLumaSamples && | |
1725 m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picHeightInLumaSamples) | |
1726 { | |
1727 uint32_t absPartIdxRB = g_zscanToRaster[partIdxRB]; | |
1728 uint32_t numUnits = s_numPartInCUSize; | |
1729 bool bNotLastCol = lessThanCol(absPartIdxRB, numUnits - 1, numUnits); // is not at the last column of CTU | |
1730 bool bNotLastRow = lessThanRow(absPartIdxRB, numUnits - 1, numUnits); // is not at the last row of CTU | |
1731 | |
1732 if (bNotLastCol && bNotLastRow) | |
1733 { | |
1734 absPartAddr = g_rasterToZscan[absPartIdxRB + numUnits + 1]; | |
1735 ctuIdx = m_cuAddr; | |
1736 } | |
1737 else if (bNotLastCol) | |
1738 absPartAddr = g_rasterToZscan[(absPartIdxRB + numUnits + 1) & (numUnits - 1)]; | |
1739 else if (bNotLastRow) | |
1740 { | |
1741 absPartAddr = g_rasterToZscan[absPartIdxRB + 1]; | |
1742 ctuIdx = m_cuAddr + 1; | |
1743 } | |
1744 else // is the right bottom corner of CTU | |
1745 absPartAddr = 0; | |
1746 } | |
1747 | |
1748 if (!(ctuIdx >= 0 && getCollocatedMV(ctuIdx, absPartAddr, neighbours + MD_COLLOCATED))) | |
1749 { | |
1750 uint32_t partIdxCenter = deriveCenterIdx(puIdx); | |
1751 uint32_t curCTUIdx = m_cuAddr; | |
1752 getCollocatedMV(curCTUIdx, partIdxCenter, neighbours + MD_COLLOCATED); | |
1753 } | |
1754 } | |
1755 } | |
1756 | |
1757 void CUData::getInterNeighbourMV(InterNeighbourMV *neighbour, uint32_t partUnitIdx, MVP_DIR dir) const | |
1758 { | |
1759 const CUData* tmpCU = NULL; | |
1760 uint32_t idx = 0; | |
1761 | |
1762 switch (dir) | |
1763 { | |
1764 case MD_LEFT: | |
1765 tmpCU = getPULeft(idx, partUnitIdx); | |
1766 break; | |
1767 case MD_ABOVE: | |
1768 tmpCU = getPUAbove(idx, partUnitIdx); | |
1769 break; | |
1770 case MD_ABOVE_RIGHT: | |
1771 tmpCU = getPUAboveRight(idx, partUnitIdx); | |
1772 break; | |
1773 case MD_BELOW_LEFT: | |
1774 tmpCU = getPUBelowLeft(idx, partUnitIdx); | |
1775 break; | |
1776 case MD_ABOVE_LEFT: | |
1777 tmpCU = getPUAboveLeft(idx, partUnitIdx); | |
1778 break; | |
1779 default: | |
1780 break; | |
1781 } | |
1782 | |
1783 if (!tmpCU) | |
1784 { | |
1785 // Mark the PMV as unavailable. | |
1786 for (int i = 0; i < 2; i++) | |
1787 neighbour->refIdx[i] = -1; | |
1788 return; | |
1789 } | |
1790 | |
1791 for (int i = 0; i < 2; i++) | |
1792 { | |
1793 // Get the MV. | |
1794 neighbour->mv[i] = tmpCU->m_mv[i][idx]; | |
1795 | |
1796 // Get the reference idx. | |
1797 neighbour->refIdx[i] = tmpCU->m_refIdx[i][idx]; | |
1798 } | |
1799 } | |
1800 | |
1801 /* Clip motion vector to within slightly padded boundary of picture (the | |
1802 * MV may reference a block that is completely within the padded area). | |
1803 * Note this function is unaware of how much of this picture is actually | |
1804 * available for use (re: frame parallelism) */ | |
1805 void CUData::clipMv(MV& outMV) const | |
1806 { | |
1807 const uint32_t mvshift = 2; | |
1808 uint32_t offset = 8; | |
1809 | |
1810 int16_t xmax = (int16_t)((m_slice->m_sps->picWidthInLumaSamples + offset - m_cuPelX - 1) << mvshift); | |
1811 int16_t xmin = -(int16_t)((g_maxCUSize + offset + m_cuPelX - 1) << mvshift); | |
1812 | |
1813 int16_t ymax = (int16_t)((m_slice->m_sps->picHeightInLumaSamples + offset - m_cuPelY - 1) << mvshift); | |
1814 int16_t ymin = -(int16_t)((g_maxCUSize + offset + m_cuPelY - 1) << mvshift); | |
1815 | |
1816 outMV.x = X265_MIN(xmax, X265_MAX(xmin, outMV.x)); | |
1817 outMV.y = X265_MIN(ymax, X265_MAX(ymin, outMV.y)); | |
1818 } | |
1819 | |
1820 // Load direct spatial MV if available. | |
1821 bool CUData::getDirectPMV(MV& pmv, InterNeighbourMV *neighbours, uint32_t picList, uint32_t refIdx) const | |
1822 { | |
1823 int curRefPOC = m_slice->m_refPOCList[picList][refIdx]; | |
1824 for (int i = 0; i < 2; i++, picList = !picList) | |
1825 { | |
1826 int partRefIdx = neighbours->refIdx[picList]; | |
1827 if (partRefIdx >= 0 && curRefPOC == m_slice->m_refPOCList[picList][partRefIdx]) | |
1828 { | |
1829 pmv = neighbours->mv[picList]; | |
1830 return true; | |
1831 } | |
1832 } | |
1833 return false; | |
1834 } | |
1835 | |
1836 // Load indirect spatial MV if available. An indirect MV has to be scaled. | |
1837 bool CUData::getIndirectPMV(MV& outMV, InterNeighbourMV *neighbours, uint32_t picList, uint32_t refIdx) const | |
1838 { | |
1839 int curPOC = m_slice->m_poc; | |
1840 int neibPOC = curPOC; | |
1841 int curRefPOC = m_slice->m_refPOCList[picList][refIdx]; | |
1842 | |
1843 for (int i = 0; i < 2; i++, picList = !picList) | |
1844 { | |
1845 int partRefIdx = neighbours->refIdx[picList]; | |
1846 if (partRefIdx >= 0) | |
1847 { | |
1848 int neibRefPOC = m_slice->m_refPOCList[picList][partRefIdx]; | |
1849 MV mvp = neighbours->mv[picList]; | |
1850 | |
1851 outMV = scaleMvByPOCDist(mvp, curPOC, curRefPOC, neibPOC, neibRefPOC); | |
1852 return true; | |
1853 } | |
1854 } | |
1855 return false; | |
1856 } | |
1857 | |
1858 bool CUData::getColMVP(MV& outMV, int& outRefIdx, int picList, int cuAddr, int partUnitIdx) const | |
1859 { | |
1860 const Frame* colPic = m_slice->m_refFrameList[m_slice->isInterB() && !m_slice->m_colFromL0Flag][m_slice->m_colRefIdx]; | |
1861 const CUData* colCU = colPic->m_encData->getPicCTU(cuAddr); | |
1862 | |
1863 uint32_t absPartAddr = partUnitIdx & TMVP_UNIT_MASK; | |
1864 if (colCU->m_predMode[partUnitIdx] == MODE_NONE || colCU->isIntra(absPartAddr)) | |
1865 return false; | |
1866 | |
1867 int colRefPicList = m_slice->m_bCheckLDC ? picList : m_slice->m_colFromL0Flag; | |
1868 | |
1869 int colRefIdx = colCU->m_refIdx[colRefPicList][absPartAddr]; | |
1870 | |
1871 if (colRefIdx < 0) | |
1872 { | |
1873 colRefPicList = !colRefPicList; | |
1874 colRefIdx = colCU->m_refIdx[colRefPicList][absPartAddr]; | |
1875 | |
1876 if (colRefIdx < 0) | |
1877 return false; | |
1878 } | |
1879 | |
1880 // Scale the vector | |
1881 int colRefPOC = colCU->m_slice->m_refPOCList[colRefPicList][colRefIdx]; | |
1882 int colPOC = colCU->m_slice->m_poc; | |
1883 MV colmv = colCU->m_mv[colRefPicList][absPartAddr]; | |
1884 | |
1885 int curRefPOC = m_slice->m_refPOCList[picList][outRefIdx]; | |
1886 int curPOC = m_slice->m_poc; | |
1887 | |
1888 outMV = scaleMvByPOCDist(colmv, curPOC, curRefPOC, colPOC, colRefPOC); | |
1889 return true; | |
1890 } | |
1891 | |
1892 // Cache the collocated MV. | |
1893 bool CUData::getCollocatedMV(int cuAddr, int partUnitIdx, InterNeighbourMV *neighbour) const | |
1894 { | |
1895 const Frame* colPic = m_slice->m_refFrameList[m_slice->isInterB() && !m_slice->m_colFromL0Flag][m_slice->m_colRefIdx]; | |
1896 const CUData* colCU = colPic->m_encData->getPicCTU(cuAddr); | |
1897 | |
1898 uint32_t absPartAddr = partUnitIdx & TMVP_UNIT_MASK; | |
1899 if (colCU->m_predMode[partUnitIdx] == MODE_NONE || colCU->isIntra(absPartAddr)) | |
1900 return false; | |
1901 | |
1902 for (int list = 0; list < 2; list++) | |
1903 { | |
1904 neighbour->cuAddr[list] = cuAddr; | |
1905 int colRefPicList = m_slice->m_bCheckLDC ? list : m_slice->m_colFromL0Flag; | |
1906 int colRefIdx = colCU->m_refIdx[colRefPicList][absPartAddr]; | |
1907 | |
1908 if (colRefIdx < 0) | |
1909 colRefPicList = !colRefPicList; | |
1910 | |
1911 neighbour->refIdx[list] = colCU->m_refIdx[colRefPicList][absPartAddr]; | |
1912 neighbour->refIdx[list] |= colRefPicList << 4; | |
1913 | |
1914 neighbour->mv[list] = colCU->m_mv[colRefPicList][absPartAddr]; | |
1915 } | |
1916 | |
1917 return neighbour->unifiedRef != -1; | |
1918 } | |
1919 | |
1920 MV CUData::scaleMvByPOCDist(const MV& inMV, int curPOC, int curRefPOC, int colPOC, int colRefPOC) const | |
1921 { | |
1922 int diffPocD = colPOC - colRefPOC; | |
1923 int diffPocB = curPOC - curRefPOC; | |
1924 | |
1925 if (diffPocD == diffPocB) | |
1926 return inMV; | |
1927 else | |
1928 { | |
1929 int tdb = x265_clip3(-128, 127, diffPocB); | |
1930 int tdd = x265_clip3(-128, 127, diffPocD); | |
1931 int x = (0x4000 + abs(tdd / 2)) / tdd; | |
1932 int scale = x265_clip3(-4096, 4095, (tdb * x + 32) >> 6); | |
1933 return scaleMv(inMV, scale); | |
1934 } | |
1935 } | |
1936 | |
1937 uint32_t CUData::deriveCenterIdx(uint32_t puIdx) const | |
1938 { | |
1939 uint32_t absPartIdx; | |
1940 int puWidth, puHeight; | |
1941 | |
1942 getPartIndexAndSize(puIdx, absPartIdx, puWidth, puHeight); | |
1943 | |
1944 return g_rasterToZscan[g_zscanToRaster[m_absIdxInCTU + absPartIdx] | |
1945 + (puHeight >> (LOG2_UNIT_SIZE + 1)) * s_numPartInCUSize | |
1946 + (puWidth >> (LOG2_UNIT_SIZE + 1))]; | |
1947 } | |
1948 | |
1949 void CUData::getTUEntropyCodingParameters(TUEntropyCodingParameters &result, uint32_t absPartIdx, uint32_t log2TrSize, bool bIsLuma) const | |
1950 { | |
1951 bool bIsIntra = isIntra(absPartIdx); | |
1952 | |
1953 // set the group layout | |
1954 result.log2TrSizeCG = log2TrSize - 2; | |
1955 | |
1956 // set the scan orders | |
1957 if (bIsIntra) | |
1958 { | |
1959 uint32_t dirMode; | |
1960 | |
1961 if (bIsLuma) | |
1962 dirMode = m_lumaIntraDir[absPartIdx]; | |
1963 else | |
1964 { | |
1965 dirMode = m_chromaIntraDir[absPartIdx]; | |
1966 if (dirMode == DM_CHROMA_IDX) | |
1967 { | |
1968 dirMode = m_lumaIntraDir[(m_chromaFormat == X265_CSP_I444) ? absPartIdx : absPartIdx & 0xFC]; | |
1969 dirMode = (m_chromaFormat == X265_CSP_I422) ? g_chroma422IntraAngleMappingTable[dirMode] : dirMode; | |
1970 } | |
1971 } | |
1972 | |
1973 if (log2TrSize <= (MDCS_LOG2_MAX_SIZE - m_hChromaShift) || (bIsLuma && log2TrSize == MDCS_LOG2_MAX_SIZE)) | |
1974 result.scanType = dirMode >= 22 && dirMode <= 30 ? SCAN_HOR : dirMode >= 6 && dirMode <= 14 ? SCAN_VER : SCAN_DIAG; | |
1975 else | |
1976 result.scanType = SCAN_DIAG; | |
1977 } | |
1978 else | |
1979 result.scanType = SCAN_DIAG; | |
1980 | |
1981 result.scan = g_scanOrder[result.scanType][log2TrSize - 2]; | |
1982 result.scanCG = g_scanOrderCG[result.scanType][result.log2TrSizeCG]; | |
1983 | |
1984 if (log2TrSize == 2) | |
1985 result.firstSignificanceMapContext = 0; | |
1986 else if (log2TrSize == 3) | |
1987 result.firstSignificanceMapContext = (result.scanType != SCAN_DIAG && bIsLuma) ? 15 : 9; | |
1988 else | |
1989 result.firstSignificanceMapContext = bIsLuma ? 21 : 12; | |
1990 } | |
1991 | |
1992 #define CU_SET_FLAG(bitfield, flag, value) (bitfield) = ((bitfield) & (~(flag))) | ((~((value) - 1)) & (flag)) | |
1993 | |
1994 void CUData::calcCTUGeoms(uint32_t ctuWidth, uint32_t ctuHeight, uint32_t maxCUSize, uint32_t minCUSize, CUGeom cuDataArray[CUGeom::MAX_GEOMS]) | |
1995 { | |
1996 // Initialize the coding blocks inside the CTB | |
1997 for (uint32_t log2CUSize = g_log2Size[maxCUSize], rangeCUIdx = 0; log2CUSize >= g_log2Size[minCUSize]; log2CUSize--) | |
1998 { | |
1999 uint32_t blockSize = 1 << log2CUSize; | |
2000 uint32_t sbWidth = 1 << (g_log2Size[maxCUSize] - log2CUSize); | |
2001 int32_t lastLevelFlag = log2CUSize == g_log2Size[minCUSize]; | |
2002 | |
2003 for (uint32_t sbY = 0; sbY < sbWidth; sbY++) | |
2004 { | |
2005 for (uint32_t sbX = 0; sbX < sbWidth; sbX++) | |
2006 { | |
2007 uint32_t depthIdx = g_depthScanIdx[sbY][sbX]; | |
2008 uint32_t cuIdx = rangeCUIdx + depthIdx; | |
2009 uint32_t childIdx = rangeCUIdx + sbWidth * sbWidth + (depthIdx << 2); | |
2010 uint32_t px = sbX * blockSize; | |
2011 uint32_t py = sbY * blockSize; | |
2012 int32_t presentFlag = px < ctuWidth && py < ctuHeight; | |
2013 int32_t splitMandatoryFlag = presentFlag && !lastLevelFlag && (px + blockSize > ctuWidth || py + blockSize > ctuHeight); | |
2014 | |
2015 /* Offset of the luma CU in the X, Y direction in terms of pixels from the CTU origin */ | |
2016 uint32_t xOffset = (sbX * blockSize) >> 3; | |
2017 uint32_t yOffset = (sbY * blockSize) >> 3; | |
2018 X265_CHECK(cuIdx < CUGeom::MAX_GEOMS, "CU geom index bug\n"); | |
2019 | |
2020 CUGeom *cu = cuDataArray + cuIdx; | |
2021 cu->log2CUSize = log2CUSize; | |
2022 cu->childOffset = childIdx - cuIdx; | |
2023 cu->absPartIdx = g_depthScanIdx[yOffset][xOffset] * 4; | |
2024 cu->numPartitions = (NUM_4x4_PARTITIONS >> ((g_maxLog2CUSize - cu->log2CUSize) * 2)); | |
2025 cu->depth = g_log2Size[maxCUSize] - log2CUSize; | |
2026 | |
2027 cu->flags = 0; | |
2028 CU_SET_FLAG(cu->flags, CUGeom::PRESENT, presentFlag); | |
2029 CU_SET_FLAG(cu->flags, CUGeom::SPLIT_MANDATORY | CUGeom::SPLIT, splitMandatoryFlag); | |
2030 CU_SET_FLAG(cu->flags, CUGeom::LEAF, lastLevelFlag); | |
2031 } | |
2032 } | |
2033 rangeCUIdx += sbWidth * sbWidth; | |
2034 } | |
2035 } |