Mercurial > hg > forks > libbpg
comparison x265/source/common/cudata.h @ 0:772086c29cc7
Initial import.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Wed, 16 Nov 2016 11:16:33 +0200 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:772086c29cc7 |
---|---|
1 /***************************************************************************** | |
2 * Copyright (C) 2015 x265 project | |
3 * | |
4 * Authors: Steve Borho <steve@borho.org> | |
5 * | |
6 * This program is free software; you can redistribute it and/or modify | |
7 * it under the terms of the GNU General Public License as published by | |
8 * the Free Software Foundation; either version 2 of the License, or | |
9 * (at your option) any later version. | |
10 * | |
11 * This program is distributed in the hope that it will be useful, | |
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 * GNU General Public License for more details. | |
15 * | |
16 * You should have received a copy of the GNU General Public License | |
17 * along with this program; if not, write to the Free Software | |
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. | |
19 * | |
20 * This program is also available under a commercial proprietary license. | |
21 * For more information, contact us at license @ x265.com. | |
22 *****************************************************************************/ | |
23 | |
24 #ifndef X265_CUDATA_H | |
25 #define X265_CUDATA_H | |
26 | |
27 #include "common.h" | |
28 #include "slice.h" | |
29 #include "mv.h" | |
30 | |
31 namespace X265_NS { | |
32 // private namespace | |
33 | |
34 class FrameData; | |
35 class Slice; | |
36 struct TUEntropyCodingParameters; | |
37 struct CUDataMemPool; | |
38 | |
// Partitioning modes of a CU. The enumerator order matches the row order of nbPartsTable, partTable and partAddrTable. | |
39 enum PartSize | |
40 { | |
41 SIZE_2Nx2N, // symmetric motion partition, 2Nx2N (a single partition) | |
42 SIZE_2NxN, // symmetric motion partition, 2Nx N (two partitions, full width and half height each) | |
43 SIZE_Nx2N, // symmetric motion partition, Nx2N (two partitions, half width and full height each) | |
44 SIZE_NxN, // symmetric motion partition, Nx N (four partitions) | |
45 SIZE_2NxnU, // asymmetric motion partition, 2Nx( N/2) + 2Nx(3N/2) | |
46 SIZE_2NxnD, // asymmetric motion partition, 2Nx(3N/2) + 2Nx( N/2) | |
47 SIZE_nLx2N, // asymmetric motion partition, ( N/2)x2N + (3N/2)x2N | |
48 SIZE_nRx2N, // asymmetric motion partition, (3N/2)x2N + ( N/2)x2N | |
// Number of partitioning modes listed above (8); not a mode itself. | |
49 NUM_SIZES | |
50 }; | |
51 | |
// Prediction mode of a CU, encoded as bit flags. | |
// MODE_SKIP is defined with the MODE_INTER bit set, so (mode & MODE_INTER) is also non-zero for MODE_SKIP. | |
52 enum PredMode | |
53 { | |
54 MODE_NONE = 0, | |
55 MODE_INTER = (1 << 0), | |
56 MODE_INTRA = (1 << 1), | |
57 MODE_SKIP = (1 << 2) | MODE_INTER | |
58 }; | |
59 | |
60 // Identifies which neighbouring block supplies a motion vector predictor (MVP) candidate in AMVP | |
61 enum MVP_DIR | |
62 { | |
63 MD_LEFT = 0, // MVP of left block | |
64 MD_ABOVE, // MVP of above block | |
65 MD_ABOVE_RIGHT, // MVP of above right block | |
66 MD_BELOW_LEFT, // MVP of below left block | |
67 MD_ABOVE_LEFT, // MVP of above left block | |
68 MD_COLLOCATED // MVP of temporal neighbour | |
69 }; | |
70 | |
// Geometry of one CU inside a CTU: its size, position and count in 4x4 units, depth, and status flags. | |
71 struct CUGeom | |
72 { | |
// Bit values; presumably combined in the flags member below (TODO confirm against calcCTUGeoms). | |
73 enum { | |
74 INTRA = 1<<0, // CU is intra predicted | |
75 PRESENT = 1<<1, // CU is not completely outside the frame | |
76 SPLIT_MANDATORY = 1<<2, // CU split is mandatory if CU is inside frame and can be split | |
77 LEAF = 1<<3, // CU is a leaf node of the CTU | |
78 SPLIT = 1<<4, // CU is currently split in four child CUs. | |
79 }; | |
80 | |
81 // (1 + 4 + 16 + 64) = 85; size of the CUGeom array taken by CUData::calcCTUGeoms(). Presumably one entry per CU at each of four quadtree depths (TODO confirm). | |
82 enum { MAX_GEOMS = 85 }; | |
83 | |
84 uint32_t log2CUSize; // Log of the CU size. | |
85 uint32_t childOffset; // offset of the first child CU from current CU | |
86 uint32_t absPartIdx; // Part index of this CU in terms of 4x4 blocks. | |
87 uint32_t numPartitions; // Number of 4x4 blocks in the CU | |
88 uint32_t flags; // CU flags. | |
89 uint32_t depth; // depth of this CU relative from CTU | |
90 }; | |
91 | |
// A motion vector together with a reference index. | |
// Used for the merge candidate output of CUData::getInterMergeCandidates() and for CUData::getMvField(). | |
92 struct MVField | |
93 { | |
94 MV mv; | |
95 int refIdx; | |
96 }; | |
97 | |
98 // Structure that keeps the neighbour's MV information. | |
99 struct InterNeighbourMV | |
100 { | |
101 // Neighbour MV. The index represents the list. | |
102 MV mv[2]; | |
103 | |
104 // Collocated right bottom CU addr. | |
105 uint32_t cuAddr[2]; | |
106 | |
107 // For spatial prediction, this field contains the reference index | |
108 // in each list (-1 if not available). | |
109 // | |
110 // For temporal prediction, the first value is used for the | |
111 // prediction with list 0. The second value is used for the prediction | |
112 // with list 1. For each value, the first four bits are the reference index | |
113 // associated to the PMV, and the fifth bit is the list associated to the PMV. | |
114 // unifiedRef overlays both refIdx entries in the union below, so if both reference indices are -1, then unifiedRef is also -1 | |
115 union { int16_t refIdx[2]; int32_t unifiedRef; }; | |
116 }; | |
117 | |
// Function pointer types for the per-part helpers held by CUData (m_partCopy / m_partSet and friends). | |
118 typedef void(*cucopy_t)(uint8_t* dst, uint8_t* src); // copy function; dst and src are aligned to MIN(size, 32) | |
119 typedef void(*cubcast_t)(uint8_t* dst, uint8_t val); // broadcast-set function writing val to dst; dst is aligned to MIN(size, 32) | |
120 | |
121 // Partition count table: number of partitions for each partitioning mode (index is the PartSize value). | |
122 const uint32_t nbPartsTable[8] = { 1, 2, 2, 4, 2, 2, 2, 2 }; | |
123 | |
124 // Partition table. | |
125 // First index is partitioning mode. Second index is partition index. | |
126 // Third index is 0 for partition sizes, 1 for partition offsets. The | |
127 // sizes and offsets are encoded as two packed 4-bit values (X,Y), X in the high nibble. | |
128 // X and Y represent 1/4 fractions of the block size, e.g. size 0x42 is full width (4/4) and half height (2/4). | |
// Unused partition slots of a mode are filled with { 0x00, 0x00 }. | |
129 const uint32_t partTable[8][4][2] = | |
130 { | |
131 // XY | |
132 { { 0x44, 0x00 }, { 0x00, 0x00 }, { 0x00, 0x00 }, { 0x00, 0x00 } }, // SIZE_2Nx2N. | |
133 { { 0x42, 0x00 }, { 0x42, 0x02 }, { 0x00, 0x00 }, { 0x00, 0x00 } }, // SIZE_2NxN. | |
134 { { 0x24, 0x00 }, { 0x24, 0x20 }, { 0x00, 0x00 }, { 0x00, 0x00 } }, // SIZE_Nx2N. | |
135 { { 0x22, 0x00 }, { 0x22, 0x20 }, { 0x22, 0x02 }, { 0x22, 0x22 } }, // SIZE_NxN. | |
136 { { 0x41, 0x00 }, { 0x43, 0x01 }, { 0x00, 0x00 }, { 0x00, 0x00 } }, // SIZE_2NxnU. | |
137 { { 0x43, 0x00 }, { 0x41, 0x03 }, { 0x00, 0x00 }, { 0x00, 0x00 } }, // SIZE_2NxnD. | |
138 { { 0x14, 0x00 }, { 0x34, 0x10 }, { 0x00, 0x00 }, { 0x00, 0x00 } }, // SIZE_nLx2N. | |
139 { { 0x34, 0x00 }, { 0x14, 0x30 }, { 0x00, 0x00 }, { 0x00, 0x00 } } // SIZE_nRx2N. | |
140 }; | |
141 | |
142 // Partition Address table. | |
143 // First index is partitioning mode. Second index is partition (PU) index; the value is that partition's address, which CUData::getPUOffset() shifts according to the CU depth. | |
144 const uint32_t partAddrTable[8][4] = | |
145 { | |
146 { 0x00, 0x00, 0x00, 0x00 }, // SIZE_2Nx2N. | |
147 { 0x00, 0x08, 0x08, 0x08 }, // SIZE_2NxN. | |
148 { 0x00, 0x04, 0x04, 0x04 }, // SIZE_Nx2N. | |
149 { 0x00, 0x04, 0x08, 0x0C }, // SIZE_NxN. | |
150 { 0x00, 0x02, 0x02, 0x02 }, // SIZE_2NxnU. | |
151 { 0x00, 0x0A, 0x0A, 0x0A }, // SIZE_2NxnD. | |
152 { 0x00, 0x01, 0x01, 0x01 }, // SIZE_nLx2N. | |
153 { 0x00, 0x05, 0x05, 0x05 } // SIZE_nRx2N. | |
154 }; | |
155 | |
156 // Holds part data for a CU of a given size, from an 8x8 CU to a CTU | |
157 class CUData | |
158 { | |
159 public: | |
160 | |
161 static cubcast_t s_partSet[NUM_FULL_DEPTH]; // pointer to broadcast set functions per absolute depth | |
162 static uint32_t s_numPartInCUSize; | |
163 | |
164 FrameData* m_encData; | |
165 const Slice* m_slice; | |
166 | |
167 cucopy_t m_partCopy; // pointer to function that copies m_numPartitions elements | |
168 cubcast_t m_partSet; // pointer to function that sets m_numPartitions elements | |
169 cucopy_t m_subPartCopy; // pointer to function that copies m_numPartitions/4 elements, may be NULL | |
170 cubcast_t m_subPartSet; // pointer to function that sets m_numPartitions/4 elements, may be NULL | |
171 | |
172 uint32_t m_cuAddr; // address of CTU within the picture in raster order | |
173 uint32_t m_absIdxInCTU; // address of CU within its CTU in Z scan order | |
174 uint32_t m_cuPelX; // CU position within the picture, in pixels (X) | |
175 uint32_t m_cuPelY; // CU position within the picture, in pixels (Y) | |
176 uint32_t m_numPartitions; // maximum number of 4x4 partitions within this CU | |
177 | |
178 uint32_t m_chromaFormat; | |
179 uint32_t m_hChromaShift; | |
180 uint32_t m_vChromaShift; | |
181 | |
182 /* Per-part data, stored contiguously */ | |
183 int8_t* m_qp; // array of QP values | |
184 uint8_t* m_log2CUSize; // array of cu log2Size TODO: seems redundant to depth | |
185 uint8_t* m_lumaIntraDir; // array of intra directions (luma) | |
186 uint8_t* m_tqBypass; // array of CU lossless flags | |
187 int8_t* m_refIdx[2]; // array of motion reference indices per list | |
188 uint8_t* m_cuDepth; // array of depths | |
189 uint8_t* m_predMode; // array of prediction modes | |
190 uint8_t* m_partSize; // array of partition sizes | |
191 uint8_t* m_mergeFlag; // array of merge flags | |
192 uint8_t* m_interDir; // array of inter directions | |
193 uint8_t* m_mvpIdx[2]; // array of motion vector predictor candidates or merge candidate indices [0] | |
194 uint8_t* m_tuDepth; // array of transform indices | |
195 uint8_t* m_transformSkip[3]; // array of transform skipping flags per plane | |
196 uint8_t* m_cbf[3]; // array of coded block flags (CBF) per plane | |
197 uint8_t* m_chromaIntraDir; // array of intra directions (chroma) | |
198 enum { BytesPerPartition = 21 }; // combined sizeof() of all per-part data: one byte for each of the 21 arrays above | |
199 | |
200 coeff_t* m_trCoeff[3]; // transformed coefficient buffer per plane | |
201 | |
202 MV* m_mv[2]; // array of motion vectors per list | |
203 MV* m_mvd[2]; // array of coded motion vector deltas per list | |
204 enum { TMVP_UNIT_MASK = 0xF0 }; // mask for mapping an index into a compressed (reference) MV field | |
205 | |
206 const CUData* m_cuAboveLeft; // pointer to above-left neighbor CTU | |
207 const CUData* m_cuAboveRight; // pointer to above-right neighbor CTU | |
208 const CUData* m_cuAbove; // pointer to above neighbor CTU | |
209 const CUData* m_cuLeft; // pointer to left neighbor CTU | |
210 | |
211 CUData(); | |
212 | |
213 void initialize(const CUDataMemPool& dataPool, uint32_t depth, int csp, int instance); | |
214 static void calcCTUGeoms(uint32_t ctuWidth, uint32_t ctuHeight, uint32_t maxCUSize, uint32_t minCUSize, CUGeom cuDataArray[CUGeom::MAX_GEOMS]); | |
215 | |
216 void initCTU(const Frame& frame, uint32_t cuAddr, int qp); | |
217 void initSubCU(const CUData& ctu, const CUGeom& cuGeom, int qp); | |
218 void initLosslessCU(const CUData& cu, const CUGeom& cuGeom); | |
219 | |
220 void copyPartFrom(const CUData& cu, const CUGeom& childGeom, uint32_t subPartIdx); | |
221 void setEmptyPart(const CUGeom& childGeom, uint32_t subPartIdx); | |
222 void copyToPic(uint32_t depth) const; | |
223 | |
224 /* RD-0 methods called only from encodeResidue */ | |
225 void copyFromPic(const CUData& ctu, const CUGeom& cuGeom); | |
226 void updatePic(uint32_t depth) const; | |
227 | |
// Whole-CU setters: broadcast one value over the array through m_partSet (which sets m_numPartitions elements) | |
228 void setPartSizeSubParts(PartSize size) { m_partSet(m_partSize, (uint8_t)size); } | |
229 void setPredModeSubParts(PredMode mode) { m_partSet(m_predMode, (uint8_t)mode); } | |
230 void clearCbf() { m_partSet(m_cbf[0], 0); m_partSet(m_cbf[1], 0); m_partSet(m_cbf[2], 0); } | |
231 | |
232 /* these functions all take depth as an absolute depth from CTU, it is used to calculate the number of parts to copy */ | |
// The *PartRange variants instead memset exactly coveredPartIdxes bytes starting at absPartIdx. | |
// setQPSubParts casts the signed QP array and value to uint8_t because the broadcast functions take bytes. | |
233 void setQPSubParts(int8_t qp, uint32_t absPartIdx, uint32_t depth) { s_partSet[depth]((uint8_t*)m_qp + absPartIdx, (uint8_t)qp); } | |
234 void setTUDepthSubParts(uint8_t tuDepth, uint32_t absPartIdx, uint32_t depth) { s_partSet[depth](m_tuDepth + absPartIdx, tuDepth); } | |
235 void setLumaIntraDirSubParts(uint8_t dir, uint32_t absPartIdx, uint32_t depth) { s_partSet[depth](m_lumaIntraDir + absPartIdx, dir); } | |
236 void setChromIntraDirSubParts(uint8_t dir, uint32_t absPartIdx, uint32_t depth) { s_partSet[depth](m_chromaIntraDir + absPartIdx, dir); } | |
237 void setCbfSubParts(uint8_t cbf, TextType ttype, uint32_t absPartIdx, uint32_t depth) { s_partSet[depth](m_cbf[ttype] + absPartIdx, cbf); } | |
238 void setCbfPartRange(uint8_t cbf, TextType ttype, uint32_t absPartIdx, uint32_t coveredPartIdxes) { memset(m_cbf[ttype] + absPartIdx, cbf, coveredPartIdxes); } | |
239 void setTransformSkipSubParts(uint8_t tskip, TextType ttype, uint32_t absPartIdx, uint32_t depth) { s_partSet[depth](m_transformSkip[ttype] + absPartIdx, tskip); } | |
240 void setTransformSkipPartRange(uint8_t tskip, TextType ttype, uint32_t absPartIdx, uint32_t coveredPartIdxes) { memset(m_transformSkip[ttype] + absPartIdx, tskip, coveredPartIdxes); } | |
241 | |
242 bool setQPSubCUs(int8_t qp, uint32_t absPartIdx, uint32_t depth); | |
243 | |
244 void setPUInterDir(uint8_t dir, uint32_t absPartIdx, uint32_t puIdx); | |
245 void setPUMv(int list, const MV& mv, int absPartIdx, int puIdx); | |
246 void setPURefIdx(int list, int8_t refIdx, int absPartIdx, int puIdx); | |
247 | |
// getCbf() returns bit tuDepth of the CBF byte of the given plane; getQtRootCbf() is non-zero when any of the three planes has a non-zero CBF byte | |
248 uint8_t getCbf(uint32_t absPartIdx, TextType ttype, uint32_t tuDepth) const { return (m_cbf[ttype][absPartIdx] >> tuDepth) & 0x1; } | |
249 uint8_t getQtRootCbf(uint32_t absPartIdx) const { return m_cbf[0][absPartIdx] || m_cbf[1][absPartIdx] || m_cbf[2][absPartIdx]; } | |
250 int8_t getRefQP(uint32_t currAbsIdxInCTU) const; | |
251 uint32_t getInterMergeCandidates(uint32_t absPartIdx, uint32_t puIdx, MVField (*candMvField)[2], uint8_t* candDir) const; | |
252 void clipMv(MV& outMV) const; | |
253 int getPMV(InterNeighbourMV *neighbours, uint32_t reference_list, uint32_t refIdx, MV* amvpCand, MV* pmv) const; | |
254 void getNeighbourMV(uint32_t puIdx, uint32_t absPartIdx, InterNeighbourMV* neighbours) const; | |
255 void getIntraTUQtDepthRange(uint32_t tuDepthRange[2], uint32_t absPartIdx) const; | |
256 void getInterTUQtDepthRange(uint32_t tuDepthRange[2], uint32_t absPartIdx) const; | |
// Returns a bit mask rather than an index: bit m_refIdx[0] is set when m_interDir bit 0 is set, and bit (m_refIdx[1] + 16) when m_interDir bit 1 is set | |
257 uint32_t getBestRefIdx(uint32_t subPartIdx) const { return ((m_interDir[subPartIdx] & 1) << m_refIdx[0][subPartIdx]) | | |
258 (((m_interDir[subPartIdx] >> 1) & 1) << (m_refIdx[1][subPartIdx] + 16)); } | |
// Looks up the PU address in partAddrTable and scales it by the CU depth relative to g_unitSizeDepth | |
259 uint32_t getPUOffset(uint32_t puIdx, uint32_t absPartIdx) const { return (partAddrTable[(int)m_partSize[absPartIdx]][puIdx] << (g_unitSizeDepth - m_cuDepth[absPartIdx]) * 2) >> 4; } | |
260 | |
261 uint32_t getNumPartInter(uint32_t absPartIdx) const { return nbPartsTable[(int)m_partSize[absPartIdx]]; } | |
// isInter() tests the MODE_INTER bit, so it is also true for MODE_SKIP; isIntra() and isSkipped() compare for equality | |
262 bool isIntra(uint32_t absPartIdx) const { return m_predMode[absPartIdx] == MODE_INTRA; } | |
263 bool isInter(uint32_t absPartIdx) const { return !!(m_predMode[absPartIdx] & MODE_INTER); } | |
264 bool isSkipped(uint32_t absPartIdx) const { return m_predMode[absPartIdx] == MODE_SKIP; } | |
// True when the first part has log2 CU size 3 (an 8x8 CU) and its partition size is not SIZE_2Nx2N | |
265 bool isBipredRestriction() const { return m_log2CUSize[0] == 3 && m_partSize[0] != SIZE_2Nx2N; } | |
266 | |
267 void getPartIndexAndSize(uint32_t puIdx, uint32_t& absPartIdx, int& puWidth, int& puHeight) const; | |
268 void getMvField(const CUData* cu, uint32_t absPartIdx, int picList, MVField& mvField) const; | |
269 | |
270 void getAllowedChromaDir(uint32_t absPartIdx, uint32_t* modeList) const; | |
271 int getIntraDirLumaPredictor(uint32_t absPartIdx, uint32_t* intraDirPred) const; | |
272 | |
273 uint32_t getSCUAddr() const { return (m_cuAddr << g_unitSizeDepth * 2) + m_absIdxInCTU; } | |
274 uint32_t getCtxSplitFlag(uint32_t absPartIdx, uint32_t depth) const; | |
275 uint32_t getCtxSkipFlag(uint32_t absPartIdx) const; | |
276 void getTUEntropyCodingParameters(TUEntropyCodingParameters &result, uint32_t absPartIdx, uint32_t log2TrSize, bool bIsLuma) const; | |
277 | |
278 const CUData* getPULeft(uint32_t& lPartUnitIdx, uint32_t curPartUnitIdx) const; | |
279 const CUData* getPUAbove(uint32_t& aPartUnitIdx, uint32_t curPartUnitIdx) const; | |
280 const CUData* getPUAboveLeft(uint32_t& alPartUnitIdx, uint32_t curPartUnitIdx) const; | |
281 const CUData* getPUAboveRight(uint32_t& arPartUnitIdx, uint32_t curPartUnitIdx) const; | |
282 const CUData* getPUBelowLeft(uint32_t& blPartUnitIdx, uint32_t curPartUnitIdx) const; | |
283 | |
284 const CUData* getQpMinCuLeft(uint32_t& lPartUnitIdx, uint32_t currAbsIdxInCTU) const; | |
285 const CUData* getQpMinCuAbove(uint32_t& aPartUnitIdx, uint32_t currAbsIdxInCTU) const; | |
286 | |
287 const CUData* getPUAboveRightAdi(uint32_t& arPartUnitIdx, uint32_t curPartUnitIdx, uint32_t partUnitOffset) const; | |
288 const CUData* getPUBelowLeftAdi(uint32_t& blPartUnitIdx, uint32_t curPartUnitIdx, uint32_t partUnitOffset) const; | |
289 | |
290 protected: | |
291 | |
292 template<typename T> | |
293 void setAllPU(T *p, const T& val, int absPartIdx, int puIdx); | |
294 | |
295 int8_t getLastCodedQP(uint32_t absPartIdx) const; | |
296 int getLastValidPartIdx(int absPartIdx) const; | |
297 | |
298 bool hasEqualMotion(uint32_t absPartIdx, const CUData& candCU, uint32_t candAbsPartIdx) const; | |
299 | |
300 /* Check whether the current PU and a spatial neighboring PU are in different merge regions: returns true when the two positions differ in x >> 2 or in y >> 2 */ | |
301 bool isDiffMER(int xN, int yN, int xP, int yP) const { return ((xN >> 2) != (xP >> 2)) || ((yN >> 2) != (yP >> 2)); } | |
302 | |
303 // add possible motion vector predictor candidates | |
304 bool getDirectPMV(MV& pmv, InterNeighbourMV *neighbours, uint32_t picList, uint32_t refIdx) const; | |
305 bool getIndirectPMV(MV& outMV, InterNeighbourMV *neighbours, uint32_t reference_list, uint32_t refIdx) const; | |
306 void getInterNeighbourMV(InterNeighbourMV *neighbour, uint32_t partUnitIdx, MVP_DIR dir) const; | |
307 | |
308 bool getColMVP(MV& outMV, int& outRefIdx, int picList, int cuAddr, int absPartIdx) const; | |
309 bool getCollocatedMV(int cuAddr, int partUnitIdx, InterNeighbourMV *neighbour) const; | |
310 | |
311 MV scaleMvByPOCDist(const MV& inMV, int curPOC, int curRefPOC, int colPOC, int colRefPOC) const; | |
312 | |
313 void deriveLeftRightTopIdx(uint32_t puIdx, uint32_t& partIdxLT, uint32_t& partIdxRT) const; | |
314 | |
315 uint32_t deriveCenterIdx(uint32_t puIdx) const; | |
316 uint32_t deriveRightBottomIdx(uint32_t puIdx) const; | |
317 uint32_t deriveLeftBottomIdx(uint32_t puIdx) const; | |
318 }; | |
319 | |
320 // TU settings for entropy encoding; passed as the result parameter of CUData::getTUEntropyCodingParameters() | |
321 struct TUEntropyCodingParameters | |
322 { | |
323 const uint16_t *scan; | |
324 const uint16_t *scanCG; | |
325 ScanType scanType; | |
326 uint32_t log2TrSizeCG; | |
327 uint32_t firstSignificanceMapContext; | |
328 }; | |
329 | |
// Raw memory pool handed to CUData::initialize(): one block each for the per-partition byte arrays, | |
// the transform coefficients and the motion vectors, each sized for numInstances CUs. | |
330 struct CUDataMemPool | |
331 { | |
332 uint8_t* charMemBlock; | |
333 coeff_t* trCoeffMemBlock; | |
334 MV* mvMemBlock; | |
335 | |
// Starts with all three blocks unallocated (NULL). | |
336 CUDataMemPool() { charMemBlock = NULL; trCoeffMemBlock = NULL; mvMemBlock = NULL; } | |
337 | |
// Allocates the three blocks for numInstances CUs at the given depth and chroma format csp. | |
// Returns true when all three CHECKED_MALLOC calls complete, false through the fail: label | |
// (presumably reached from CHECKED_MALLOC when an allocation fails - confirm against the macro). | |
// Blocks that were already allocated are not released on the failure path here. | |
338 bool create(uint32_t depth, uint32_t csp, uint32_t numInstances) | |
339 { | |
// number of 4x4 partitions and CU edge length at this depth | |
340 uint32_t numPartition = NUM_4x4_PARTITIONS >> (depth * 2); | |
341 uint32_t cuSize = g_maxCUSize >> depth; | |
// sample counts of the luma plane and of one chroma plane (luma reduced by the chroma shifts) | |
342 uint32_t sizeL = cuSize * cuSize; | |
343 uint32_t sizeC = sizeL >> (CHROMA_H_SHIFT(csp) + CHROMA_V_SHIFT(csp)); | |
// coefficients: one luma plane plus two chroma planes per instance | |
344 CHECKED_MALLOC(trCoeffMemBlock, coeff_t, (sizeL + sizeC * 2) * numInstances); | |
// per-part bytes: BytesPerPartition bytes for every partition of every instance | |
345 CHECKED_MALLOC(charMemBlock, uint8_t, numPartition * numInstances * CUData::BytesPerPartition); | |
// motion vectors: four MV entries per partition (presumably m_mv[2] and m_mvd[2] - TODO confirm in initialize()) | |
346 CHECKED_MALLOC(mvMemBlock, MV, numPartition * 4 * numInstances); | |
347 return true; | |
348 | |
349 fail: | |
350 return false; | |
351 } | |
352 | |
// Releases all three blocks with X265_FREE. | |
// NOTE(review): the pointers are not reassigned here; whether they are reset depends on X265_FREE - confirm before reusing the pool. | |
353 void destroy() | |
354 { | |
355 X265_FREE(trCoeffMemBlock); | |
356 X265_FREE(mvMemBlock); | |
357 X265_FREE(charMemBlock); | |
358 } | |
359 }; | |
360 } | |
361 | |
362 #endif // ifndef X265_CUDATA_H |