annotate x265/source/common/vec/dct-ssse3.cpp @ 34:5d51fff843eb default tip

A "commit dump" of random changes I've made, as I probably won't be touching this code anymore.
author Matti Hamalainen <ccr@tnsp.org>
date Sun, 08 Mar 2020 19:18:48 +0200
parents 772086c29cc7
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1 /*****************************************************************************
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
2 * Copyright (C) 2013 x265 project
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
3 *
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
4 * Authors: Steve Borho <steve@borho.org>
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
5 * Mandar Gurav <mandar@multicorewareinc.com>
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
6 * Deepthi Devaki Akkoorath <deepthidevaki@multicorewareinc.com>
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
7 * Mahesh Pittala <mahesh@multicorewareinc.com>
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
8 * Rajesh Paulraj <rajesh@multicorewareinc.com>
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
9 * Min Chen <min.chen@multicorewareinc.com>
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
10 * Praveen Kumar Tiwari <praveen@multicorewareinc.com>
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
11 * Nabajit Deka <nabajit@multicorewareinc.com>
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
12 *
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
13 * This program is free software; you can redistribute it and/or modify
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
14 * it under the terms of the GNU General Public License as published by
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
15 * the Free Software Foundation; either version 2 of the License, or
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
16 * (at your option) any later version.
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
17 *
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
18 * This program is distributed in the hope that it will be useful,
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
21 * GNU General Public License for more details.
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
22 *
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
23 * You should have received a copy of the GNU General Public License
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
24 * along with this program; if not, write to the Free Software
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
25 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
26 *
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
27 * This program is also available under a commercial proprietary license.
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
28 * For more information, contact us at license @ x265.com.
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
29 *****************************************************************************/
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
30
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
31 #include "common.h"
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
32 #include "primitives.h"
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
33 #include <xmmintrin.h> // SSE
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
34 #include <pmmintrin.h> // SSE3
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
35 #include <tmmintrin.h> // SSSE3
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
36
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
// Shift amounts and matching rounding offsets for the transform passes.
// Every *_ADDn equals half of (1 << *_SHIFTn). dct16 below adds DCT16_ADD1 to
// its 32-bit sums right before the arithmetic right shift by DCT16_SHIFT1.
// The DCT32_* values are the DCT16_* shifts plus one.
// NOTE(review): X265_DEPTH is defined outside this file (presumably the
// configured sample bit depth) -- confirm in common.h.
37 #define DCT16_SHIFT1 (3 + X265_DEPTH - 8)
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
38 #define DCT16_ADD1 (1 << ((DCT16_SHIFT1) - 1))
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
39
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
// Second-pass shift; dct16 loads DCT16_ADD2 into its c_512 constant.
40 #define DCT16_SHIFT2 10
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
41 #define DCT16_ADD2 (1 << ((DCT16_SHIFT2) - 1))
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
42
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
43 #define DCT32_SHIFT1 (DCT16_SHIFT1 + 1)
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
44 #define DCT32_ADD1 (1 << ((DCT32_SHIFT1) - 1))
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
45
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
46 #define DCT32_SHIFT2 (DCT16_SHIFT2 + 1)
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
47 #define DCT32_ADD2 (1 << ((DCT32_SHIFT2) - 1))
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
48
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
49 using namespace X265_NS;
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
50
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
// Constant lookup rows, eight int16_t values per row, declared through the
// project's ALIGN_VAR_32 macro.
//   row 0     : same values as tab_dct_16_0 row 1, which dct16 passes to
//               _mm_shuffle_epi8 as a byte-shuffle mask.
//   rows 1-7  : loaded by dct16 as the second operand of _mm_madd_epi16
//               (multiply adjacent 16-bit pairs and add them into 32-bit lanes).
//   rows 8-14 : NOTE(review): no reference to these rows (or to row 0) was
//               seen while documenting this table -- check their users before
//               reordering anything here.
51 ALIGN_VAR_32(static const int16_t, tab_dct_8[][8]) =
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
52 {
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
// Row 0: each 16-bit entry packs two byte indices (e.g. 0x0F0E = bytes 14,15).
53 { 0x0100, 0x0F0E, 0x0706, 0x0908, 0x0302, 0x0D0C, 0x0504, 0x0B0A },
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
54
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
// Rows 1-8: coefficient patterns that repeat with period 2 (rows 1-4) or 4 (rows 5-8).
55 { 64, 64, 64, 64, 64, 64, 64, 64 },
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
56 { 64, -64, 64, -64, 64, -64, 64, -64 },
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
57 { 83, 36, 83, 36, 83, 36, 83, 36 },
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
58 { 36, -83, 36, -83, 36, -83, 36, -83 },
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
59 { 89, 18, 75, 50, 89, 18, 75, 50 },
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
60 { 75, -50, -18, -89, 75, -50, -18, -89 },
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
61 { 50, 75, -89, 18, 50, 75, -89, 18 },
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
62 { 18, -89, -50, 75, 18, -89, -50, 75 },
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
63
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
// Rows 9-14: the same coefficient magnitudes arranged in a different lane order.
64 { 83, 83, -83, -83, 36, 36, -36, -36 },
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
65 { 36, 36, -36, -36, -83, -83, 83, 83 },
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
66 { 89, -89, 18, -18, 75, -75, 50, -50 },
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
67 { 75, -75, -50, 50, -18, 18, -89, 89 },
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
68 { 50, -50, 75, -75, -89, 89, 18, -18 },
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
69 { 18, -18, -89, 89, -50, 50, 75, -75 },
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
70 };
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
71
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
// Byte-shuffle masks for _mm_shuffle_epi8. Each 16-bit entry packs the two
// source byte indices of one 16-bit lane (low byte first, as stored on
// little-endian x86), so every row is a permutation of the eight 16-bit lanes.
// dct16 uses row 0 on the upper eight samples of each input row before the
// add/subtract with the lower eight, and row 1 on the sums before the
// _mm_hadd_epi16/_mm_hsub_epi16 steps, which combine adjacent lanes.
// NOTE(review): rows 2 and 3 are not referenced by the code documented here.
72 ALIGN_VAR_32(static const int16_t, tab_dct_16_0[][8]) =
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
73 {
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
74 { 0x0F0E, 0x0D0C, 0x0B0A, 0x0908, 0x0706, 0x0504, 0x0302, 0x0100 }, // 0: source lanes 7,6,5,4,3,2,1,0 (full reversal)
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
75 { 0x0100, 0x0F0E, 0x0706, 0x0908, 0x0302, 0x0D0C, 0x0504, 0x0B0A }, // 1: source lanes 0,7,3,4,1,6,2,5
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
76 { 0x0100, 0x0706, 0x0302, 0x0504, 0x0F0E, 0x0908, 0x0D0C, 0x0B0A }, // 2: source lanes 0,3,1,2,7,4,6,5
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
77 { 0x0F0E, 0x0908, 0x0D0C, 0x0B0A, 0x0100, 0x0706, 0x0302, 0x0504 }, // 3: source lanes 7,4,6,5,0,3,1,2
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
78 };
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
79
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
// Coefficient rows, eight int16_t values each, declared through the project's
// ALIGN_VAR_32 macro. Rows 0-13 are written out literally; the eight MAKE_COEF
// invocations further down append two rows each (rows 14-29), so row indices
// depend on the exact order of everything in this initializer.
// NOTE(review): no consumer of this table was seen while documenting it --
// check how callers index it before inserting, removing or reordering rows.
80 ALIGN_VAR_32(static const int16_t, tab_dct_16_1[][8]) =
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
81 {
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
// Rows 0-7: the same eight argument lists are fed to MAKE_COEF below.
82 { 90, 87, 80, 70, 57, 43, 25, 9 }, // 0
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
83 { 87, 57, 9, -43, -80, -90, -70, -25 }, // 1
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
84 { 80, 9, -70, -87, -25, 57, 90, 43 }, // 2
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
85 { 70, -43, -87, 9, 90, 25, -80, -57 }, // 3
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
86 { 57, -80, -25, 90, -9, -87, 43, 70 }, // 4
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
87 { 43, -90, 57, 25, -87, 70, 9, -80 }, // 5
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
88 { 25, -70, 90, -80, 43, 9, -57, 87 }, // 6
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
89 { 9, -25, 43, -57, 70, -80, 87, -90 }, // 7
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
// Rows 8-9 hold the same values as tab_dct_8 rows 9-10; rows 10-13 repeat each coefficient twice.
90 { 83, 83, -83, -83, 36, 36, -36, -36 }, // 8
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
91 { 36, 36, -36, -36, -83, -83, 83, 83 }, // 9
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
92 { 89, 89, 18, 18, 75, 75, 50, 50 }, // 10
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
93 { 75, 75, -50, -50, -18, -18, -89, -89 }, // 11
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
94 { 50, 50, 75, 75, -89, -89, 18, 18 }, // 12
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
95 { 18, 18, -89, -89, -50, -50, 75, 75 }, // 13
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
96
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
// MAKE_COEF(a0..a7) expands to two initializer rows, each followed by a comma:
//   { a0, -a0, a3, -a3, a1, -a1, a2, -a2 }
//   { a7, -a7, a4, -a4, a6, -a6, a5, -a5 }
// i.e. every coefficient is paired with its negation. The macro is local to
// this initializer and is #undef'd right after the last use.
97 #define MAKE_COEF(a0, a1, a2, a3, a4, a5, a6, a7) \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
98 { (a0), -(a0), (a3), -(a3), (a1), -(a1), (a2), -(a2) \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
99 }, \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
100 { (a7), -(a7), (a4), -(a4), (a6), -(a6), (a5), -(a5) },
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
101
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
// Rows 14-29: two generated rows per invocation, in the order listed.
102 MAKE_COEF(90, 87, 80, 70, 57, 43, 25, 9)
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
103 MAKE_COEF(87, 57, 9, -43, -80, -90, -70, -25)
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
104 MAKE_COEF(80, 9, -70, -87, -25, 57, 90, 43)
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
105 MAKE_COEF(70, -43, -87, 9, 90, 25, -80, -57)
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
106 MAKE_COEF(57, -80, -25, 90, -9, -87, 43, 70)
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
107 MAKE_COEF(43, -90, 57, 25, -87, 70, 9, -80)
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
108 MAKE_COEF(25, -70, 90, -80, 43, 9, -57, 87)
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
109 MAKE_COEF(9, -25, 43, -57, 70, -80, 87, -90)
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
110 #undef MAKE_COEF
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
111 };
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
112
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
113 static void dct16(const int16_t *src, int16_t *dst, intptr_t stride)
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
114 {
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
115 // Const
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
116 __m128i c_4 = _mm_set1_epi32(DCT16_ADD1);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
117 __m128i c_512 = _mm_set1_epi32(DCT16_ADD2);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
118
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
119 int i;
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
120
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
121 ALIGN_VAR_32(int16_t, tmp[16 * 16]);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
122
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
123 __m128i T00A, T01A, T02A, T03A, T04A, T05A, T06A, T07A;
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
124 __m128i T00B, T01B, T02B, T03B, T04B, T05B, T06B, T07B;
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
125 __m128i T10, T11, T12, T13, T14, T15, T16, T17;
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
126 __m128i T20, T21, T22, T23, T24, T25, T26, T27;
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
127 __m128i T30, T31, T32, T33, T34, T35, T36, T37;
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
128 __m128i T40, T41, T42, T43, T44, T45, T46, T47;
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
129 __m128i T50, T51, T52, T53;
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
130 __m128i T60, T61, T62, T63, T64, T65, T66, T67;
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
131 __m128i T70;
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
132
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
133 // DCT1
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
134 for (i = 0; i < 16; i += 8)
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
135 {
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
136 T00A = _mm_load_si128((__m128i*)&src[(i + 0) * stride + 0]); // [07 06 05 04 03 02 01 00]
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
137 T00B = _mm_load_si128((__m128i*)&src[(i + 0) * stride + 8]); // [0F 0E 0D 0C 0B 0A 09 08]
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
138 T01A = _mm_load_si128((__m128i*)&src[(i + 1) * stride + 0]); // [17 16 15 14 13 12 11 10]
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
139 T01B = _mm_load_si128((__m128i*)&src[(i + 1) * stride + 8]); // [1F 1E 1D 1C 1B 1A 19 18]
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
140 T02A = _mm_load_si128((__m128i*)&src[(i + 2) * stride + 0]); // [27 26 25 24 23 22 21 20]
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
141 T02B = _mm_load_si128((__m128i*)&src[(i + 2) * stride + 8]); // [2F 2E 2D 2C 2B 2A 29 28]
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
142 T03A = _mm_load_si128((__m128i*)&src[(i + 3) * stride + 0]); // [37 36 35 34 33 32 31 30]
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
143 T03B = _mm_load_si128((__m128i*)&src[(i + 3) * stride + 8]); // [3F 3E 3D 3C 3B 3A 39 38]
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
144 T04A = _mm_load_si128((__m128i*)&src[(i + 4) * stride + 0]); // [47 46 45 44 43 42 41 40]
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
145 T04B = _mm_load_si128((__m128i*)&src[(i + 4) * stride + 8]); // [4F 4E 4D 4C 4B 4A 49 48]
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
146 T05A = _mm_load_si128((__m128i*)&src[(i + 5) * stride + 0]); // [57 56 55 54 53 52 51 50]
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
147 T05B = _mm_load_si128((__m128i*)&src[(i + 5) * stride + 8]); // [5F 5E 5D 5C 5B 5A 59 58]
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
148 T06A = _mm_load_si128((__m128i*)&src[(i + 6) * stride + 0]); // [67 66 65 64 63 62 61 60]
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
149 T06B = _mm_load_si128((__m128i*)&src[(i + 6) * stride + 8]); // [6F 6E 6D 6C 6B 6A 69 68]
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
150 T07A = _mm_load_si128((__m128i*)&src[(i + 7) * stride + 0]); // [77 76 75 74 73 72 71 70]
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
151 T07B = _mm_load_si128((__m128i*)&src[(i + 7) * stride + 8]); // [7F 7E 7D 7C 7B 7A 79 78]
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
152
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
153 T00B = _mm_shuffle_epi8(T00B, _mm_load_si128((__m128i*)tab_dct_16_0[0]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
154 T01B = _mm_shuffle_epi8(T01B, _mm_load_si128((__m128i*)tab_dct_16_0[0]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
155 T02B = _mm_shuffle_epi8(T02B, _mm_load_si128((__m128i*)tab_dct_16_0[0]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
156 T03B = _mm_shuffle_epi8(T03B, _mm_load_si128((__m128i*)tab_dct_16_0[0]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
157 T04B = _mm_shuffle_epi8(T04B, _mm_load_si128((__m128i*)tab_dct_16_0[0]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
158 T05B = _mm_shuffle_epi8(T05B, _mm_load_si128((__m128i*)tab_dct_16_0[0]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
159 T06B = _mm_shuffle_epi8(T06B, _mm_load_si128((__m128i*)tab_dct_16_0[0]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
160 T07B = _mm_shuffle_epi8(T07B, _mm_load_si128((__m128i*)tab_dct_16_0[0]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
161
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
162 T10 = _mm_add_epi16(T00A, T00B);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
163 T11 = _mm_add_epi16(T01A, T01B);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
164 T12 = _mm_add_epi16(T02A, T02B);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
165 T13 = _mm_add_epi16(T03A, T03B);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
166 T14 = _mm_add_epi16(T04A, T04B);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
167 T15 = _mm_add_epi16(T05A, T05B);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
168 T16 = _mm_add_epi16(T06A, T06B);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
169 T17 = _mm_add_epi16(T07A, T07B);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
170
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
171 T20 = _mm_sub_epi16(T00A, T00B);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
172 T21 = _mm_sub_epi16(T01A, T01B);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
173 T22 = _mm_sub_epi16(T02A, T02B);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
174 T23 = _mm_sub_epi16(T03A, T03B);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
175 T24 = _mm_sub_epi16(T04A, T04B);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
176 T25 = _mm_sub_epi16(T05A, T05B);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
177 T26 = _mm_sub_epi16(T06A, T06B);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
178 T27 = _mm_sub_epi16(T07A, T07B);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
179
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
180 T30 = _mm_shuffle_epi8(T10, _mm_load_si128((__m128i*)tab_dct_16_0[1]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
181 T31 = _mm_shuffle_epi8(T11, _mm_load_si128((__m128i*)tab_dct_16_0[1]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
182 T32 = _mm_shuffle_epi8(T12, _mm_load_si128((__m128i*)tab_dct_16_0[1]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
183 T33 = _mm_shuffle_epi8(T13, _mm_load_si128((__m128i*)tab_dct_16_0[1]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
184 T34 = _mm_shuffle_epi8(T14, _mm_load_si128((__m128i*)tab_dct_16_0[1]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
185 T35 = _mm_shuffle_epi8(T15, _mm_load_si128((__m128i*)tab_dct_16_0[1]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
186 T36 = _mm_shuffle_epi8(T16, _mm_load_si128((__m128i*)tab_dct_16_0[1]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
187 T37 = _mm_shuffle_epi8(T17, _mm_load_si128((__m128i*)tab_dct_16_0[1]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
188
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
189 T40 = _mm_hadd_epi16(T30, T31);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
190 T41 = _mm_hadd_epi16(T32, T33);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
191 T42 = _mm_hadd_epi16(T34, T35);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
192 T43 = _mm_hadd_epi16(T36, T37);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
193 T44 = _mm_hsub_epi16(T30, T31);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
194 T45 = _mm_hsub_epi16(T32, T33);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
195 T46 = _mm_hsub_epi16(T34, T35);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
196 T47 = _mm_hsub_epi16(T36, T37);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
197
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
198 T50 = _mm_hadd_epi16(T40, T41);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
199 T51 = _mm_hadd_epi16(T42, T43);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
200 T52 = _mm_hsub_epi16(T40, T41);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
201 T53 = _mm_hsub_epi16(T42, T43);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
202
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
203 T60 = _mm_madd_epi16(T50, _mm_load_si128((__m128i*)tab_dct_8[1]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
204 T61 = _mm_madd_epi16(T51, _mm_load_si128((__m128i*)tab_dct_8[1]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
205 T60 = _mm_srai_epi32(_mm_add_epi32(T60, c_4), DCT16_SHIFT1);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
206 T61 = _mm_srai_epi32(_mm_add_epi32(T61, c_4), DCT16_SHIFT1);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
207 T70 = _mm_packs_epi32(T60, T61);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
208 _mm_store_si128((__m128i*)&tmp[0 * 16 + i], T70);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
209
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
210 T60 = _mm_madd_epi16(T50, _mm_load_si128((__m128i*)tab_dct_8[2]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
211 T61 = _mm_madd_epi16(T51, _mm_load_si128((__m128i*)tab_dct_8[2]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
212 T60 = _mm_srai_epi32(_mm_add_epi32(T60, c_4), DCT16_SHIFT1);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
213 T61 = _mm_srai_epi32(_mm_add_epi32(T61, c_4), DCT16_SHIFT1);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
214 T70 = _mm_packs_epi32(T60, T61);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
215 _mm_store_si128((__m128i*)&tmp[8 * 16 + i], T70);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
216
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
217 T60 = _mm_madd_epi16(T52, _mm_load_si128((__m128i*)tab_dct_8[3]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
218 T61 = _mm_madd_epi16(T53, _mm_load_si128((__m128i*)tab_dct_8[3]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
219 T60 = _mm_srai_epi32(_mm_add_epi32(T60, c_4), DCT16_SHIFT1);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
220 T61 = _mm_srai_epi32(_mm_add_epi32(T61, c_4), DCT16_SHIFT1);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
221 T70 = _mm_packs_epi32(T60, T61);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
222 _mm_store_si128((__m128i*)&tmp[4 * 16 + i], T70);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
223
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
224 T60 = _mm_madd_epi16(T52, _mm_load_si128((__m128i*)tab_dct_8[4]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
225 T61 = _mm_madd_epi16(T53, _mm_load_si128((__m128i*)tab_dct_8[4]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
226 T60 = _mm_srai_epi32(_mm_add_epi32(T60, c_4), DCT16_SHIFT1);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
227 T61 = _mm_srai_epi32(_mm_add_epi32(T61, c_4), DCT16_SHIFT1);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
228 T70 = _mm_packs_epi32(T60, T61);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
229 _mm_store_si128((__m128i*)&tmp[12 * 16 + i], T70);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
230
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
231 T60 = _mm_madd_epi16(T44, _mm_load_si128((__m128i*)tab_dct_8[5]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
232 T61 = _mm_madd_epi16(T45, _mm_load_si128((__m128i*)tab_dct_8[5]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
233 T62 = _mm_madd_epi16(T46, _mm_load_si128((__m128i*)tab_dct_8[5]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
234 T63 = _mm_madd_epi16(T47, _mm_load_si128((__m128i*)tab_dct_8[5]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
235 T60 = _mm_hadd_epi32(T60, T61);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
236 T61 = _mm_hadd_epi32(T62, T63);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
237 T60 = _mm_srai_epi32(_mm_add_epi32(T60, c_4), DCT16_SHIFT1);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
238 T61 = _mm_srai_epi32(_mm_add_epi32(T61, c_4), DCT16_SHIFT1);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
239 T70 = _mm_packs_epi32(T60, T61);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
240 _mm_store_si128((__m128i*)&tmp[2 * 16 + i], T70);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
241
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
242 T60 = _mm_madd_epi16(T44, _mm_load_si128((__m128i*)tab_dct_8[6]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
243 T61 = _mm_madd_epi16(T45, _mm_load_si128((__m128i*)tab_dct_8[6]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
244 T62 = _mm_madd_epi16(T46, _mm_load_si128((__m128i*)tab_dct_8[6]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
245 T63 = _mm_madd_epi16(T47, _mm_load_si128((__m128i*)tab_dct_8[6]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
246 T60 = _mm_hadd_epi32(T60, T61);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
247 T61 = _mm_hadd_epi32(T62, T63);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
248 T60 = _mm_srai_epi32(_mm_add_epi32(T60, c_4), DCT16_SHIFT1);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
249 T61 = _mm_srai_epi32(_mm_add_epi32(T61, c_4), DCT16_SHIFT1);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
250 T70 = _mm_packs_epi32(T60, T61);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
251 _mm_store_si128((__m128i*)&tmp[6 * 16 + i], T70);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
252
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
253 T60 = _mm_madd_epi16(T44, _mm_load_si128((__m128i*)tab_dct_8[7]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
254 T61 = _mm_madd_epi16(T45, _mm_load_si128((__m128i*)tab_dct_8[7]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
255 T62 = _mm_madd_epi16(T46, _mm_load_si128((__m128i*)tab_dct_8[7]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
256 T63 = _mm_madd_epi16(T47, _mm_load_si128((__m128i*)tab_dct_8[7]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
257 T60 = _mm_hadd_epi32(T60, T61);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
258 T61 = _mm_hadd_epi32(T62, T63);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
259 T60 = _mm_srai_epi32(_mm_add_epi32(T60, c_4), DCT16_SHIFT1);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
260 T61 = _mm_srai_epi32(_mm_add_epi32(T61, c_4), DCT16_SHIFT1);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
261 T70 = _mm_packs_epi32(T60, T61);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
262 _mm_store_si128((__m128i*)&tmp[10 * 16 + i], T70);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
263
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
264 T60 = _mm_madd_epi16(T44, _mm_load_si128((__m128i*)tab_dct_8[8]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
265 T61 = _mm_madd_epi16(T45, _mm_load_si128((__m128i*)tab_dct_8[8]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
266 T62 = _mm_madd_epi16(T46, _mm_load_si128((__m128i*)tab_dct_8[8]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
267 T63 = _mm_madd_epi16(T47, _mm_load_si128((__m128i*)tab_dct_8[8]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
268 T60 = _mm_hadd_epi32(T60, T61);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
269 T61 = _mm_hadd_epi32(T62, T63);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
270 T60 = _mm_srai_epi32(_mm_add_epi32(T60, c_4), DCT16_SHIFT1);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
271 T61 = _mm_srai_epi32(_mm_add_epi32(T61, c_4), DCT16_SHIFT1);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
272 T70 = _mm_packs_epi32(T60, T61);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
273 _mm_store_si128((__m128i*)&tmp[14 * 16 + i], T70);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
274
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
275 #define MAKE_ODD(tab, dstPos) \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
276 T60 = _mm_madd_epi16(T20, _mm_load_si128((__m128i*)tab_dct_16_1[(tab)])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
277 T61 = _mm_madd_epi16(T21, _mm_load_si128((__m128i*)tab_dct_16_1[(tab)])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
278 T62 = _mm_madd_epi16(T22, _mm_load_si128((__m128i*)tab_dct_16_1[(tab)])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
279 T63 = _mm_madd_epi16(T23, _mm_load_si128((__m128i*)tab_dct_16_1[(tab)])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
280 T64 = _mm_madd_epi16(T24, _mm_load_si128((__m128i*)tab_dct_16_1[(tab)])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
281 T65 = _mm_madd_epi16(T25, _mm_load_si128((__m128i*)tab_dct_16_1[(tab)])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
282 T66 = _mm_madd_epi16(T26, _mm_load_si128((__m128i*)tab_dct_16_1[(tab)])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
283 T67 = _mm_madd_epi16(T27, _mm_load_si128((__m128i*)tab_dct_16_1[(tab)])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
284 T60 = _mm_hadd_epi32(T60, T61); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
285 T61 = _mm_hadd_epi32(T62, T63); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
286 T62 = _mm_hadd_epi32(T64, T65); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
287 T63 = _mm_hadd_epi32(T66, T67); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
288 T60 = _mm_hadd_epi32(T60, T61); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
289 T61 = _mm_hadd_epi32(T62, T63); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
290 T60 = _mm_srai_epi32(_mm_add_epi32(T60, c_4), DCT16_SHIFT1); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
291 T61 = _mm_srai_epi32(_mm_add_epi32(T61, c_4), DCT16_SHIFT1); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
292 T70 = _mm_packs_epi32(T60, T61); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
293 _mm_store_si128((__m128i*)&tmp[(dstPos) * 16 + i], T70);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
294
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
295 MAKE_ODD(0, 1);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
296 MAKE_ODD(1, 3);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
297 MAKE_ODD(2, 5);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
298 MAKE_ODD(3, 7);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
299 MAKE_ODD(4, 9);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
300 MAKE_ODD(5, 11);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
301 MAKE_ODD(6, 13);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
302 MAKE_ODD(7, 15);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
303 #undef MAKE_ODD
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
304 }
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
305
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
306 // DCT2
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
307 for (i = 0; i < 16; i += 4)
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
308 {
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
309 T00A = _mm_load_si128((__m128i*)&tmp[(i + 0) * 16 + 0]); // [07 06 05 04 03 02 01 00]
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
310 T00B = _mm_load_si128((__m128i*)&tmp[(i + 0) * 16 + 8]); // [0F 0E 0D 0C 0B 0A 09 08]
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
311 T01A = _mm_load_si128((__m128i*)&tmp[(i + 1) * 16 + 0]); // [17 16 15 14 13 12 11 10]
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
312 T01B = _mm_load_si128((__m128i*)&tmp[(i + 1) * 16 + 8]); // [1F 1E 1D 1C 1B 1A 19 18]
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
313 T02A = _mm_load_si128((__m128i*)&tmp[(i + 2) * 16 + 0]); // [27 26 25 24 23 22 21 20]
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
314 T02B = _mm_load_si128((__m128i*)&tmp[(i + 2) * 16 + 8]); // [2F 2E 2D 2C 2B 2A 29 28]
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
315 T03A = _mm_load_si128((__m128i*)&tmp[(i + 3) * 16 + 0]); // [37 36 35 34 33 32 31 30]
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
316 T03B = _mm_load_si128((__m128i*)&tmp[(i + 3) * 16 + 8]); // [3F 3E 3D 3C 3B 3A 39 38]
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
317
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
318 T00A = _mm_shuffle_epi8(T00A, _mm_load_si128((__m128i*)tab_dct_16_0[2]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
319 T00B = _mm_shuffle_epi8(T00B, _mm_load_si128((__m128i*)tab_dct_16_0[3]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
320 T01A = _mm_shuffle_epi8(T01A, _mm_load_si128((__m128i*)tab_dct_16_0[2]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
321 T01B = _mm_shuffle_epi8(T01B, _mm_load_si128((__m128i*)tab_dct_16_0[3]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
322 T02A = _mm_shuffle_epi8(T02A, _mm_load_si128((__m128i*)tab_dct_16_0[2]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
323 T02B = _mm_shuffle_epi8(T02B, _mm_load_si128((__m128i*)tab_dct_16_0[3]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
324 T03A = _mm_shuffle_epi8(T03A, _mm_load_si128((__m128i*)tab_dct_16_0[2]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
325 T03B = _mm_shuffle_epi8(T03B, _mm_load_si128((__m128i*)tab_dct_16_0[3]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
326
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
327 T10 = _mm_unpacklo_epi16(T00A, T00B);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
328 T11 = _mm_unpackhi_epi16(T00A, T00B);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
329 T12 = _mm_unpacklo_epi16(T01A, T01B);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
330 T13 = _mm_unpackhi_epi16(T01A, T01B);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
331 T14 = _mm_unpacklo_epi16(T02A, T02B);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
332 T15 = _mm_unpackhi_epi16(T02A, T02B);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
333 T16 = _mm_unpacklo_epi16(T03A, T03B);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
334 T17 = _mm_unpackhi_epi16(T03A, T03B);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
335
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
336 T20 = _mm_madd_epi16(T10, _mm_load_si128((__m128i*)tab_dct_8[1]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
337 T21 = _mm_madd_epi16(T11, _mm_load_si128((__m128i*)tab_dct_8[1]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
338 T22 = _mm_madd_epi16(T12, _mm_load_si128((__m128i*)tab_dct_8[1]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
339 T23 = _mm_madd_epi16(T13, _mm_load_si128((__m128i*)tab_dct_8[1]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
340 T24 = _mm_madd_epi16(T14, _mm_load_si128((__m128i*)tab_dct_8[1]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
341 T25 = _mm_madd_epi16(T15, _mm_load_si128((__m128i*)tab_dct_8[1]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
342 T26 = _mm_madd_epi16(T16, _mm_load_si128((__m128i*)tab_dct_8[1]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
343 T27 = _mm_madd_epi16(T17, _mm_load_si128((__m128i*)tab_dct_8[1]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
344
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
345 T30 = _mm_add_epi32(T20, T21);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
346 T31 = _mm_add_epi32(T22, T23);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
347 T32 = _mm_add_epi32(T24, T25);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
348 T33 = _mm_add_epi32(T26, T27);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
349
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
350 T30 = _mm_hadd_epi32(T30, T31);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
351 T31 = _mm_hadd_epi32(T32, T33);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
352
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
353 T40 = _mm_hadd_epi32(T30, T31);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
354 T41 = _mm_hsub_epi32(T30, T31);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
355 T40 = _mm_srai_epi32(_mm_add_epi32(T40, c_512), DCT16_SHIFT2);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
356 T41 = _mm_srai_epi32(_mm_add_epi32(T41, c_512), DCT16_SHIFT2);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
357 T40 = _mm_packs_epi32(T40, T40);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
358 T41 = _mm_packs_epi32(T41, T41);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
359 _mm_storel_epi64((__m128i*)&dst[0 * 16 + i], T40);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
360 _mm_storel_epi64((__m128i*)&dst[8 * 16 + i], T41);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
361
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
362 T20 = _mm_madd_epi16(T10, _mm_load_si128((__m128i*)tab_dct_16_1[8]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
363 T21 = _mm_madd_epi16(T11, _mm_load_si128((__m128i*)tab_dct_16_1[8]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
364 T22 = _mm_madd_epi16(T12, _mm_load_si128((__m128i*)tab_dct_16_1[8]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
365 T23 = _mm_madd_epi16(T13, _mm_load_si128((__m128i*)tab_dct_16_1[8]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
366 T24 = _mm_madd_epi16(T14, _mm_load_si128((__m128i*)tab_dct_16_1[8]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
367 T25 = _mm_madd_epi16(T15, _mm_load_si128((__m128i*)tab_dct_16_1[8]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
368 T26 = _mm_madd_epi16(T16, _mm_load_si128((__m128i*)tab_dct_16_1[8]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
369 T27 = _mm_madd_epi16(T17, _mm_load_si128((__m128i*)tab_dct_16_1[8]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
370
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
371 T30 = _mm_add_epi32(T20, T21);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
372 T31 = _mm_add_epi32(T22, T23);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
373 T32 = _mm_add_epi32(T24, T25);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
374 T33 = _mm_add_epi32(T26, T27);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
375
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
376 T30 = _mm_hadd_epi32(T30, T31);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
377 T31 = _mm_hadd_epi32(T32, T33);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
378
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
379 T40 = _mm_hadd_epi32(T30, T31);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
380 T40 = _mm_srai_epi32(_mm_add_epi32(T40, c_512), DCT16_SHIFT2);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
381 T40 = _mm_packs_epi32(T40, T40);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
382 _mm_storel_epi64((__m128i*)&dst[4 * 16 + i], T40);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
383
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
384 T20 = _mm_madd_epi16(T10, _mm_load_si128((__m128i*)tab_dct_16_1[9]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
385 T21 = _mm_madd_epi16(T11, _mm_load_si128((__m128i*)tab_dct_16_1[9]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
386 T22 = _mm_madd_epi16(T12, _mm_load_si128((__m128i*)tab_dct_16_1[9]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
387 T23 = _mm_madd_epi16(T13, _mm_load_si128((__m128i*)tab_dct_16_1[9]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
388 T24 = _mm_madd_epi16(T14, _mm_load_si128((__m128i*)tab_dct_16_1[9]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
389 T25 = _mm_madd_epi16(T15, _mm_load_si128((__m128i*)tab_dct_16_1[9]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
390 T26 = _mm_madd_epi16(T16, _mm_load_si128((__m128i*)tab_dct_16_1[9]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
391 T27 = _mm_madd_epi16(T17, _mm_load_si128((__m128i*)tab_dct_16_1[9]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
392
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
393 T30 = _mm_add_epi32(T20, T21);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
394 T31 = _mm_add_epi32(T22, T23);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
395 T32 = _mm_add_epi32(T24, T25);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
396 T33 = _mm_add_epi32(T26, T27);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
397
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
398 T30 = _mm_hadd_epi32(T30, T31);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
399 T31 = _mm_hadd_epi32(T32, T33);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
400
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
401 T40 = _mm_hadd_epi32(T30, T31);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
402 T40 = _mm_srai_epi32(_mm_add_epi32(T40, c_512), DCT16_SHIFT2);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
403 T40 = _mm_packs_epi32(T40, T40);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
404 _mm_storel_epi64((__m128i*)&dst[12 * 16 + i], T40);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
405
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
406 T20 = _mm_madd_epi16(T10, _mm_load_si128((__m128i*)tab_dct_16_1[10]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
407 T21 = _mm_madd_epi16(T11, _mm_load_si128((__m128i*)tab_dct_16_1[10]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
408 T22 = _mm_madd_epi16(T12, _mm_load_si128((__m128i*)tab_dct_16_1[10]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
409 T23 = _mm_madd_epi16(T13, _mm_load_si128((__m128i*)tab_dct_16_1[10]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
410 T24 = _mm_madd_epi16(T14, _mm_load_si128((__m128i*)tab_dct_16_1[10]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
411 T25 = _mm_madd_epi16(T15, _mm_load_si128((__m128i*)tab_dct_16_1[10]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
412 T26 = _mm_madd_epi16(T16, _mm_load_si128((__m128i*)tab_dct_16_1[10]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
413 T27 = _mm_madd_epi16(T17, _mm_load_si128((__m128i*)tab_dct_16_1[10]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
414
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
415 T30 = _mm_sub_epi32(T20, T21);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
416 T31 = _mm_sub_epi32(T22, T23);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
417 T32 = _mm_sub_epi32(T24, T25);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
418 T33 = _mm_sub_epi32(T26, T27);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
419
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
420 T30 = _mm_hadd_epi32(T30, T31);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
421 T31 = _mm_hadd_epi32(T32, T33);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
422
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
423 T40 = _mm_hadd_epi32(T30, T31);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
424 T40 = _mm_srai_epi32(_mm_add_epi32(T40, c_512), DCT16_SHIFT2);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
425 T40 = _mm_packs_epi32(T40, T40);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
426 _mm_storel_epi64((__m128i*)&dst[2 * 16 + i], T40);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
427
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
428 T20 = _mm_madd_epi16(T10, _mm_load_si128((__m128i*)tab_dct_16_1[11]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
429 T21 = _mm_madd_epi16(T11, _mm_load_si128((__m128i*)tab_dct_16_1[11]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
430 T22 = _mm_madd_epi16(T12, _mm_load_si128((__m128i*)tab_dct_16_1[11]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
431 T23 = _mm_madd_epi16(T13, _mm_load_si128((__m128i*)tab_dct_16_1[11]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
432 T24 = _mm_madd_epi16(T14, _mm_load_si128((__m128i*)tab_dct_16_1[11]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
433 T25 = _mm_madd_epi16(T15, _mm_load_si128((__m128i*)tab_dct_16_1[11]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
434 T26 = _mm_madd_epi16(T16, _mm_load_si128((__m128i*)tab_dct_16_1[11]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
435 T27 = _mm_madd_epi16(T17, _mm_load_si128((__m128i*)tab_dct_16_1[11]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
436
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
437 T30 = _mm_sub_epi32(T20, T21);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
438 T31 = _mm_sub_epi32(T22, T23);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
439 T32 = _mm_sub_epi32(T24, T25);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
440 T33 = _mm_sub_epi32(T26, T27);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
441
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
442 T30 = _mm_hadd_epi32(T30, T31);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
443 T31 = _mm_hadd_epi32(T32, T33);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
444
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
445 T40 = _mm_hadd_epi32(T30, T31);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
446 T40 = _mm_srai_epi32(_mm_add_epi32(T40, c_512), DCT16_SHIFT2);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
447 T40 = _mm_packs_epi32(T40, T40);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
448 _mm_storel_epi64((__m128i*)&dst[6 * 16 + i], T40);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
449
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
450 T20 = _mm_madd_epi16(T10, _mm_load_si128((__m128i*)tab_dct_16_1[12]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
451 T21 = _mm_madd_epi16(T11, _mm_load_si128((__m128i*)tab_dct_16_1[12]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
452 T22 = _mm_madd_epi16(T12, _mm_load_si128((__m128i*)tab_dct_16_1[12]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
453 T23 = _mm_madd_epi16(T13, _mm_load_si128((__m128i*)tab_dct_16_1[12]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
454 T24 = _mm_madd_epi16(T14, _mm_load_si128((__m128i*)tab_dct_16_1[12]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
455 T25 = _mm_madd_epi16(T15, _mm_load_si128((__m128i*)tab_dct_16_1[12]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
456 T26 = _mm_madd_epi16(T16, _mm_load_si128((__m128i*)tab_dct_16_1[12]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
457 T27 = _mm_madd_epi16(T17, _mm_load_si128((__m128i*)tab_dct_16_1[12]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
458
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
459 T30 = _mm_sub_epi32(T20, T21);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
460 T31 = _mm_sub_epi32(T22, T23);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
461 T32 = _mm_sub_epi32(T24, T25);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
462 T33 = _mm_sub_epi32(T26, T27);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
463
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
464 T30 = _mm_hadd_epi32(T30, T31);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
465 T31 = _mm_hadd_epi32(T32, T33);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
466
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
467 T40 = _mm_hadd_epi32(T30, T31);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
468 T40 = _mm_srai_epi32(_mm_add_epi32(T40, c_512), DCT16_SHIFT2);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
469 T40 = _mm_packs_epi32(T40, T40);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
470 _mm_storel_epi64((__m128i*)&dst[10 * 16 + i], T40);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
471
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
472 T20 = _mm_madd_epi16(T10, _mm_load_si128((__m128i*)tab_dct_16_1[13]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
473 T21 = _mm_madd_epi16(T11, _mm_load_si128((__m128i*)tab_dct_16_1[13]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
474 T22 = _mm_madd_epi16(T12, _mm_load_si128((__m128i*)tab_dct_16_1[13]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
475 T23 = _mm_madd_epi16(T13, _mm_load_si128((__m128i*)tab_dct_16_1[13]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
476 T24 = _mm_madd_epi16(T14, _mm_load_si128((__m128i*)tab_dct_16_1[13]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
477 T25 = _mm_madd_epi16(T15, _mm_load_si128((__m128i*)tab_dct_16_1[13]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
478 T26 = _mm_madd_epi16(T16, _mm_load_si128((__m128i*)tab_dct_16_1[13]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
479 T27 = _mm_madd_epi16(T17, _mm_load_si128((__m128i*)tab_dct_16_1[13]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
480
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
481 T30 = _mm_sub_epi32(T20, T21);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
482 T31 = _mm_sub_epi32(T22, T23);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
483 T32 = _mm_sub_epi32(T24, T25);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
484 T33 = _mm_sub_epi32(T26, T27);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
485
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
486 T30 = _mm_hadd_epi32(T30, T31);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
487 T31 = _mm_hadd_epi32(T32, T33);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
488
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
489 T40 = _mm_hadd_epi32(T30, T31);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
490 T40 = _mm_srai_epi32(_mm_add_epi32(T40, c_512), DCT16_SHIFT2);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
491 T40 = _mm_packs_epi32(T40, T40);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
492 _mm_storel_epi64((__m128i*)&dst[14 * 16 + i], T40);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
493
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
// MAKE_ODD(tab, dstPos): local helper macro that produces one output group and
// stores it at &dst[(dstPos) * 16 + i].
//   - T10/T12/T14/T16 are multiply-added (_mm_madd_epi16) with row `tab` of
//     tab_dct_16_1, and T11/T13/T15/T17 with row `tab + 1`.
//   - Each even/odd pair of products is added, then two rounds of horizontal
//     adds (_mm_hadd_epi32) reduce the four partial sums to the single vector T40.
//   - T40 has c_512 added (presumably the rounding offset for DCT16_SHIFT2 --
//     TODO confirm where c_512 is defined), is arithmetically shifted right by
//     DCT16_SHIFT2, packed to 16 bits with signed saturation, and its low
//     64 bits are written to dst.
// The macro relies on T10..T17, T20..T27, T30..T33, T40, c_512, dst and i from
// the enclosing scope and overwrites T20..T27, T30..T33 and T40.
// It is expanded below for dstPos = 1, 3, ..., 15 with tab = 14, 16, ..., 28,
// then undefined.
494 #define MAKE_ODD(tab, dstPos) \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
495 T20 = _mm_madd_epi16(T10, _mm_load_si128((__m128i*)tab_dct_16_1[(tab)])); /* [*O2_0 *O1_0 *O3_0 *O0_0] */ \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
496 T21 = _mm_madd_epi16(T11, _mm_load_si128((__m128i*)tab_dct_16_1[(tab) + 1])); /* [*O5_0 *O6_0 *O4_0 *O7_0] */ \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
497 T22 = _mm_madd_epi16(T12, _mm_load_si128((__m128i*)tab_dct_16_1[(tab)])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
498 T23 = _mm_madd_epi16(T13, _mm_load_si128((__m128i*)tab_dct_16_1[(tab) + 1])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
499 T24 = _mm_madd_epi16(T14, _mm_load_si128((__m128i*)tab_dct_16_1[(tab)])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
500 T25 = _mm_madd_epi16(T15, _mm_load_si128((__m128i*)tab_dct_16_1[(tab) + 1])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
501 T26 = _mm_madd_epi16(T16, _mm_load_si128((__m128i*)tab_dct_16_1[(tab)])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
502 T27 = _mm_madd_epi16(T17, _mm_load_si128((__m128i*)tab_dct_16_1[(tab) + 1])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
503 \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
504 T30 = _mm_add_epi32(T20, T21); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
505 T31 = _mm_add_epi32(T22, T23); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
506 T32 = _mm_add_epi32(T24, T25); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
507 T33 = _mm_add_epi32(T26, T27); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
508 \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
509 T30 = _mm_hadd_epi32(T30, T31); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
510 T31 = _mm_hadd_epi32(T32, T33); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
511 \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
512 T40 = _mm_hadd_epi32(T30, T31); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
513 T40 = _mm_srai_epi32(_mm_add_epi32(T40, c_512), DCT16_SHIFT2); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
514 T40 = _mm_packs_epi32(T40, T40); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
515 _mm_storel_epi64((__m128i*)&dst[(dstPos) * 16 + i], T40);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
516
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
517 MAKE_ODD(14, 1);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
518 MAKE_ODD(16, 3);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
519 MAKE_ODD(18, 5);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
520 MAKE_ODD(20, 7);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
521 MAKE_ODD(22, 9);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
522 MAKE_ODD(24, 11);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
523 MAKE_ODD(26, 13);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
524 MAKE_ODD(28, 15);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
525 #undef MAKE_ODD
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
526 }
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
527 }
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
528
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
// Single-row table of eight 16-bit constants. Each constant packs two
// consecutive byte indices (0x0F0E is bytes 0x0E,0x0F in little-endian memory
// order), so read as 16-bit lane numbers the row is 7, 0, 4, 3, 6, 1, 5, 2.
// NOTE(review): this looks like a byte-shuffle control mask (e.g. for
// _mm_shuffle_epi8) that reorders the eight 16-bit lanes of a vector; its use
// is not visible in this part of the file -- confirm against dct32.
529 ALIGN_VAR_32(static const int16_t, tab_dct_32_0[][8]) =
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
530 {
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
531 { 0x0F0E, 0x0100, 0x0908, 0x0706, 0x0D0C, 0x0302, 0x0B0A, 0x0504 }, // 0
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
532 };
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
533
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
// Table of int16_t rows, eight values per row; the trailing "// N" comments give
// the index of the (first) row each line produces.
// NOTE(review): presumably the coefficient table consumed by dct32 below; the
// uses are outside this part of the file -- confirm.
// Layout:
//   0..3     literal rows of +/- pairs of 89, 75, 50, 18
//   4..11    MAKE_COEF8: one row per call, arguments emitted in the order
//            a0, a7, a3, a4, a1, a6, a2, a5
//   12..43   first MAKE_COEF16: two rows per call
//   44..49   literal rows built from 64, 83 and 36
//   50..57   second MAKE_COEF16: four rows per call, every argument duplicated
//   58..97   literal rows with every value duplicated
//   98..161  third MAKE_COEF16: four rows per call, every argument followed by
//            its negation
// MAKE_COEF16 is defined, used and #undef'd three times with different bodies,
// so the meaning of a call depends on which section it appears in.
534 ALIGN_VAR_32(static const int16_t, tab_dct_32_1[][8]) =
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
535 {
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
536 { 89, -89, 18, -18, 75, -75, 50, -50 }, // 0
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
537 { 75, -75, -50, 50, -18, 18, -89, 89 }, // 1
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
538 { 50, -50, 75, -75, -89, 89, 18, -18 }, // 2
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
539 { 18, -18, -89, 89, -50, 50, 75, -75 }, // 3
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
540
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
// MAKE_COEF8: expands to one braced row (with trailing comma) holding the eight
// arguments permuted to a0, a7, a3, a4, a1, a6, a2, a5.
541 #define MAKE_COEF8(a0, a1, a2, a3, a4, a5, a6, a7) \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
542 { (a0), (a7), (a3), (a4), (a1), (a6), (a2), (a5) \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
543 }, \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
544
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
545 MAKE_COEF8(90, 87, 80, 70, 57, 43, 25, 9) // 4
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
546 MAKE_COEF8(87, 57, 9, -43, -80, -90, -70, -25) // 5
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
547 MAKE_COEF8(80, 9, -70, -87, -25, 57, 90, 43) // 6
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
548 MAKE_COEF8(70, -43, -87, 9, 90, 25, -80, -57) // 7
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
549 MAKE_COEF8(57, -80, -25, 90, -9, -87, 43, 70) // 8
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
550 MAKE_COEF8(43, -90, 57, 25, -87, 70, 9, -80) // 9
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
551 MAKE_COEF8(25, -70, 90, -80, 43, 9, -57, 87) // 10
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
552 MAKE_COEF8(9, -25, 43, -57, 70, -80, 87, -90) // 11
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
553 #undef MAKE_COEF8
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
554
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
// MAKE_COEF16 (first form): expands to two rows. The first row is a00..a07 in
// the order a00, a07, a03, a04, a01, a06, a02, a05; the second row is a08..a15
// in the order a15, a08, a12, a11, a14, a09, a13, a10.
555 #define MAKE_COEF16(a00, a01, a02, a03, a04, a05, a06, a07, a08, a09, a10, a11, a12, a13, a14, a15) \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
556 { (a00), (a07), (a03), (a04), (a01), (a06), (a02), (a05) }, \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
557 { (a15), (a08), (a12), (a11), (a14), (a09), (a13), (a10) },
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
558
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
559 MAKE_COEF16(90, 90, 88, 85, 82, 78, 73, 67, 61, 54, 46, 38, 31, 22, 13, 4) // 12
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
560 MAKE_COEF16(90, 82, 67, 46, 22, -4, -31, -54, -73, -85, -90, -88, -78, -61, -38, -13) // 14
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
561 MAKE_COEF16(88, 67, 31, -13, -54, -82, -90, -78, -46, -4, 38, 73, 90, 85, 61, 22) // 16
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
562 MAKE_COEF16(85, 46, -13, -67, -90, -73, -22, 38, 82, 88, 54, -4, -61, -90, -78, -31) // 18
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
563 MAKE_COEF16(82, 22, -54, -90, -61, 13, 78, 85, 31, -46, -90, -67, 4, 73, 88, 38) // 20
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
564 MAKE_COEF16(78, -4, -82, -73, 13, 85, 67, -22, -88, -61, 31, 90, 54, -38, -90, -46) // 22
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
565 MAKE_COEF16(73, -31, -90, -22, 78, 67, -38, -90, -13, 82, 61, -46, -88, -4, 85, 54) // 24
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
566 MAKE_COEF16(67, -54, -78, 38, 85, -22, -90, 4, 90, 13, -88, -31, 82, 46, -73, -61) // 26
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
567 MAKE_COEF16(61, -73, -46, 82, 31, -88, -13, 90, -4, -90, 22, 85, -38, -78, 54, 67) // 28
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
568 MAKE_COEF16(54, -85, -4, 88, -46, -61, 82, 13, -90, 38, 67, -78, -22, 90, -31, -73) // 30
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
569 MAKE_COEF16(46, -90, 38, 54, -90, 31, 61, -88, 22, 67, -85, 13, 73, -82, 4, 78) // 32
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
570 MAKE_COEF16(38, -88, 73, -4, -67, 90, -46, -31, 85, -78, 13, 61, -90, 54, 22, -82) // 34
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
571 MAKE_COEF16(31, -78, 90, -61, 4, 54, -88, 82, -38, -22, 73, -90, 67, -13, -46, 85) // 36
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
572 MAKE_COEF16(22, -61, 85, -90, 73, -38, -4, 46, -78, 90, -82, 54, -13, -31, 67, -88) // 38
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
573 MAKE_COEF16(13, -38, 61, -78, 88, -90, 85, -73, 54, -31, 4, 22, -46, 67, -82, 90) // 40
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
574 MAKE_COEF16(4, -13, 22, -31, 38, -46, 54, -61, 67, -73, 78, -82, 85, -88, 90, -90) // 42
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
575 #undef MAKE_COEF16
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
576
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
577 {
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
578 64, 64, 64, 64, 64, 64, 64, 64
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
579 }, // 44
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
580
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
581 { 64, 64, -64, -64, -64, -64, 64, 64 }, // 45
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
582
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
583 { 83, 83, 36, 36, -36, -36, -83, -83 }, // 46
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
584 { -83, -83, -36, -36, 36, 36, 83, 83 }, // 47
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
585
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
586 { 36, 36, -83, -83, 83, 83, -36, -36 }, // 48
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
587 { -36, -36, 83, 83, -83, -83, 36, 36 }, // 49
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
588
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
// MAKE_COEF16 (second form): expands to four rows in argument order, each
// argument written twice in adjacent positions (a00, a00, a01, a01, ...).
589 #define MAKE_COEF16(a00, a01, a02, a03, a04, a05, a06, a07, a08, a09, a10, a11, a12, a13, a14, a15) \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
590 { (a00), (a00), (a01), (a01), (a02), (a02), (a03), (a03) }, \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
591 { (a04), (a04), (a05), (a05), (a06), (a06), (a07), (a07) }, \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
592 { (a08), (a08), (a09), (a09), (a10), (a10), (a11), (a11) }, \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
593 { (a12), (a12), (a13), (a13), (a14), (a14), (a15), (a15) },
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
594
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
595 MAKE_COEF16(89, 75, 50, 18, -18, -50, -75, -89, -89, -75, -50, -18, 18, 50, 75, 89) // 50
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
596 MAKE_COEF16(75, -18, -89, -50, 50, 89, 18, -75, -75, 18, 89, 50, -50, -89, -18, 75) // 54
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
597
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
598 // TODO: convert below table here
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
599 #undef MAKE_COEF16
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
600
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
// Rows 58..97 below are written out literally; they use the same
// "each value twice" shape that the macro above generates.
601 {
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
602 50, 50, -89, -89, 18, 18, 75, 75
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
603 }, // 58
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
604 { -75, -75, -18, -18, 89, 89, -50, -50 }, // 59
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
605 { -50, -50, 89, 89, -18, -18, -75, -75 }, // 60
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
606 { 75, 75, 18, 18, -89, -89, 50, 50 }, // 61
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
607
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
608 { 18, 18, -50, -50, 75, 75, -89, -89 }, // 62
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
609 { 89, 89, -75, -75, 50, 50, -18, -18 }, // 63
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
610 { -18, -18, 50, 50, -75, -75, 89, 89 }, // 64
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
611 { -89, -89, 75, 75, -50, -50, 18, 18 }, // 65
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
612
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
613 { 90, 90, 87, 87, 80, 80, 70, 70 }, // 66
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
614 { 57, 57, 43, 43, 25, 25, 9, 9 }, // 67
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
615 { -9, -9, -25, -25, -43, -43, -57, -57 }, // 68
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
616 { -70, -70, -80, -80, -87, -87, -90, -90 }, // 69
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
617
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
618 { 87, 87, 57, 57, 9, 9, -43, -43 }, // 70
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
619 { -80, -80, -90, -90, -70, -70, -25, -25 }, // 71
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
620 { 25, 25, 70, 70, 90, 90, 80, 80 }, // 72
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
621 { 43, 43, -9, -9, -57, -57, -87, -87 }, // 73
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
622
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
623 { 80, 80, 9, 9, -70, -70, -87, -87 }, // 74
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
624 { -25, -25, 57, 57, 90, 90, 43, 43 }, // 75
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
625 { -43, -43, -90, -90, -57, -57, 25, 25 }, // 76
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
626 { 87, 87, 70, 70, -9, -9, -80, -80 }, // 77
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
627
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
628 { 70, 70, -43, -43, -87, -87, 9, 9 }, // 78
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
629 { 90, 90, 25, 25, -80, -80, -57, -57 }, // 79
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
630 { 57, 57, 80, 80, -25, -25, -90, -90 }, // 80
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
631 { -9, -9, 87, 87, 43, 43, -70, -70 }, // 81
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
632
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
633 { 57, 57, -80, -80, -25, -25, 90, 90 }, // 82
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
634 { -9, -9, -87, -87, 43, 43, 70, 70 }, // 83
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
635 { -70, -70, -43, -43, 87, 87, 9, 9 }, // 84
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
636 { -90, -90, 25, 25, 80, 80, -57, -57 }, // 85
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
637
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
638 { 43, 43, -90, -90, 57, 57, 25, 25 }, // 86
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
639 { -87, -87, 70, 70, 9, 9, -80, -80 }, // 87
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
640 { 80, 80, -9, -9, -70, -70, 87, 87 }, // 88
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
641 { -25, -25, -57, -57, 90, 90, -43, -43 }, // 89
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
642
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
643 { 25, 25, -70, -70, 90, 90, -80, -80 }, // 90
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
644 { 43, 43, 9, 9, -57, -57, 87, 87 }, // 91
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
645 { -87, -87, 57, 57, -9, -9, -43, -43 }, // 92
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
646 { 80, 80, -90, -90, 70, 70, -25, -25 }, // 93
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
647
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
648 { 9, 9, -25, -25, 43, 43, -57, -57 }, // 94
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
649 { 70, 70, -80, -80, 87, 87, -90, -90 }, // 95
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
650 { 90, 90, -87, -87, 80, 80, -70, -70 }, // 96
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
651 { 57, 57, -43, -43, 25, 25, -9, -9 }, // 97
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
652
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
// MAKE_COEF16 (third form): expands to four rows in argument order, each
// argument followed by its negation (a00, -a00, a01, -a01, ...).
653 #define MAKE_COEF16(a00, a01, a02, a03, a04, a05, a06, a07, a08, a09, a10, a11, a12, a13, a14, a15) \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
654 { (a00), -(a00), (a01), -(a01), (a02), -(a02), (a03), -(a03) }, \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
655 { (a04), -(a04), (a05), -(a05), (a06), -(a06), (a07), -(a07) }, \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
656 { (a08), -(a08), (a09), -(a09), (a10), -(a10), (a11), -(a11) }, \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
657 { (a12), -(a12), (a13), -(a13), (a14), -(a14), (a15), -(a15) },
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
658
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
659 MAKE_COEF16(90, 90, 88, 85, 82, 78, 73, 67, 61, 54, 46, 38, 31, 22, 13, 4) // 98
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
660 MAKE_COEF16(90, 82, 67, 46, 22, -4, -31, -54, -73, -85, -90, -88, -78, -61, -38, -13) //102
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
661 MAKE_COEF16(88, 67, 31, -13, -54, -82, -90, -78, -46, -4, 38, 73, 90, 85, 61, 22) //106
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
662 MAKE_COEF16(85, 46, -13, -67, -90, -73, -22, 38, +82, 88, 54, -4, -61, -90, -78, -31) //110
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
663 MAKE_COEF16(82, 22, -54, -90, -61, 13, 78, 85, +31, -46, -90, -67, 4, 73, 88, 38) //114
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
664 MAKE_COEF16(78, -4, -82, -73, 13, 85, 67, -22, -88, -61, 31, 90, 54, -38, -90, -46) //118
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
665 MAKE_COEF16(73, -31, -90, -22, 78, 67, -38, -90, -13, 82, 61, -46, -88, -4, 85, 54) //122
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
666 MAKE_COEF16(67, -54, -78, 38, 85, -22, -90, 4, +90, 13, -88, -31, 82, 46, -73, -61) //126
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
667 MAKE_COEF16(61, -73, -46, 82, 31, -88, -13, 90, -4, -90, 22, 85, -38, -78, 54, 67) //130
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
668 MAKE_COEF16(54, -85, -4, 88, -46, -61, 82, 13, -90, 38, 67, -78, -22, 90, -31, -73) //134
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
669 MAKE_COEF16(46, -90, 38, 54, -90, 31, 61, -88, +22, 67, -85, 13, 73, -82, 4, 78) //138
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
670 MAKE_COEF16(38, -88, 73, -4, -67, 90, -46, -31, +85, -78, 13, 61, -90, 54, 22, -82) //142
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
671 MAKE_COEF16(31, -78, 90, -61, 4, 54, -88, 82, -38, -22, 73, -90, 67, -13, -46, 85) //146
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
672 MAKE_COEF16(22, -61, 85, -90, 73, -38, -4, 46, -78, 90, -82, 54, -13, -31, 67, -88) //150
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
673 MAKE_COEF16(13, -38, 61, -78, 88, -90, 85, -73, +54, -31, 4, 22, -46, 67, -82, 90) //154
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
674 MAKE_COEF16(4, -13, 22, -31, 38, -46, 54, -61, +67, -73, 78, -82, 85, -88, 90, -90) //158
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
675
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
676 #undef MAKE_COEF16
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
677 };
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
678
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
679 static void dct32(const int16_t *src, int16_t *dst, intptr_t stride)
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
680 {
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
681 // Const
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
682 __m128i c_8 = _mm_set1_epi32(DCT32_ADD1);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
683 __m128i c_1024 = _mm_set1_epi32(DCT32_ADD2);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
684
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
685 int i;
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
686
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
687 __m128i T00A, T01A, T02A, T03A, T04A, T05A, T06A, T07A;
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
688 __m128i T00B, T01B, T02B, T03B, T04B, T05B, T06B, T07B;
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
689 __m128i T00C, T01C, T02C, T03C, T04C, T05C, T06C, T07C;
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
690 __m128i T00D, T01D, T02D, T03D, T04D, T05D, T06D, T07D;
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
691 __m128i T10A, T11A, T12A, T13A, T14A, T15A, T16A, T17A;
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
692 __m128i T10B, T11B, T12B, T13B, T14B, T15B, T16B, T17B;
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
693 __m128i T20, T21, T22, T23, T24, T25, T26, T27;
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
694 __m128i T30, T31, T32, T33, T34, T35, T36, T37;
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
695 __m128i T40, T41, T42, T43, T44, T45, T46, T47;
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
696 __m128i T50, T51, T52, T53;
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
697 __m128i T60, T61, T62, T63, T64, T65, T66, T67;
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
698 __m128i im[32][4];
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
699
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
700 // DCT1
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
701 for (i = 0; i < 32 / 8; i++)
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
702 {
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
703 T00A = _mm_load_si128((__m128i*)&src[(i * 8 + 0) * stride + 0]); // [07 06 05 04 03 02 01 00]
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
704 T00B = _mm_load_si128((__m128i*)&src[(i * 8 + 0) * stride + 8]); // [15 14 13 12 11 10 09 08]
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
705 T00C = _mm_load_si128((__m128i*)&src[(i * 8 + 0) * stride + 16]); // [23 22 21 20 19 18 17 16]
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
706 T00D = _mm_load_si128((__m128i*)&src[(i * 8 + 0) * stride + 24]); // [31 30 29 28 27 26 25 24]
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
707 T01A = _mm_load_si128((__m128i*)&src[(i * 8 + 1) * stride + 0]);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
708 T01B = _mm_load_si128((__m128i*)&src[(i * 8 + 1) * stride + 8]);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
709 T01C = _mm_load_si128((__m128i*)&src[(i * 8 + 1) * stride + 16]);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
710 T01D = _mm_load_si128((__m128i*)&src[(i * 8 + 1) * stride + 24]);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
711 T02A = _mm_load_si128((__m128i*)&src[(i * 8 + 2) * stride + 0]);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
712 T02B = _mm_load_si128((__m128i*)&src[(i * 8 + 2) * stride + 8]);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
713 T02C = _mm_load_si128((__m128i*)&src[(i * 8 + 2) * stride + 16]);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
714 T02D = _mm_load_si128((__m128i*)&src[(i * 8 + 2) * stride + 24]);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
715 T03A = _mm_load_si128((__m128i*)&src[(i * 8 + 3) * stride + 0]);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
716 T03B = _mm_load_si128((__m128i*)&src[(i * 8 + 3) * stride + 8]);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
717 T03C = _mm_load_si128((__m128i*)&src[(i * 8 + 3) * stride + 16]);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
718 T03D = _mm_load_si128((__m128i*)&src[(i * 8 + 3) * stride + 24]);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
719 T04A = _mm_load_si128((__m128i*)&src[(i * 8 + 4) * stride + 0]);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
720 T04B = _mm_load_si128((__m128i*)&src[(i * 8 + 4) * stride + 8]);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
721 T04C = _mm_load_si128((__m128i*)&src[(i * 8 + 4) * stride + 16]);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
722 T04D = _mm_load_si128((__m128i*)&src[(i * 8 + 4) * stride + 24]);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
723 T05A = _mm_load_si128((__m128i*)&src[(i * 8 + 5) * stride + 0]);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
724 T05B = _mm_load_si128((__m128i*)&src[(i * 8 + 5) * stride + 8]);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
725 T05C = _mm_load_si128((__m128i*)&src[(i * 8 + 5) * stride + 16]);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
726 T05D = _mm_load_si128((__m128i*)&src[(i * 8 + 5) * stride + 24]);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
727 T06A = _mm_load_si128((__m128i*)&src[(i * 8 + 6) * stride + 0]);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
728 T06B = _mm_load_si128((__m128i*)&src[(i * 8 + 6) * stride + 8]);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
729 T06C = _mm_load_si128((__m128i*)&src[(i * 8 + 6) * stride + 16]);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
730 T06D = _mm_load_si128((__m128i*)&src[(i * 8 + 6) * stride + 24]);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
731 T07A = _mm_load_si128((__m128i*)&src[(i * 8 + 7) * stride + 0]);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
732 T07B = _mm_load_si128((__m128i*)&src[(i * 8 + 7) * stride + 8]);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
733 T07C = _mm_load_si128((__m128i*)&src[(i * 8 + 7) * stride + 16]);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
734 T07D = _mm_load_si128((__m128i*)&src[(i * 8 + 7) * stride + 24]);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
735
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
736 T00A = _mm_shuffle_epi8(T00A, _mm_load_si128((__m128i*)tab_dct_16_0[1])); // [05 02 06 01 04 03 07 00]
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
737 T00B = _mm_shuffle_epi8(T00B, _mm_load_si128((__m128i*)tab_dct_32_0[0])); // [10 13 09 14 11 12 08 15]
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
738 T00C = _mm_shuffle_epi8(T00C, _mm_load_si128((__m128i*)tab_dct_16_0[1])); // [21 18 22 17 20 19 23 16]
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
739 T00D = _mm_shuffle_epi8(T00D, _mm_load_si128((__m128i*)tab_dct_32_0[0])); // [26 29 25 30 27 28 24 31]
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
740 T01A = _mm_shuffle_epi8(T01A, _mm_load_si128((__m128i*)tab_dct_16_0[1]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
741 T01B = _mm_shuffle_epi8(T01B, _mm_load_si128((__m128i*)tab_dct_32_0[0]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
742 T01C = _mm_shuffle_epi8(T01C, _mm_load_si128((__m128i*)tab_dct_16_0[1]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
743 T01D = _mm_shuffle_epi8(T01D, _mm_load_si128((__m128i*)tab_dct_32_0[0]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
744 T02A = _mm_shuffle_epi8(T02A, _mm_load_si128((__m128i*)tab_dct_16_0[1]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
745 T02B = _mm_shuffle_epi8(T02B, _mm_load_si128((__m128i*)tab_dct_32_0[0]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
746 T02C = _mm_shuffle_epi8(T02C, _mm_load_si128((__m128i*)tab_dct_16_0[1]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
747 T02D = _mm_shuffle_epi8(T02D, _mm_load_si128((__m128i*)tab_dct_32_0[0]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
748 T03A = _mm_shuffle_epi8(T03A, _mm_load_si128((__m128i*)tab_dct_16_0[1]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
749 T03B = _mm_shuffle_epi8(T03B, _mm_load_si128((__m128i*)tab_dct_32_0[0]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
750 T03C = _mm_shuffle_epi8(T03C, _mm_load_si128((__m128i*)tab_dct_16_0[1]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
751 T03D = _mm_shuffle_epi8(T03D, _mm_load_si128((__m128i*)tab_dct_32_0[0]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
752 T04A = _mm_shuffle_epi8(T04A, _mm_load_si128((__m128i*)tab_dct_16_0[1]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
753 T04B = _mm_shuffle_epi8(T04B, _mm_load_si128((__m128i*)tab_dct_32_0[0]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
754 T04C = _mm_shuffle_epi8(T04C, _mm_load_si128((__m128i*)tab_dct_16_0[1]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
755 T04D = _mm_shuffle_epi8(T04D, _mm_load_si128((__m128i*)tab_dct_32_0[0]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
756 T05A = _mm_shuffle_epi8(T05A, _mm_load_si128((__m128i*)tab_dct_16_0[1]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
757 T05B = _mm_shuffle_epi8(T05B, _mm_load_si128((__m128i*)tab_dct_32_0[0]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
758 T05C = _mm_shuffle_epi8(T05C, _mm_load_si128((__m128i*)tab_dct_16_0[1]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
759 T05D = _mm_shuffle_epi8(T05D, _mm_load_si128((__m128i*)tab_dct_32_0[0]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
760 T06A = _mm_shuffle_epi8(T06A, _mm_load_si128((__m128i*)tab_dct_16_0[1]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
761 T06B = _mm_shuffle_epi8(T06B, _mm_load_si128((__m128i*)tab_dct_32_0[0]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
762 T06C = _mm_shuffle_epi8(T06C, _mm_load_si128((__m128i*)tab_dct_16_0[1]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
763 T06D = _mm_shuffle_epi8(T06D, _mm_load_si128((__m128i*)tab_dct_32_0[0]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
764 T07A = _mm_shuffle_epi8(T07A, _mm_load_si128((__m128i*)tab_dct_16_0[1]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
765 T07B = _mm_shuffle_epi8(T07B, _mm_load_si128((__m128i*)tab_dct_32_0[0]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
766 T07C = _mm_shuffle_epi8(T07C, _mm_load_si128((__m128i*)tab_dct_16_0[1]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
767 T07D = _mm_shuffle_epi8(T07D, _mm_load_si128((__m128i*)tab_dct_32_0[0]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
768
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
769 T10A = _mm_add_epi16(T00A, T00D); // [E05 E02 E06 E01 E04 E03 E07 E00]
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
770 T10B = _mm_add_epi16(T00B, T00C); // [E10 E13 E09 E14 E11 E12 E08 E15]
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
771 T11A = _mm_add_epi16(T01A, T01D);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
772 T11B = _mm_add_epi16(T01B, T01C);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
773 T12A = _mm_add_epi16(T02A, T02D);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
774 T12B = _mm_add_epi16(T02B, T02C);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
775 T13A = _mm_add_epi16(T03A, T03D);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
776 T13B = _mm_add_epi16(T03B, T03C);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
777 T14A = _mm_add_epi16(T04A, T04D);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
778 T14B = _mm_add_epi16(T04B, T04C);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
779 T15A = _mm_add_epi16(T05A, T05D);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
780 T15B = _mm_add_epi16(T05B, T05C);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
781 T16A = _mm_add_epi16(T06A, T06D);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
782 T16B = _mm_add_epi16(T06B, T06C);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
783 T17A = _mm_add_epi16(T07A, T07D);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
784 T17B = _mm_add_epi16(T07B, T07C);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
785
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
786 T00A = _mm_sub_epi16(T00A, T00D); // [O05 O02 O06 O01 O04 O03 O07 O00]
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
787 T00B = _mm_sub_epi16(T00B, T00C); // [O10 O13 O09 O14 O11 O12 O08 O15]
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
788 T01A = _mm_sub_epi16(T01A, T01D);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
789 T01B = _mm_sub_epi16(T01B, T01C);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
790 T02A = _mm_sub_epi16(T02A, T02D);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
791 T02B = _mm_sub_epi16(T02B, T02C);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
792 T03A = _mm_sub_epi16(T03A, T03D);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
793 T03B = _mm_sub_epi16(T03B, T03C);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
794 T04A = _mm_sub_epi16(T04A, T04D);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
795 T04B = _mm_sub_epi16(T04B, T04C);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
796 T05A = _mm_sub_epi16(T05A, T05D);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
797 T05B = _mm_sub_epi16(T05B, T05C);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
798 T06A = _mm_sub_epi16(T06A, T06D);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
799 T06B = _mm_sub_epi16(T06B, T06C);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
800 T07A = _mm_sub_epi16(T07A, T07D);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
801 T07B = _mm_sub_epi16(T07B, T07C);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
802
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
803 T20 = _mm_add_epi16(T10A, T10B); // [EE5 EE2 EE6 EE1 EE4 EE3 EE7 EE0]
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
804 T21 = _mm_add_epi16(T11A, T11B);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
805 T22 = _mm_add_epi16(T12A, T12B);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
806 T23 = _mm_add_epi16(T13A, T13B);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
807 T24 = _mm_add_epi16(T14A, T14B);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
808 T25 = _mm_add_epi16(T15A, T15B);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
809 T26 = _mm_add_epi16(T16A, T16B);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
810 T27 = _mm_add_epi16(T17A, T17B);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
811
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
812 T30 = _mm_madd_epi16(T20, _mm_load_si128((__m128i*)tab_dct_8[1]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
813 T31 = _mm_madd_epi16(T21, _mm_load_si128((__m128i*)tab_dct_8[1]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
814 T32 = _mm_madd_epi16(T22, _mm_load_si128((__m128i*)tab_dct_8[1]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
815 T33 = _mm_madd_epi16(T23, _mm_load_si128((__m128i*)tab_dct_8[1]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
816 T34 = _mm_madd_epi16(T24, _mm_load_si128((__m128i*)tab_dct_8[1]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
817 T35 = _mm_madd_epi16(T25, _mm_load_si128((__m128i*)tab_dct_8[1]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
818 T36 = _mm_madd_epi16(T26, _mm_load_si128((__m128i*)tab_dct_8[1]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
819 T37 = _mm_madd_epi16(T27, _mm_load_si128((__m128i*)tab_dct_8[1]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
820
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
821 T40 = _mm_hadd_epi32(T30, T31);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
822 T41 = _mm_hadd_epi32(T32, T33);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
823 T42 = _mm_hadd_epi32(T34, T35);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
824 T43 = _mm_hadd_epi32(T36, T37);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
825
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
826 T50 = _mm_hadd_epi32(T40, T41);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
827 T51 = _mm_hadd_epi32(T42, T43);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
828 T50 = _mm_srai_epi32(_mm_add_epi32(T50, c_8), DCT32_SHIFT1);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
829 T51 = _mm_srai_epi32(_mm_add_epi32(T51, c_8), DCT32_SHIFT1);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
830 T60 = _mm_packs_epi32(T50, T51);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
831 im[0][i] = T60;
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
832
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
833 T50 = _mm_hsub_epi32(T40, T41);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
834 T51 = _mm_hsub_epi32(T42, T43);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
835 T50 = _mm_srai_epi32(_mm_add_epi32(T50, c_8), DCT32_SHIFT1);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
836 T51 = _mm_srai_epi32(_mm_add_epi32(T51, c_8), DCT32_SHIFT1);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
837 T60 = _mm_packs_epi32(T50, T51);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
838 im[16][i] = T60;
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
839
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
840 T30 = _mm_madd_epi16(T20, _mm_load_si128((__m128i*)tab_dct_16_1[8]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
841 T31 = _mm_madd_epi16(T21, _mm_load_si128((__m128i*)tab_dct_16_1[8]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
842 T32 = _mm_madd_epi16(T22, _mm_load_si128((__m128i*)tab_dct_16_1[8]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
843 T33 = _mm_madd_epi16(T23, _mm_load_si128((__m128i*)tab_dct_16_1[8]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
844 T34 = _mm_madd_epi16(T24, _mm_load_si128((__m128i*)tab_dct_16_1[8]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
845 T35 = _mm_madd_epi16(T25, _mm_load_si128((__m128i*)tab_dct_16_1[8]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
846 T36 = _mm_madd_epi16(T26, _mm_load_si128((__m128i*)tab_dct_16_1[8]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
847 T37 = _mm_madd_epi16(T27, _mm_load_si128((__m128i*)tab_dct_16_1[8]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
848
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
849 T40 = _mm_hadd_epi32(T30, T31);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
850 T41 = _mm_hadd_epi32(T32, T33);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
851 T42 = _mm_hadd_epi32(T34, T35);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
852 T43 = _mm_hadd_epi32(T36, T37);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
853
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
854 T50 = _mm_hadd_epi32(T40, T41);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
855 T51 = _mm_hadd_epi32(T42, T43);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
856 T50 = _mm_srai_epi32(_mm_add_epi32(T50, c_8), DCT32_SHIFT1);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
857 T51 = _mm_srai_epi32(_mm_add_epi32(T51, c_8), DCT32_SHIFT1);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
858 T60 = _mm_packs_epi32(T50, T51);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
859 im[8][i] = T60;
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
860
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
861 T30 = _mm_madd_epi16(T20, _mm_load_si128((__m128i*)tab_dct_16_1[9]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
862 T31 = _mm_madd_epi16(T21, _mm_load_si128((__m128i*)tab_dct_16_1[9]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
863 T32 = _mm_madd_epi16(T22, _mm_load_si128((__m128i*)tab_dct_16_1[9]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
864 T33 = _mm_madd_epi16(T23, _mm_load_si128((__m128i*)tab_dct_16_1[9]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
865 T34 = _mm_madd_epi16(T24, _mm_load_si128((__m128i*)tab_dct_16_1[9]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
866 T35 = _mm_madd_epi16(T25, _mm_load_si128((__m128i*)tab_dct_16_1[9]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
867 T36 = _mm_madd_epi16(T26, _mm_load_si128((__m128i*)tab_dct_16_1[9]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
868 T37 = _mm_madd_epi16(T27, _mm_load_si128((__m128i*)tab_dct_16_1[9]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
869
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
870 T40 = _mm_hadd_epi32(T30, T31);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
871 T41 = _mm_hadd_epi32(T32, T33);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
872 T42 = _mm_hadd_epi32(T34, T35);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
873 T43 = _mm_hadd_epi32(T36, T37);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
874
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
875 T50 = _mm_hadd_epi32(T40, T41);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
876 T51 = _mm_hadd_epi32(T42, T43);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
877 T50 = _mm_srai_epi32(_mm_add_epi32(T50, c_8), DCT32_SHIFT1);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
878 T51 = _mm_srai_epi32(_mm_add_epi32(T51, c_8), DCT32_SHIFT1);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
879 T60 = _mm_packs_epi32(T50, T51);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
880 im[24][i] = T60;
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
881
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
// MAKE_ODD(tab, dstPos) -- single-table variant.
//   tab    : row index into tab_dct_32_1; the same row is used for all eight inputs.
//   dstPos : first index of im[][] that receives the result (stored at im[dstPos][i]).
// Multiplies T20..T27 (whatever they hold at the point of expansion) by
// tab_dct_32_1[tab] with _mm_madd_epi16, reduces the 32-bit partial sums with two
// rounds of _mm_hadd_epi32 into T50/T51, adds c_8, shifts right arithmetically by
// DCT32_SHIFT1, packs to 16 bits with signed saturation and writes im[dstPos][i].
// Overwrites T30..T37, T40..T43, T50, T51 and T60; T20..T27 are only read.
// NOTE(review): c_8 is presumably the rounding offset matching DCT32_SHIFT1 --
// both are defined outside this view; confirm.
882 #define MAKE_ODD(tab, dstPos) \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
883 T30 = _mm_madd_epi16(T20, _mm_load_si128((__m128i*)tab_dct_32_1[(tab)])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
884 T31 = _mm_madd_epi16(T21, _mm_load_si128((__m128i*)tab_dct_32_1[(tab)])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
885 T32 = _mm_madd_epi16(T22, _mm_load_si128((__m128i*)tab_dct_32_1[(tab)])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
886 T33 = _mm_madd_epi16(T23, _mm_load_si128((__m128i*)tab_dct_32_1[(tab)])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
887 T34 = _mm_madd_epi16(T24, _mm_load_si128((__m128i*)tab_dct_32_1[(tab)])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
888 T35 = _mm_madd_epi16(T25, _mm_load_si128((__m128i*)tab_dct_32_1[(tab)])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
889 T36 = _mm_madd_epi16(T26, _mm_load_si128((__m128i*)tab_dct_32_1[(tab)])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
890 T37 = _mm_madd_epi16(T27, _mm_load_si128((__m128i*)tab_dct_32_1[(tab)])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
891 \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
892 T40 = _mm_hadd_epi32(T30, T31); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
893 T41 = _mm_hadd_epi32(T32, T33); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
894 T42 = _mm_hadd_epi32(T34, T35); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
895 T43 = _mm_hadd_epi32(T36, T37); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
896 \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
897 T50 = _mm_hadd_epi32(T40, T41); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
898 T51 = _mm_hadd_epi32(T42, T43); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
899 T50 = _mm_srai_epi32(_mm_add_epi32(T50, c_8), DCT32_SHIFT1); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
900 T51 = _mm_srai_epi32(_mm_add_epi32(T51, c_8), DCT32_SHIFT1); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
901 T60 = _mm_packs_epi32(T50, T51); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
902 im[(dstPos)][i] = T60;
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
903
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
904 MAKE_ODD(0, 4);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
905 MAKE_ODD(1, 12);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
906 MAKE_ODD(2, 20);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
907 MAKE_ODD(3, 28);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
908
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
909 T20 = _mm_sub_epi16(T10A, T10B); // [EO5 EO2 EO6 EO1 EO4 EO3 EO7 EO0]
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
910 T21 = _mm_sub_epi16(T11A, T11B);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
911 T22 = _mm_sub_epi16(T12A, T12B);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
912 T23 = _mm_sub_epi16(T13A, T13B);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
913 T24 = _mm_sub_epi16(T14A, T14B);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
914 T25 = _mm_sub_epi16(T15A, T15B);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
915 T26 = _mm_sub_epi16(T16A, T16B);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
916 T27 = _mm_sub_epi16(T17A, T17B);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
917
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
918 MAKE_ODD(4, 2);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
919 MAKE_ODD(5, 6);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
920 MAKE_ODD(6, 10);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
921 MAKE_ODD(7, 14);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
922 MAKE_ODD(8, 18);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
923 MAKE_ODD(9, 22);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
924 MAKE_ODD(10, 26);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
925 MAKE_ODD(11, 30);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
926 #undef MAKE_ODD
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
927
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
// MAKE_ODD(tab, dstPos) -- two-table variant (replaces the earlier definition
// removed by the preceding #undef).
//   tab    : row index into tab_dct_32_1; row `tab` is applied to T00A..T07A and
//            row `tab + 1` to T00B..T07B, so one expansion consumes two
//            consecutive table rows.
//   dstPos : first index of im[][] that receives the result (stored at im[dstPos][i]).
// Sixteen _mm_madd_epi16 products are reduced by three rounds of _mm_hadd_epi32
// into T50/T51, then c_8 is added, the sums are shifted right arithmetically by
// DCT32_SHIFT1, packed to 16 bits with signed saturation and written to
// im[dstPos][i].
// Overwrites T20..T27, T30..T37, T40..T47, T50..T53 and T60; the T0xA/T0xB
// inputs are only read.
// NOTE(review): c_8 is presumably the rounding offset matching DCT32_SHIFT1 --
// both are defined outside this view; confirm.
928 #define MAKE_ODD(tab, dstPos) \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
929 T20 = _mm_madd_epi16(T00A, _mm_load_si128((__m128i*)tab_dct_32_1[(tab)])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
930 T21 = _mm_madd_epi16(T00B, _mm_load_si128((__m128i*)tab_dct_32_1[(tab) + 1])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
931 T22 = _mm_madd_epi16(T01A, _mm_load_si128((__m128i*)tab_dct_32_1[(tab)])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
932 T23 = _mm_madd_epi16(T01B, _mm_load_si128((__m128i*)tab_dct_32_1[(tab) + 1])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
933 T24 = _mm_madd_epi16(T02A, _mm_load_si128((__m128i*)tab_dct_32_1[(tab)])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
934 T25 = _mm_madd_epi16(T02B, _mm_load_si128((__m128i*)tab_dct_32_1[(tab) + 1])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
935 T26 = _mm_madd_epi16(T03A, _mm_load_si128((__m128i*)tab_dct_32_1[(tab)])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
936 T27 = _mm_madd_epi16(T03B, _mm_load_si128((__m128i*)tab_dct_32_1[(tab) + 1])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
937 T30 = _mm_madd_epi16(T04A, _mm_load_si128((__m128i*)tab_dct_32_1[(tab)])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
938 T31 = _mm_madd_epi16(T04B, _mm_load_si128((__m128i*)tab_dct_32_1[(tab) + 1])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
939 T32 = _mm_madd_epi16(T05A, _mm_load_si128((__m128i*)tab_dct_32_1[(tab)])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
940 T33 = _mm_madd_epi16(T05B, _mm_load_si128((__m128i*)tab_dct_32_1[(tab) + 1])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
941 T34 = _mm_madd_epi16(T06A, _mm_load_si128((__m128i*)tab_dct_32_1[(tab)])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
942 T35 = _mm_madd_epi16(T06B, _mm_load_si128((__m128i*)tab_dct_32_1[(tab) + 1])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
943 T36 = _mm_madd_epi16(T07A, _mm_load_si128((__m128i*)tab_dct_32_1[(tab)])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
944 T37 = _mm_madd_epi16(T07B, _mm_load_si128((__m128i*)tab_dct_32_1[(tab) + 1])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
945 \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
946 T40 = _mm_hadd_epi32(T20, T21); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
947 T41 = _mm_hadd_epi32(T22, T23); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
948 T42 = _mm_hadd_epi32(T24, T25); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
949 T43 = _mm_hadd_epi32(T26, T27); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
950 T44 = _mm_hadd_epi32(T30, T31); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
951 T45 = _mm_hadd_epi32(T32, T33); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
952 T46 = _mm_hadd_epi32(T34, T35); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
953 T47 = _mm_hadd_epi32(T36, T37); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
954 \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
955 T50 = _mm_hadd_epi32(T40, T41); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
956 T51 = _mm_hadd_epi32(T42, T43); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
957 T52 = _mm_hadd_epi32(T44, T45); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
958 T53 = _mm_hadd_epi32(T46, T47); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
959 \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
960 T50 = _mm_hadd_epi32(T50, T51); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
961 T51 = _mm_hadd_epi32(T52, T53); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
962 T50 = _mm_srai_epi32(_mm_add_epi32(T50, c_8), DCT32_SHIFT1); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
963 T51 = _mm_srai_epi32(_mm_add_epi32(T51, c_8), DCT32_SHIFT1); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
964 T60 = _mm_packs_epi32(T50, T51); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
965 im[(dstPos)][i] = T60;
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
966
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
967 MAKE_ODD(12, 1);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
968 MAKE_ODD(14, 3);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
969 MAKE_ODD(16, 5);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
970 MAKE_ODD(18, 7);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
971 MAKE_ODD(20, 9);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
972 MAKE_ODD(22, 11);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
973 MAKE_ODD(24, 13);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
974 MAKE_ODD(26, 15);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
975 MAKE_ODD(28, 17);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
976 MAKE_ODD(30, 19);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
977 MAKE_ODD(32, 21);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
978 MAKE_ODD(34, 23);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
979 MAKE_ODD(36, 25);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
980 MAKE_ODD(38, 27);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
981 MAKE_ODD(40, 29);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
982 MAKE_ODD(42, 31);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
983
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
984 #undef MAKE_ODD
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
985 }
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
986
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
987 // DCT2
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
988 for (i = 0; i < 32 / 4; i++)
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
989 {
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
990 // OPT_ME: a matrix multiply is used here to avoid register spills; is there a better way?
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
991 T00A = im[i * 4 + 0][0]; // [07 06 05 04 03 02 01 00]
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
992 T00B = im[i * 4 + 0][1]; // [15 14 13 12 11 10 09 08]
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
993 T00C = im[i * 4 + 0][2]; // [23 22 21 20 19 18 17 16]
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
994 T00D = im[i * 4 + 0][3]; // [31 30 29 28 27 26 25 24]
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
995 T01A = im[i * 4 + 1][0];
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
996 T01B = im[i * 4 + 1][1];
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
997 T01C = im[i * 4 + 1][2];
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
998 T01D = im[i * 4 + 1][3];
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
999 T02A = im[i * 4 + 2][0];
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1000 T02B = im[i * 4 + 2][1];
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1001 T02C = im[i * 4 + 2][2];
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1002 T02D = im[i * 4 + 2][3];
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1003 T03A = im[i * 4 + 3][0];
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1004 T03B = im[i * 4 + 3][1];
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1005 T03C = im[i * 4 + 3][2];
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1006 T03D = im[i * 4 + 3][3];
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1007
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1008 T00C = _mm_shuffle_epi8(T00C, _mm_load_si128((__m128i*)tab_dct_16_0[0])); // [16 17 18 19 20 21 22 23]
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1009 T00D = _mm_shuffle_epi8(T00D, _mm_load_si128((__m128i*)tab_dct_16_0[0])); // [24 25 26 27 28 29 30 31]
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1010 T01C = _mm_shuffle_epi8(T01C, _mm_load_si128((__m128i*)tab_dct_16_0[0]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1011 T01D = _mm_shuffle_epi8(T01D, _mm_load_si128((__m128i*)tab_dct_16_0[0]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1012 T02C = _mm_shuffle_epi8(T02C, _mm_load_si128((__m128i*)tab_dct_16_0[0]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1013 T02D = _mm_shuffle_epi8(T02D, _mm_load_si128((__m128i*)tab_dct_16_0[0]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1014 T03C = _mm_shuffle_epi8(T03C, _mm_load_si128((__m128i*)tab_dct_16_0[0]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1015 T03D = _mm_shuffle_epi8(T03D, _mm_load_si128((__m128i*)tab_dct_16_0[0]));
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1016
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1017 T10A = _mm_unpacklo_epi16(T00A, T00D); // [28 03 29 02 30 01 31 00]
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1018 T10B = _mm_unpackhi_epi16(T00A, T00D); // [24 07 25 06 26 05 27 04]
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1019 T00A = _mm_unpacklo_epi16(T00B, T00C); // [20 11 21 10 22 09 23 08]
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1020 T00B = _mm_unpackhi_epi16(T00B, T00C); // [16 15 17 14 18 13 19 12]
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1021 T11A = _mm_unpacklo_epi16(T01A, T01D);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1022 T11B = _mm_unpackhi_epi16(T01A, T01D);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1023 T01A = _mm_unpacklo_epi16(T01B, T01C);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1024 T01B = _mm_unpackhi_epi16(T01B, T01C);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1025 T12A = _mm_unpacklo_epi16(T02A, T02D);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1026 T12B = _mm_unpackhi_epi16(T02A, T02D);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1027 T02A = _mm_unpacklo_epi16(T02B, T02C);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1028 T02B = _mm_unpackhi_epi16(T02B, T02C);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1029 T13A = _mm_unpacklo_epi16(T03A, T03D);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1030 T13B = _mm_unpackhi_epi16(T03A, T03D);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1031 T03A = _mm_unpacklo_epi16(T03B, T03C);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1032 T03B = _mm_unpackhi_epi16(T03B, T03C);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1033
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1034 #define MAKE_ODD(tab0, tab1, tab2, tab3, dstPos) \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1035 T20 = _mm_madd_epi16(T10A, _mm_load_si128((__m128i*)tab_dct_32_1[(tab0)])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1036 T21 = _mm_madd_epi16(T10B, _mm_load_si128((__m128i*)tab_dct_32_1[(tab1)])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1037 T22 = _mm_madd_epi16(T00A, _mm_load_si128((__m128i*)tab_dct_32_1[(tab2)])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1038 T23 = _mm_madd_epi16(T00B, _mm_load_si128((__m128i*)tab_dct_32_1[(tab3)])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1039 T24 = _mm_madd_epi16(T11A, _mm_load_si128((__m128i*)tab_dct_32_1[(tab0)])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1040 T25 = _mm_madd_epi16(T11B, _mm_load_si128((__m128i*)tab_dct_32_1[(tab1)])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1041 T26 = _mm_madd_epi16(T01A, _mm_load_si128((__m128i*)tab_dct_32_1[(tab2)])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1042 T27 = _mm_madd_epi16(T01B, _mm_load_si128((__m128i*)tab_dct_32_1[(tab3)])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1043 T30 = _mm_madd_epi16(T12A, _mm_load_si128((__m128i*)tab_dct_32_1[(tab0)])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1044 T31 = _mm_madd_epi16(T12B, _mm_load_si128((__m128i*)tab_dct_32_1[(tab1)])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1045 T32 = _mm_madd_epi16(T02A, _mm_load_si128((__m128i*)tab_dct_32_1[(tab2)])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1046 T33 = _mm_madd_epi16(T02B, _mm_load_si128((__m128i*)tab_dct_32_1[(tab3)])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1047 T34 = _mm_madd_epi16(T13A, _mm_load_si128((__m128i*)tab_dct_32_1[(tab0)])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1048 T35 = _mm_madd_epi16(T13B, _mm_load_si128((__m128i*)tab_dct_32_1[(tab1)])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1049 T36 = _mm_madd_epi16(T03A, _mm_load_si128((__m128i*)tab_dct_32_1[(tab2)])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1050 T37 = _mm_madd_epi16(T03B, _mm_load_si128((__m128i*)tab_dct_32_1[(tab3)])); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1051 \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1052 T60 = _mm_hadd_epi32(T20, T21); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1053 T61 = _mm_hadd_epi32(T22, T23); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1054 T62 = _mm_hadd_epi32(T24, T25); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1055 T63 = _mm_hadd_epi32(T26, T27); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1056 T64 = _mm_hadd_epi32(T30, T31); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1057 T65 = _mm_hadd_epi32(T32, T33); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1058 T66 = _mm_hadd_epi32(T34, T35); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1059 T67 = _mm_hadd_epi32(T36, T37); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1060 \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1061 T60 = _mm_hadd_epi32(T60, T61); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1062 T61 = _mm_hadd_epi32(T62, T63); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1063 T62 = _mm_hadd_epi32(T64, T65); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1064 T63 = _mm_hadd_epi32(T66, T67); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1065 \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1066 T60 = _mm_hadd_epi32(T60, T61); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1067 T61 = _mm_hadd_epi32(T62, T63); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1068 \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1069 T60 = _mm_hadd_epi32(T60, T61); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1070 \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1071 T60 = _mm_srai_epi32(_mm_add_epi32(T60, c_1024), DCT32_SHIFT2); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1072 T60 = _mm_packs_epi32(T60, T60); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1073 _mm_storel_epi64((__m128i*)&dst[(dstPos) * 32 + (i * 4) + 0], T60); \
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1074
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1075 MAKE_ODD(44, 44, 44, 44, 0);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1076 MAKE_ODD(45, 45, 45, 45, 16);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1077 MAKE_ODD(46, 47, 46, 47, 8);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1078 MAKE_ODD(48, 49, 48, 49, 24);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1079
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1080 MAKE_ODD(50, 51, 52, 53, 4);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1081 MAKE_ODD(54, 55, 56, 57, 12);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1082 MAKE_ODD(58, 59, 60, 61, 20);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1083 MAKE_ODD(62, 63, 64, 65, 28);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1084
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1085 MAKE_ODD(66, 67, 68, 69, 2);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1086 MAKE_ODD(70, 71, 72, 73, 6);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1087 MAKE_ODD(74, 75, 76, 77, 10);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1088 MAKE_ODD(78, 79, 80, 81, 14);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1089
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1090 MAKE_ODD(82, 83, 84, 85, 18);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1091 MAKE_ODD(86, 87, 88, 89, 22);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1092 MAKE_ODD(90, 91, 92, 93, 26);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1093 MAKE_ODD(94, 95, 96, 97, 30);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1094
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1095 MAKE_ODD(98, 99, 100, 101, 1);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1096 MAKE_ODD(102, 103, 104, 105, 3);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1097 MAKE_ODD(106, 107, 108, 109, 5);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1098 MAKE_ODD(110, 111, 112, 113, 7);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1099 MAKE_ODD(114, 115, 116, 117, 9);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1100 MAKE_ODD(118, 119, 120, 121, 11);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1101 MAKE_ODD(122, 123, 124, 125, 13);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1102 MAKE_ODD(126, 127, 128, 129, 15);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1103 MAKE_ODD(130, 131, 132, 133, 17);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1104 MAKE_ODD(134, 135, 136, 137, 19);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1105 MAKE_ODD(138, 139, 140, 141, 21);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1106 MAKE_ODD(142, 143, 144, 145, 23);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1107 MAKE_ODD(146, 147, 148, 149, 25);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1108 MAKE_ODD(150, 151, 152, 153, 27);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1109 MAKE_ODD(154, 155, 156, 157, 29);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1110 MAKE_ODD(158, 159, 160, 161, 31);
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1111 #undef MAKE_ODD
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1112 }
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1113 }
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1114
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1115 namespace X265_NS {
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
// Registers the SSSE3 intrinsic DCT routines in the primitives table `p`:
// the `dct` entries of p.cu[] for the 16x16 and 32x32 block sizes are set to
// dct16 and dct32 respectively. No other entry of `p` is written here.
1116 void setupIntrinsicDCT_ssse3(EncoderPrimitives &p)
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1117 {
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1118 /* Note: We have AVX2 assembly for these two functions, but since AVX2 is
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1119 * still somewhat rare on end-user PCs we still compile and link these SSSE3
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1120 * intrinsic SIMD functions */
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1121 p.cu[BLOCK_16x16].dct = dct16; // 16x16 forward DCT entry -> dct16 (NOTE(review): dct16 is defined outside this view)
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1122 p.cu[BLOCK_32x32].dct = dct32; // 32x32 forward DCT entry -> dct32
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1123 }
772086c29cc7 Initial import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1124 } // namespace X265_NS