Mercurial > hg > dmlib
comparison src/dmvecmat.c @ 1306:696c58784635
Remove the SSE2 SIMD vector math paths; apparently they no longer work (segfault), possibly due to a 32-bit/64-bit issue.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Sat, 19 Aug 2017 13:02:34 +0300 |
parents | 848a88ce7a57 |
children | 69a5af2eb1ea |
comparison
equal
deleted
inserted
replaced
1305:582d49915059 | 1306:696c58784635 |
---|---|
10 void dm_vector_add_n(DMVector *dst, const DMVector *src, const int nlist) | 10 void dm_vector_add_n(DMVector *dst, const DMVector *src, const int nlist) |
11 { | 11 { |
12 int i; | 12 int i; |
13 for (i = 0; i < nlist; i++) | 13 for (i = 0; i < nlist; i++) |
14 { | 14 { |
15 #ifdef DM_USE_SIMD | |
16 asm("movups %2, %%xmm1\n" | |
17 "movups %1, %%xmm2\n" | |
18 "addps %%xmm2, %%xmm1\n" | |
19 "movups %%xmm1, %0\n" | |
20 : "=m" (dst[i]) | |
21 : "m" (dst[i]), "m" (src[i]) | |
22 : "memory", "%xmm1", "%xmm2"); | |
23 #else | |
24 dm_vector_add(dst + i, src + i); | 15 dm_vector_add(dst + i, src + i); |
25 #endif | |
26 } | 16 } |
27 } | 17 } |
28 | 18 |
29 | 19 |
30 void dm_vector_add_r_n(DMVector *dst, const DMVector *src1, const DMVector *src2, const int nlist) | 20 void dm_vector_add_r_n(DMVector *dst, const DMVector *src1, const DMVector *src2, const int nlist) |
31 { | 21 { |
32 int i; | 22 int i; |
33 for (i = 0; i < nlist; i++) | 23 for (i = 0; i < nlist; i++) |
34 { | 24 { |
35 #ifdef DM_USE_SIMD | |
36 asm("movups %2, %%xmm1\n" | |
37 "movups %1, %%xmm2\n" | |
38 "addps %%xmm2, %%xmm1\n" | |
39 "movups %%xmm1, %0\n" | |
40 : "=m" (dst[i]) | |
41 : "m" (src1[i]), "m" (src2[i]) | |
42 : "memory", "%xmm1", "%xmm2"); | |
43 #else | |
44 dm_vector_add_r(dst + i, src1 + i, src2 + i); | 25 dm_vector_add_r(dst + i, src1 + i, src2 + i); |
45 #endif | |
46 } | 26 } |
47 } | 27 } |
48 | 28 |
49 | 29 |
50 void dm_vector_sub_n(DMVector *dst, const DMVector *src, const int nlist) | 30 void dm_vector_sub_n(DMVector *dst, const DMVector *src, const int nlist) |
51 { | 31 { |
52 int i; | 32 int i; |
53 for (i = 0; i < nlist; i++) | 33 for (i = 0; i < nlist; i++) |
54 { | 34 { |
55 #ifdef DM_USE_SIMD | |
56 asm("movups %2, %%xmm1\n" | |
57 "movups %1, %%xmm2\n" | |
58 "subps %%xmm2, %%xmm1\n" | |
59 "movups %%xmm1, %0\n" | |
60 : "=m" (dst[i]) | |
61 : "m" (dst[i]), "m" (src[i]) | |
62 : "memory", "%xmm1", "%xmm2"); | |
63 #else | |
64 dm_vector_add(dst + i, src + i); | 35 dm_vector_add(dst + i, src + i); |
65 #endif | |
66 } | 36 } |
67 } | 37 } |
68 | 38 |
69 | 39 |
70 void dm_vector_sub_r_n(DMVector *dst, const DMVector *src1, const DMVector *src2, const int nlist) | 40 void dm_vector_sub_r_n(DMVector *dst, const DMVector *src1, const DMVector *src2, const int nlist) |
71 { | 41 { |
72 int i; | 42 int i; |
73 for (i = 0; i < nlist; i++) | 43 for (i = 0; i < nlist; i++) |
74 { | 44 { |
75 #ifdef DM_USE_SIMD | |
76 asm("movups %2, %%xmm1\n" | |
77 "movups %1, %%xmm2\n" | |
78 "subps %%xmm2, %%xmm1\n" | |
79 "movups %%xmm1, %0\n" | |
80 : "=m" (dst[i]) | |
81 : "m" (src1[i]), "m" (src2[i]) | |
82 : "memory", "%xmm1", "%xmm2"); | |
83 #else | |
84 dm_vector_sub_r(dst + i, src1 + i, src2 + i); | 45 dm_vector_sub_r(dst + i, src1 + i, src2 + i); |
85 #endif | |
86 } | 46 } |
87 } | 47 } |
88 | 48 |
89 | 49 |
90 /* Multiply given vector with a matrix | 50 /* Multiply given vector with a matrix |
91 */ | 51 */ |
92 void dm_vector_mul_by_mat(DMVector *vd, const DMVector *vs, const DMMatrix *mat) | 52 void dm_vector_mul_by_mat(DMVector *vd, const DMVector *vs, const DMMatrix *mat) |
93 { | 53 { |
94 #ifdef DM_USE_SIMD | |
95 asm volatile( | |
96 "mov %1, %%edx\n" | |
97 "movups (%%edx), %%xmm4\n" | |
98 "movups 16(%%edx), %%xmm5\n" | |
99 "movups 32(%%edx), %%xmm6\n" | |
100 "movups 48(%%edx), %%xmm7\n" | |
101 | |
102 // vector -> xmm0 | |
103 "movups %2, %%xmm0\n" | |
104 | |
105 // zero final result in xmm2 | |
106 "xorps %%xmm2, %%xmm2\n" | |
107 | |
108 // perform shuffle and multiply and add whole "column" "X" | |
109 "movups %%xmm0, %%xmm1\n" | |
110 "shufps $0x00, %%xmm1, %%xmm1\n" | |
111 "mulps %%xmm4, %%xmm1\n" | |
112 "addps %%xmm1, %%xmm2\n" | |
113 | |
114 // Y | |
115 "movups %%xmm0, %%xmm1\n" | |
116 "shufps $0x55, %%xmm1, %%xmm1\n" | |
117 "mulps %%xmm5, %%xmm1\n" | |
118 "addps %%xmm1, %%xmm2\n" | |
119 | |
120 // Z | |
121 "movups %%xmm0, %%xmm1\n" | |
122 "shufps $0xAA, %%xmm1, %%xmm1\n" | |
123 "mulps %%xmm6, %%xmm1\n" | |
124 "addps %%xmm1, %%xmm2\n" | |
125 | |
126 // W | |
127 "movups %%xmm0, %%xmm1\n" | |
128 "shufps $0xFF, %%xmm1, %%xmm1\n" | |
129 "mulps %%xmm7, %%xmm1\n" | |
130 "addps %%xmm1, %%xmm2\n" | |
131 | |
132 // Result -> | |
133 "movups %%xmm2, %0\n" | |
134 : "=m" (vd) | |
135 : "m" (mat), "m" (vs) | |
136 : "memory", "%edx", "%xmm0", "%xmm1", "%xmm2", "%xmm4", "%xmm5", "%xmm6", "%xmm7" | |
137 ); | |
138 #else | |
139 vd->x = (vs->x * mat->m[0][0]) + (vs->y * mat->m[1][0]) + (vs->z * mat->m[2][0]); | 54 vd->x = (vs->x * mat->m[0][0]) + (vs->y * mat->m[1][0]) + (vs->z * mat->m[2][0]); |
140 vd->y = (vs->x * mat->m[0][1]) + (vs->y * mat->m[1][1]) + (vs->z * mat->m[2][1]); | 55 vd->y = (vs->x * mat->m[0][1]) + (vs->y * mat->m[1][1]) + (vs->z * mat->m[2][1]); |
141 vd->z = (vs->x * mat->m[0][2]) + (vs->y * mat->m[1][2]) + (vs->z * mat->m[2][2]); | 56 vd->z = (vs->x * mat->m[0][2]) + (vs->y * mat->m[1][2]) + (vs->z * mat->m[2][2]); |
142 #endif | |
143 } | 57 } |
144 | 58 |
145 | 59 |
146 /* Multiply list of given vectors with given matrix. | 60 /* Multiply list of given vectors with given matrix. |
147 */ | 61 */ |
148 void dm_vector_mul_by_mat_n(DMVector *list, const int nlist, const DMMatrix *mat) | 62 void dm_vector_mul_by_mat_n(DMVector *list, const int nlist, const DMMatrix *mat) |
149 { | 63 { |
150 int i; | 64 int i; |
151 | 65 |
152 #ifdef DM_USE_SIMD | |
153 asm volatile( | |
154 "mov %0, %%edx\n" | |
155 "movups (%%edx), %%xmm4\n" | |
156 "movups 16(%%edx), %%xmm5\n" | |
157 "movups 32(%%edx), %%xmm6\n" | |
158 "movups 48(%%edx), %%xmm7\n" | |
159 : | |
160 : "m" (mat) | |
161 : "%edx", "%xmm4", "%xmm5", "%xmm6", "%xmm7" | |
162 ); | |
163 #endif | |
164 | |
165 for (i = 0; i < nlist; i++) | 66 for (i = 0; i < nlist; i++) |
166 { | 67 { |
167 #ifdef DM_USE_SIMD | |
168 asm volatile | |
169 ( | |
170 // list[i] -> xmm0 | |
171 "movups %1, %%xmm0\n" | |
172 | |
173 // zero final result in xmm2 | |
174 "xorps %%xmm2, %%xmm2\n" | |
175 | |
176 // perform shuffle and multiply and add whole "column" "X" | |
177 "movups %%xmm0, %%xmm1\n" | |
178 "shufps $0x00, %%xmm1, %%xmm1\n" | |
179 "mulps %%xmm4, %%xmm1\n" | |
180 "addps %%xmm1, %%xmm2\n" | |
181 | |
182 // Y | |
183 "movups %%xmm0, %%xmm1\n" | |
184 "shufps $0x55, %%xmm1, %%xmm1\n" | |
185 "mulps %%xmm5, %%xmm1\n" | |
186 "addps %%xmm1, %%xmm2\n" | |
187 | |
188 // Z | |
189 "movups %%xmm0, %%xmm1\n" | |
190 "shufps $0xAA, %%xmm1, %%xmm1\n" | |
191 "mulps %%xmm6, %%xmm1\n" | |
192 "addps %%xmm1, %%xmm2\n" | |
193 | |
194 // W | |
195 "movups %%xmm0, %%xmm1\n" | |
196 "shufps $0xFF, %%xmm1, %%xmm1\n" | |
197 "mulps %%xmm7, %%xmm1\n" | |
198 "addps %%xmm1, %%xmm2\n" | |
199 | |
200 // Result -> | |
201 "movups %%xmm2, %0\n" | |
202 : "=m" (list[i]) | |
203 : "m" (list[i]) | |
204 : "memory", "%xmm0", "%xmm1", "%xmm2", "%xmm4", "%xmm5", "%xmm6", "%xmm7"); | |
205 #else | |
206 DMVector q; | 68 DMVector q; |
207 memcpy(&q, &list[i], sizeof(DMVector)); | 69 memcpy(&q, &list[i], sizeof(DMVector)); |
208 | 70 |
209 list[i].x = (q.x * mat->m[0][0]) + (q.y * mat->m[1][0]) + (q.z * mat->m[2][0]); | 71 list[i].x = (q.x * mat->m[0][0]) + (q.y * mat->m[1][0]) + (q.z * mat->m[2][0]); |
210 list[i].y = (q.x * mat->m[0][1]) + (q.y * mat->m[1][1]) + (q.z * mat->m[2][1]); | 72 list[i].y = (q.x * mat->m[0][1]) + (q.y * mat->m[1][1]) + (q.z * mat->m[2][1]); |
211 list[i].z = (q.x * mat->m[0][2]) + (q.y * mat->m[1][2]) + (q.z * mat->m[2][2]); | 73 list[i].z = (q.x * mat->m[0][2]) + (q.y * mat->m[1][2]) + (q.z * mat->m[2][2]); |
212 #endif | |
213 } | 74 } |
214 } | 75 } |
215 | 76 |
216 | 77 |
217 /* Set matrix to unit-matrix | 78 /* Set matrix to unit-matrix |
240 | 101 |
241 /* Multiply matrices mat1 and mat2, putting result into dst | 102 /* Multiply matrices mat1 and mat2, putting result into dst |
242 */ | 103 */ |
243 void dm_matrix_mul_r(DMMatrix *dst, const DMMatrix *mat1, const DMMatrix *mat2) | 104 void dm_matrix_mul_r(DMMatrix *dst, const DMMatrix *mat1, const DMMatrix *mat2) |
244 { | 105 { |
245 #ifdef DM_USE_SIMD | |
246 asm volatile( | |
247 "mov %1, %%ebx\n" | |
248 "mov %2, %%edx\n" | |
249 | |
250 // -------------------------------------------------- | |
251 | |
252 // 0 | |
253 "movups (%%ebx), %%xmm0\n" // mat1[0] | |
254 "movups (%%edx), %%xmm1\n" // mat2[0] | |
255 "shufps $0x00, %%xmm1, %%xmm1\n" // mat2[0][0] | |
256 "mulps %%xmm0, %%xmm1\n" | |
257 "movups %%xmm1, %%xmm3\n" | |
258 | |
259 // 1 | |
260 "movups 16(%%ebx), %%xmm0\n" // mat1[0] | |
261 "movups (%%edx), %%xmm1\n" // mat2[0] | |
262 "shufps $0x55, %%xmm1, %%xmm1\n" // mat2[0][1] | |
263 "mulps %%xmm0, %%xmm1\n" | |
264 "addps %%xmm1, %%xmm3\n" | |
265 | |
266 // 2 | |
267 "movups 32(%%ebx), %%xmm0\n" // mat1[0] | |
268 "movups (%%edx), %%xmm1\n" // mat2[0] | |
269 "shufps $0xAA, %%xmm1, %%xmm1\n" // mat2[0][2] | |
270 "mulps %%xmm0, %%xmm1\n" | |
271 "addps %%xmm1, %%xmm3\n" | |
272 | |
273 // 3 | |
274 "movups 48(%%ebx), %%xmm0\n" // mat1[0] | |
275 "movups (%%edx), %%xmm1\n" // mat2[0] | |
276 "shufps $0xFF, %%xmm1, %%xmm1\n" // mat2[0][3] | |
277 "mulps %%xmm0, %%xmm1\n" | |
278 "addps %%xmm1, %%xmm3\n" | |
279 | |
280 "mov %0, %%ebx\n" | |
281 "movups %%xmm3, (%%ebx)\n" | |
282 | |
283 // -------------------------------------------------- | |
284 | |
285 "mov %1, %%ebx\n" | |
286 | |
287 // 0 | |
288 "movups (%%ebx), %%xmm0\n" // mat1[0] | |
289 "movups 16(%%edx), %%xmm1\n" // mat2[1] | |
290 "shufps $0x00, %%xmm1, %%xmm1\n" // mat2[0][0] | |
291 "mulps %%xmm0, %%xmm1\n" | |
292 "movups %%xmm1, %%xmm3\n" | |
293 | |
294 // 1 | |
295 "movups 16(%%ebx), %%xmm0\n" // mat1[0] | |
296 "movups 16(%%edx), %%xmm1\n" // mat2[1] | |
297 "shufps $0x55, %%xmm1, %%xmm1\n" // mat2[0][1] | |
298 "mulps %%xmm0, %%xmm1\n" | |
299 "addps %%xmm1, %%xmm3\n" | |
300 | |
301 // 2 | |
302 "movups 32(%%ebx), %%xmm0\n" // mat1[0] | |
303 "movups 16(%%edx), %%xmm1\n" // mat2[1] | |
304 "shufps $0xAA, %%xmm1, %%xmm1\n" // mat2[0][2] | |
305 "mulps %%xmm0, %%xmm1\n" | |
306 "addps %%xmm1, %%xmm3\n" | |
307 | |
308 // 3 | |
309 "movups 48(%%ebx), %%xmm0\n" // mat1[0] | |
310 "movups 16(%%edx), %%xmm1\n" // mat2[1] | |
311 "shufps $0xFF, %%xmm1, %%xmm1\n" // mat2[0][3] | |
312 "mulps %%xmm0, %%xmm1\n" | |
313 "addps %%xmm1, %%xmm3\n" | |
314 | |
315 "mov %0, %%ebx\n" | |
316 "movups %%xmm3, 16(%%ebx)\n" | |
317 | |
318 // -------------------------------------------------- | |
319 | |
320 "mov %1, %%ebx\n" | |
321 | |
322 // 0 | |
323 "movups (%%ebx), %%xmm0\n" // mat1[0] | |
324 "movups 32(%%edx), %%xmm1\n" // mat2[1] | |
325 "shufps $0x00, %%xmm1, %%xmm1\n" // mat2[0][0] | |
326 "mulps %%xmm0, %%xmm1\n" | |
327 "movups %%xmm1, %%xmm3\n" | |
328 | |
329 // 1 | |
330 "movups 16(%%ebx), %%xmm0\n" // mat1[0] | |
331 "movups 32(%%edx), %%xmm1\n" // mat2[1] | |
332 "shufps $0x55, %%xmm1, %%xmm1\n" // mat2[0][1] | |
333 "mulps %%xmm0, %%xmm1\n" | |
334 "addps %%xmm1, %%xmm3\n" | |
335 | |
336 // 2 | |
337 "movups 32(%%ebx), %%xmm0\n" // mat1[0] | |
338 "movups 32(%%edx), %%xmm1\n" // mat2[1] | |
339 "shufps $0xAA, %%xmm1, %%xmm1\n" // mat2[0][2] | |
340 "mulps %%xmm0, %%xmm1\n" | |
341 "addps %%xmm1, %%xmm3\n" | |
342 | |
343 // 3 | |
344 "movups 48(%%ebx), %%xmm0\n" // mat1[0] | |
345 "movups 32(%%edx), %%xmm1\n" // mat2[1] | |
346 "shufps $0xFF, %%xmm1, %%xmm1\n" // mat2[0][3] | |
347 "mulps %%xmm0, %%xmm1\n" | |
348 "addps %%xmm1, %%xmm3\n" | |
349 | |
350 "mov %0, %%ebx\n" | |
351 "movups %%xmm3, 32(%%ebx)\n" | |
352 | |
353 // -------------------------------------------------- | |
354 | |
355 "mov %1, %%ebx\n" | |
356 | |
357 // 0 | |
358 "movups (%%ebx), %%xmm0\n" // mat1[0] | |
359 "movups 48(%%edx), %%xmm1\n" // mat2[1] | |
360 "shufps $0x00, %%xmm1, %%xmm1\n" // mat2[0][0] | |
361 "mulps %%xmm0, %%xmm1\n" | |
362 "movups %%xmm1, %%xmm3\n" | |
363 | |
364 // 1 | |
365 "movups 16(%%ebx), %%xmm0\n" // mat1[0] | |
366 "movups 48(%%edx), %%xmm1\n" // mat2[1] | |
367 "shufps $0x55, %%xmm1, %%xmm1\n" // mat2[0][1] | |
368 "mulps %%xmm0, %%xmm1\n" | |
369 "addps %%xmm1, %%xmm3\n" | |
370 | |
371 // 2 | |
372 "movups 32(%%ebx), %%xmm0\n" // mat1[0] | |
373 "movups 48(%%edx), %%xmm1\n" // mat2[1] | |
374 "shufps $0xAA, %%xmm1, %%xmm1\n" // mat2[0][2] | |
375 "mulps %%xmm0, %%xmm1\n" | |
376 "addps %%xmm1, %%xmm3\n" | |
377 | |
378 // 3 | |
379 "movups 48(%%ebx), %%xmm0\n" // mat1[0] | |
380 "movups 48(%%edx), %%xmm1\n" // mat2[1] | |
381 "shufps $0xFF, %%xmm1, %%xmm1\n" // mat2[0][3] | |
382 "mulps %%xmm0, %%xmm1\n" | |
383 "addps %%xmm1, %%xmm3\n" | |
384 | |
385 "mov %0, %%ebx\n" | |
386 "movups %%xmm3, 48(%%ebx)\n" | |
387 | |
388 : "=m" (dst) | |
389 : "m" (mat1), "m" (mat2) | |
390 : "memory", "%edx", "%ebx", "%xmm0", "%xmm2", "%xmm3" | |
391 ); | |
392 #else | |
393 int i, j; | 106 int i, j; |
394 for (i = 0; i < DM_MATRIX_SIZE; i++) | 107 for (i = 0; i < DM_MATRIX_SIZE; i++) |
395 for (j = 0; j < DM_MATRIX_SIZE; j++) | 108 for (j = 0; j < DM_MATRIX_SIZE; j++) |
396 dst->m[i][j] = | 109 dst->m[i][j] = |
397 (mat1->m[i][0] * mat2->m[0][j]) + | 110 (mat1->m[i][0] * mat2->m[0][j]) + |
398 (mat1->m[i][1] * mat2->m[1][j]) + | 111 (mat1->m[i][1] * mat2->m[1][j]) + |
399 (mat1->m[i][2] * mat2->m[2][j]); | 112 (mat1->m[i][2] * mat2->m[2][j]); |
400 #endif | |
401 } | 113 } |
402 | 114 |
403 | 115 |
404 void dm_matrix_mul(DMMatrix *mat1, const DMMatrix *mat2) | 116 void dm_matrix_mul(DMMatrix *mat1, const DMMatrix *mat2) |
405 { | 117 { |
450 | 162 |
451 | 163 |
452 mat->m[2][0] = (sy * l) + i; | 164 mat->m[2][0] = (sy * l) + i; |
453 mat->m[2][1] = (sy * q) - j; | 165 mat->m[2][1] = (sy * q) - j; |
454 mat->m[2][2] = cx * cy; | 166 mat->m[2][2] = cx * cy; |
455 | |
456 } | 167 } |