Mercurial > hg > dmlib
annotate dmvecmat.c @ 283:4d42b8910d7e
Using SSE inline asm is not worth it in single operations, it hinders
compiler optimizations too much.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Thu, 11 Oct 2012 08:50:03 +0300 |
parents | 9ba4f25abbce |
children | 1994cc78ce6c |
rev | line source |
---|---|
0 | 1 /* |
2 * DMLib | |
3 * -- Vector and matrix functions | |
4 * Programmed and designed by Matti 'ccr' Hamalainen | |
5 * (C) Copyright 2011 Tecnic Software productions (TNSP) | |
6 */ | |
7 #include "dmvecmat.h" | |
8 | |
9 | |
269
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
10 void dm_vector_add_n(DMVector *dst, const DMVector *src, const int nlist) |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
11 { |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
12 int i; |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
13 for (i = 0; i < nlist; i++) |
283
4d42b8910d7e
Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents:
281
diff
changeset
|
14 { |
4d42b8910d7e
Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents:
281
diff
changeset
|
15 #ifdef DM_USE_SIMD |
4d42b8910d7e
Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents:
281
diff
changeset
|
16 asm("movups %2, %%xmm1\n" |
4d42b8910d7e
Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents:
281
diff
changeset
|
17 "movups %1, %%xmm2\n" |
4d42b8910d7e
Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents:
281
diff
changeset
|
18 "addps %%xmm2, %%xmm1\n" |
4d42b8910d7e
Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents:
281
diff
changeset
|
19 "movups %%xmm1, %0\n" |
4d42b8910d7e
Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents:
281
diff
changeset
|
20 : "=m" (dst[i]) |
4d42b8910d7e
Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents:
281
diff
changeset
|
21 : "m" (dst[i]), "m" (src[i]) |
4d42b8910d7e
Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents:
281
diff
changeset
|
22 : "memory", "%xmm1", "%xmm2"); |
4d42b8910d7e
Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents:
281
diff
changeset
|
23 #else |
269
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
24 dm_vector_add(dst + i, src + i); |
283
4d42b8910d7e
Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents:
281
diff
changeset
|
25 #endif |
4d42b8910d7e
Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents:
281
diff
changeset
|
26 } |
269
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
27 } |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
28 |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
29 |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
30 void dm_vector_add_r_n(DMVector *dst, const DMVector *src1, const DMVector *src2, const int nlist) |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
31 { |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
32 int i; |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
33 for (i = 0; i < nlist; i++) |
283
4d42b8910d7e
Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents:
281
diff
changeset
|
34 { |
4d42b8910d7e
Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents:
281
diff
changeset
|
35 #ifdef DM_USE_SIMD |
4d42b8910d7e
Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents:
281
diff
changeset
|
36 asm("movups %2, %%xmm1\n" |
4d42b8910d7e
Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents:
281
diff
changeset
|
37 "movups %1, %%xmm2\n" |
4d42b8910d7e
Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents:
281
diff
changeset
|
38 "addps %%xmm2, %%xmm1\n" |
4d42b8910d7e
Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents:
281
diff
changeset
|
39 "movups %%xmm1, %0\n" |
4d42b8910d7e
Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents:
281
diff
changeset
|
40 : "=m" (dst[i]) |
4d42b8910d7e
Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents:
281
diff
changeset
|
41 : "m" (src1[i]), "m" (src2[i]) |
4d42b8910d7e
Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents:
281
diff
changeset
|
42 : "memory", "%xmm1", "%xmm2"); |
4d42b8910d7e
Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents:
281
diff
changeset
|
43 #else |
269
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
44 dm_vector_add_r(dst + i, src1 + i, src2 + i); |
283
4d42b8910d7e
Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents:
281
diff
changeset
|
45 #endif |
4d42b8910d7e
Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents:
281
diff
changeset
|
46 } |
269
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
47 } |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
48 |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
49 |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
50 void dm_vector_sub_n(DMVector *dst, const DMVector *src, const int nlist) |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
51 { |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
52 int i; |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
53 for (i = 0; i < nlist; i++) |
283
4d42b8910d7e
Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents:
281
diff
changeset
|
54 { |
4d42b8910d7e
Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents:
281
diff
changeset
|
55 #ifdef DM_USE_SIMD |
4d42b8910d7e
Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents:
281
diff
changeset
|
56 asm("movups %2, %%xmm1\n" |
4d42b8910d7e
Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents:
281
diff
changeset
|
57 "movups %1, %%xmm2\n" |
4d42b8910d7e
Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents:
281
diff
changeset
|
58 "subps %%xmm2, %%xmm1\n" |
4d42b8910d7e
Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents:
281
diff
changeset
|
59 "movups %%xmm1, %0\n" |
4d42b8910d7e
Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents:
281
diff
changeset
|
60 : "=m" (dst[i]) |
4d42b8910d7e
Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents:
281
diff
changeset
|
61 : "m" (dst[i]), "m" (src[i]) |
4d42b8910d7e
Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents:
281
diff
changeset
|
62 : "memory", "%xmm1", "%xmm2"); |
4d42b8910d7e
Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents:
281
diff
changeset
|
63 #else |
269
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
64 dm_vector_add(dst + i, src + i); |
283
4d42b8910d7e
Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents:
281
diff
changeset
|
65 #endif |
4d42b8910d7e
Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents:
281
diff
changeset
|
66 } |
269
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
67 } |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
68 |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
69 |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
70 void dm_vector_sub_r_n(DMVector *dst, const DMVector *src1, const DMVector *src2, const int nlist) |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
71 { |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
72 int i; |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
73 for (i = 0; i < nlist; i++) |
283
4d42b8910d7e
Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents:
281
diff
changeset
|
74 { |
4d42b8910d7e
Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents:
281
diff
changeset
|
75 #ifdef DM_USE_SIMD |
4d42b8910d7e
Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents:
281
diff
changeset
|
76 asm("movups %2, %%xmm1\n" |
4d42b8910d7e
Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents:
281
diff
changeset
|
77 "movups %1, %%xmm2\n" |
4d42b8910d7e
Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents:
281
diff
changeset
|
78 "subps %%xmm2, %%xmm1\n" |
4d42b8910d7e
Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents:
281
diff
changeset
|
79 "movups %%xmm1, %0\n" |
4d42b8910d7e
Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents:
281
diff
changeset
|
80 : "=m" (dst[i]) |
4d42b8910d7e
Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents:
281
diff
changeset
|
81 : "m" (src1[i]), "m" (src2[i]) |
4d42b8910d7e
Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents:
281
diff
changeset
|
82 : "memory", "%xmm1", "%xmm2"); |
4d42b8910d7e
Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents:
281
diff
changeset
|
83 #else |
269
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
84 dm_vector_sub_r(dst + i, src1 + i, src2 + i); |
283
4d42b8910d7e
Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents:
281
diff
changeset
|
85 #endif |
4d42b8910d7e
Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents:
281
diff
changeset
|
86 } |
269
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
87 } |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
88 |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
89 |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
90 /* Multiply given vector with a matrix |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
91 */ |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
92 void dm_vector_mul_by_mat(DMVector *vd, const DMVector *vs, const DMMatrix *mat) |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
93 { |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
94 #ifdef DM_USE_SIMD |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
95 asm volatile( |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
96 "mov %1, %%edx\n" |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
97 "movups (%%edx), %%xmm4\n" |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
98 "movups 16(%%edx), %%xmm5\n" |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
99 "movups 32(%%edx), %%xmm6\n" |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
100 "movups 48(%%edx), %%xmm7\n" |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
101 |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
102 // vector -> xmm0 |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
103 "movups %2, %%xmm0\n" |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
104 |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
105 // zero final result in xmm2 |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
106 "xorps %%xmm2, %%xmm2\n" |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
107 |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
108 // perform shuffle and multiply and add whole "column" "X" |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
109 "movups %%xmm0, %%xmm1\n" |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
110 "shufps $0x00, %%xmm1, %%xmm1\n" |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
111 "mulps %%xmm4, %%xmm1\n" |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
112 "addps %%xmm1, %%xmm2\n" |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
113 |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
114 // Y |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
115 "movups %%xmm0, %%xmm1\n" |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
116 "shufps $0x55, %%xmm1, %%xmm1\n" |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
117 "mulps %%xmm5, %%xmm1\n" |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
118 "addps %%xmm1, %%xmm2\n" |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
119 |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
120 // Z |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
121 "movups %%xmm0, %%xmm1\n" |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
122 "shufps $0xAA, %%xmm1, %%xmm1\n" |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
123 "mulps %%xmm6, %%xmm1\n" |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
124 "addps %%xmm1, %%xmm2\n" |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
125 |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
126 // W |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
127 "movups %%xmm0, %%xmm1\n" |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
128 "shufps $0xFF, %%xmm1, %%xmm1\n" |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
129 "mulps %%xmm7, %%xmm1\n" |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
130 "addps %%xmm1, %%xmm2\n" |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
131 |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
132 // Result -> |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
133 "movups %%xmm2, %0\n" |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
134 : "=m" (vd) |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
135 : "m" (mat), "m" (vs) |
270
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
136 : "memory", "%edx", "%xmm0", "%xmm1", "%xmm2", "%xmm4", "%xmm5", "%xmm6", "%xmm7" |
269
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
137 ); |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
138 #else |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
139 vd->x = (vs->x * mat->m[0][0]) + (vs->y * mat->m[1][0]) + (vs->z * mat->m[2][0]); |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
140 vd->y = (vs->x * mat->m[0][1]) + (vs->y * mat->m[1][1]) + (vs->z * mat->m[2][1]); |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
141 vd->z = (vs->x * mat->m[0][2]) + (vs->y * mat->m[1][2]) + (vs->z * mat->m[2][2]); |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
142 #endif |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
143 } |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
144 |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
145 |
0 | 146 /* Multiply list of given vectors with given matrix. |
147 */ | |
148 void dm_vector_mul_by_mat_n(DMVector *list, const int nlist, const DMMatrix *mat) | |
149 { | |
150 int i; | |
151 | |
269
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
152 #ifdef DM_USE_SIMD |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
153 asm volatile( |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
154 "mov %0, %%edx\n" |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
155 "movups (%%edx), %%xmm4\n" |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
156 "movups 16(%%edx), %%xmm5\n" |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
157 "movups 32(%%edx), %%xmm6\n" |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
158 "movups 48(%%edx), %%xmm7\n" |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
159 : |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
160 : "m" (mat) |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
161 : "%edx", "%xmm4", "%xmm5", "%xmm6", "%xmm7" |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
162 ); |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
163 #endif |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
164 |
0 | 165 for (i = 0; i < nlist; i++) |
166 { | |
269
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
167 #ifdef DM_USE_SIMD |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
168 asm volatile |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
169 ( |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
170 // list[i] -> xmm0 |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
171 "movups %1, %%xmm0\n" |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
172 |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
173 // zero final result in xmm2 |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
174 "xorps %%xmm2, %%xmm2\n" |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
175 |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
176 // perform shuffle and multiply and add whole "column" "X" |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
177 "movups %%xmm0, %%xmm1\n" |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
178 "shufps $0x00, %%xmm1, %%xmm1\n" |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
179 "mulps %%xmm4, %%xmm1\n" |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
180 "addps %%xmm1, %%xmm2\n" |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
181 |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
182 // Y |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
183 "movups %%xmm0, %%xmm1\n" |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
184 "shufps $0x55, %%xmm1, %%xmm1\n" |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
185 "mulps %%xmm5, %%xmm1\n" |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
186 "addps %%xmm1, %%xmm2\n" |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
187 |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
188 // Z |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
189 "movups %%xmm0, %%xmm1\n" |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
190 "shufps $0xAA, %%xmm1, %%xmm1\n" |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
191 "mulps %%xmm6, %%xmm1\n" |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
192 "addps %%xmm1, %%xmm2\n" |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
193 |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
194 // W |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
195 "movups %%xmm0, %%xmm1\n" |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
196 "shufps $0xFF, %%xmm1, %%xmm1\n" |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
197 "mulps %%xmm7, %%xmm1\n" |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
198 "addps %%xmm1, %%xmm2\n" |
280
47f774734b88
Enable W component manipulation.
Matti Hamalainen <ccr@tnsp.org>
parents:
279
diff
changeset
|
199 |
269
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
200 // Result -> |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
201 "movups %%xmm2, %0\n" |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
202 : "=m" (list[i]) |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
203 : "m" (list[i]) |
270
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
204 : "memory", "%xmm0", "%xmm1", "%xmm2", "%xmm4", "%xmm5", "%xmm6", "%xmm7"); |
269
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
205 #else |
0 | 206 DMVector q; |
207 memcpy(&q, &list[i], sizeof(DMVector)); | |
208 | |
209 list[i].x = (q.x * mat->m[0][0]) + (q.y * mat->m[1][0]) + (q.z * mat->m[2][0]); | |
210 list[i].y = (q.x * mat->m[0][1]) + (q.y * mat->m[1][1]) + (q.z * mat->m[2][1]); | |
211 list[i].z = (q.x * mat->m[0][2]) + (q.y * mat->m[1][2]) + (q.z * mat->m[2][2]); | |
269
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
212 #endif |
0 | 213 } |
214 } | |
215 | |
216 | |
217 /* Set matrix to unit-matrix | |
218 */ | |
219 void dm_matrix_unit(DMMatrix *mat) | |
220 { | |
221 memset(mat, 0, sizeof(DMMatrix)); | |
222 mat->m[0][0] = 1.0f; | |
223 mat->m[1][1] = 1.0f; | |
224 mat->m[2][2] = 1.0f; | |
279
5acc1232c8c0
Set the last 1 of the unit matrix as they are now 4x4 instead of 3x3.
Matti Hamalainen <ccr@tnsp.org>
parents:
270
diff
changeset
|
225 mat->m[3][3] = 1.0f; |
0 | 226 } |
227 | |
228 | |
229 /* Transpose the matrix mat2 to mat1 | |
230 */ | |
231 void dm_matrix_transpose(DMMatrix *mat1, const DMMatrix *mat2) | |
232 { | |
233 int i, j; | |
234 | |
235 for (i = 0; i < DM_MATRIX_SIZE; i++) | |
236 for (j = 0; j < DM_MATRIX_SIZE; j++) | |
237 mat1->m[i][j] = mat2->m[j][i]; | |
238 } | |
239 | |
240 | |
241 /* Multiply matrices mat1 and mat2, putting result into mat1 | |
242 */ | |
269
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
243 void dm_matrix_mul_r(DMMatrix *dst, const DMMatrix *mat1, const DMMatrix *mat2) |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
244 { |
270
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
245 #ifdef DM_USE_SIMD |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
246 asm volatile( |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
247 "mov %1, %%ebx\n" |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
248 "mov %2, %%edx\n" |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
249 |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
250 // -------------------------------------------------- |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
251 |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
252 // 0 |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
253 "movups (%%ebx), %%xmm0\n" // mat1[0] |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
254 "movups (%%edx), %%xmm1\n" // mat2[0] |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
255 "shufps $0x00, %%xmm1, %%xmm1\n" // mat2[0][0] |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
256 "mulps %%xmm0, %%xmm1\n" |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
257 "movups %%xmm1, %%xmm3\n" |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
258 |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
259 // 1 |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
260 "movups 16(%%ebx), %%xmm0\n" // mat1[0] |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
261 "movups (%%edx), %%xmm1\n" // mat2[0] |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
262 "shufps $0x55, %%xmm1, %%xmm1\n" // mat2[0][1] |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
263 "mulps %%xmm0, %%xmm1\n" |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
264 "addps %%xmm1, %%xmm3\n" |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
265 |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
266 // 2 |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
267 "movups 32(%%ebx), %%xmm0\n" // mat1[0] |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
268 "movups (%%edx), %%xmm1\n" // mat2[0] |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
269 "shufps $0xAA, %%xmm1, %%xmm1\n" // mat2[0][2] |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
270 "mulps %%xmm0, %%xmm1\n" |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
271 "addps %%xmm1, %%xmm3\n" |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
272 |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
273 // 3 |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
274 "movups 48(%%ebx), %%xmm0\n" // mat1[0] |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
275 "movups (%%edx), %%xmm1\n" // mat2[0] |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
276 "shufps $0xFF, %%xmm1, %%xmm1\n" // mat2[0][3] |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
277 "mulps %%xmm0, %%xmm1\n" |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
278 "addps %%xmm1, %%xmm3\n" |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
279 |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
280 "mov %0, %%ebx\n" |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
281 "movups %%xmm3, (%%ebx)\n" |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
282 |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
283 // -------------------------------------------------- |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
284 |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
285 "mov %1, %%ebx\n" |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
286 |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
287 // 0 |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
288 "movups (%%ebx), %%xmm0\n" // mat1[0] |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
289 "movups 16(%%edx), %%xmm1\n" // mat2[1] |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
290 "shufps $0x00, %%xmm1, %%xmm1\n" // mat2[0][0] |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
291 "mulps %%xmm0, %%xmm1\n" |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
292 "movups %%xmm1, %%xmm3\n" |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
293 |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
294 // 1 |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
295 "movups 16(%%ebx), %%xmm0\n" // mat1[0] |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
296 "movups 16(%%edx), %%xmm1\n" // mat2[1] |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
297 "shufps $0x55, %%xmm1, %%xmm1\n" // mat2[0][1] |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
298 "mulps %%xmm0, %%xmm1\n" |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
299 "addps %%xmm1, %%xmm3\n" |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
300 |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
301 // 2 |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
302 "movups 32(%%ebx), %%xmm0\n" // mat1[0] |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
303 "movups 16(%%edx), %%xmm1\n" // mat2[1] |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
304 "shufps $0xAA, %%xmm1, %%xmm1\n" // mat2[0][2] |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
305 "mulps %%xmm0, %%xmm1\n" |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
306 "addps %%xmm1, %%xmm3\n" |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
307 |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
308 // 3 |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
309 "movups 48(%%ebx), %%xmm0\n" // mat1[0] |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
310 "movups 16(%%edx), %%xmm1\n" // mat2[1] |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
311 "shufps $0xFF, %%xmm1, %%xmm1\n" // mat2[0][3] |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
312 "mulps %%xmm0, %%xmm1\n" |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
313 "addps %%xmm1, %%xmm3\n" |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
314 |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
315 "mov %0, %%ebx\n" |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
316 "movups %%xmm3, 16(%%ebx)\n" |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
317 |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
318 // -------------------------------------------------- |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
319 |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
320 "mov %1, %%ebx\n" |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
321 |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
322 // 0 |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
323 "movups (%%ebx), %%xmm0\n" // mat1[0] |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
324 "movups 32(%%edx), %%xmm1\n" // mat2[1] |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
325 "shufps $0x00, %%xmm1, %%xmm1\n" // mat2[0][0] |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
326 "mulps %%xmm0, %%xmm1\n" |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
327 "movups %%xmm1, %%xmm3\n" |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
328 |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
329 // 1 |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
330 "movups 16(%%ebx), %%xmm0\n" // mat1[0] |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
331 "movups 32(%%edx), %%xmm1\n" // mat2[1] |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
332 "shufps $0x55, %%xmm1, %%xmm1\n" // mat2[0][1] |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
333 "mulps %%xmm0, %%xmm1\n" |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
334 "addps %%xmm1, %%xmm3\n" |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
335 |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
336 // 2 |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
337 "movups 32(%%ebx), %%xmm0\n" // mat1[0] |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
338 "movups 32(%%edx), %%xmm1\n" // mat2[1] |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
339 "shufps $0xAA, %%xmm1, %%xmm1\n" // mat2[0][2] |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
340 "mulps %%xmm0, %%xmm1\n" |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
341 "addps %%xmm1, %%xmm3\n" |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
342 |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
343 // 3 |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
344 "movups 48(%%ebx), %%xmm0\n" // mat1[0] |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
345 "movups 32(%%edx), %%xmm1\n" // mat2[1] |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
346 "shufps $0xFF, %%xmm1, %%xmm1\n" // mat2[0][3] |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
347 "mulps %%xmm0, %%xmm1\n" |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
348 "addps %%xmm1, %%xmm3\n" |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
349 |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
350 "mov %0, %%ebx\n" |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
351 "movups %%xmm3, 32(%%ebx)\n" |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
352 |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
353 // -------------------------------------------------- |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
354 |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
355 "mov %1, %%ebx\n" |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
356 |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
357 // 0 |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
358 "movups (%%ebx), %%xmm0\n" // mat1[0] |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
359 "movups 48(%%edx), %%xmm1\n" // mat2[1] |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
360 "shufps $0x00, %%xmm1, %%xmm1\n" // mat2[0][0] |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
361 "mulps %%xmm0, %%xmm1\n" |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
362 "movups %%xmm1, %%xmm3\n" |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
363 |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
364 // 1 |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
365 "movups 16(%%ebx), %%xmm0\n" // mat1[0] |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
366 "movups 48(%%edx), %%xmm1\n" // mat2[1] |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
367 "shufps $0x55, %%xmm1, %%xmm1\n" // mat2[0][1] |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
368 "mulps %%xmm0, %%xmm1\n" |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
369 "addps %%xmm1, %%xmm3\n" |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
370 |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
371 // 2 |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
372 "movups 32(%%ebx), %%xmm0\n" // mat1[0] |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
373 "movups 48(%%edx), %%xmm1\n" // mat2[1] |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
374 "shufps $0xAA, %%xmm1, %%xmm1\n" // mat2[0][2] |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
375 "mulps %%xmm0, %%xmm1\n" |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
376 "addps %%xmm1, %%xmm3\n" |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
377 |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
378 // 3 |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
379 "movups 48(%%ebx), %%xmm0\n" // mat1[0] |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
380 "movups 48(%%edx), %%xmm1\n" // mat2[1] |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
381 "shufps $0xFF, %%xmm1, %%xmm1\n" // mat2[0][3] |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
382 "mulps %%xmm0, %%xmm1\n" |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
383 "addps %%xmm1, %%xmm3\n" |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
384 |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
385 "mov %0, %%ebx\n" |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
386 "movups %%xmm3, 48(%%ebx)\n" |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
387 |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
388 : "=m" (dst) |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
389 : "m" (mat1), "m" (mat2) |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
390 : "memory", "%edx", "%ebx", "%xmm0", "%xmm2", "%xmm3" |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
391 ); |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
392 #else |
269
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
393 int i, j; |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
394 for (i = 0; i < DM_MATRIX_SIZE; i++) |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
395 for (j = 0; j < DM_MATRIX_SIZE; j++) |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
396 dst->m[i][j] = |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
397 (mat1->m[i][0] * mat2->m[0][j]) + |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
398 (mat1->m[i][1] * mat2->m[1][j]) + |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
399 (mat1->m[i][2] * mat2->m[2][j]); |
270
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
400 #endif |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
401 } |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
402 |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
403 |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
404 void dm_matrix_mul(DMMatrix *mat1, const DMMatrix *mat2) |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
405 { |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
406 DMMatrix tmpM; |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
407 dm_matrix_mul_r(&tmpM, mat1, mat2); |
89a05a5e7a82
Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents:
269
diff
changeset
|
408 memcpy(mat1, &tmpM, sizeof(DMMatrix)); |
269
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
409 } |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
410 |
159264c27929
Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents:
239
diff
changeset
|
411 |
0 | 412 /* Multiply given list of matrices (size of nMatrices units) with given matrix. |
413 */ | |
414 void dm_matrix_mul_n(DMMatrix * list, const int nlist, const DMMatrix *mat) | |
415 { | |
416 int i; | |
417 for (i = 0; i < nlist; i++) | |
418 dm_matrix_mul(&list[i], mat); | |
419 } | |
420 | |
421 | |
422 /* Optimized rotation matrix creation | |
423 */ | |
424 void dm_matrix_rot(DMMatrix *mat, | |
425 const DMFloat sx, const DMFloat sy, const DMFloat sz, | |
426 const DMFloat cx, const DMFloat cy, const DMFloat cz) | |
427 { | |
428 const DMFloat | |
429 q = cx * sz, | |
430 l = cx * cz, | |
431 i = sx * sz, | |
432 j = sx * cz; | |
433 | |
281
9ba4f25abbce
Fix rotation matrix creation.
Matti Hamalainen <ccr@tnsp.org>
parents:
280
diff
changeset
|
434 memset(mat, 0, sizeof(DMMatrix)); |
9ba4f25abbce
Fix rotation matrix creation.
Matti Hamalainen <ccr@tnsp.org>
parents:
280
diff
changeset
|
435 |
0 | 436 mat->m[0][0] = cy * cz; |
437 mat->m[0][1] = cy * sz; | |
438 mat->m[0][2] = -sy; | |
439 | |
440 | |
441 mat->m[1][0] = (sy * j) - q; | |
442 mat->m[1][1] = (sy * i) + l; | |
443 mat->m[1][2] = sx * cy; | |
444 | |
445 | |
446 mat->m[2][0] = (sy * l) + i; | |
447 mat->m[2][1] = (sy * q) - j; | |
448 mat->m[2][2] = cx * cy; | |
281
9ba4f25abbce
Fix rotation matrix creation.
Matti Hamalainen <ccr@tnsp.org>
parents:
280
diff
changeset
|
449 |
9ba4f25abbce
Fix rotation matrix creation.
Matti Hamalainen <ccr@tnsp.org>
parents:
280
diff
changeset
|
450 mat->m[3][3] = 1.0f; |
0 | 451 } |