annotate dmvecmat.c @ 283:4d42b8910d7e

Using SSE inline asm is not worth it in single operations, it hinders compiler optimizations too much.
author Matti Hamalainen <ccr@tnsp.org>
date Thu, 11 Oct 2012 08:50:03 +0300
parents 9ba4f25abbce
children 1994cc78ce6c
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
1 /*
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
2 * DMLib
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
3 * -- Vector and matrix functions
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
4 * Programmed and designed by Matti 'ccr' Hamalainen
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
5 * (C) Copyright 2011 Tecnic Software productions (TNSP)
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
6 */
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
7 #include "dmvecmat.h"
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
8
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
9
269
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
10 void dm_vector_add_n(DMVector *dst, const DMVector *src, const int nlist)
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
11 {
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
12 int i;
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
13 for (i = 0; i < nlist; i++)
283
4d42b8910d7e Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents: 281
diff changeset
14 {
4d42b8910d7e Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents: 281
diff changeset
15 #ifdef DM_USE_SIMD
4d42b8910d7e Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents: 281
diff changeset
16 asm("movups %2, %%xmm1\n"
4d42b8910d7e Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents: 281
diff changeset
17 "movups %1, %%xmm2\n"
4d42b8910d7e Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents: 281
diff changeset
18 "addps %%xmm2, %%xmm1\n"
4d42b8910d7e Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents: 281
diff changeset
19 "movups %%xmm1, %0\n"
4d42b8910d7e Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents: 281
diff changeset
20 : "=m" (dst[i])
4d42b8910d7e Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents: 281
diff changeset
21 : "m" (dst[i]), "m" (src[i])
4d42b8910d7e Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents: 281
diff changeset
22 : "memory", "%xmm1", "%xmm2");
4d42b8910d7e Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents: 281
diff changeset
23 #else
269
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
24 dm_vector_add(dst + i, src + i);
283
4d42b8910d7e Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents: 281
diff changeset
25 #endif
4d42b8910d7e Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents: 281
diff changeset
26 }
269
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
27 }
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
28
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
29
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
30 void dm_vector_add_r_n(DMVector *dst, const DMVector *src1, const DMVector *src2, const int nlist)
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
31 {
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
32 int i;
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
33 for (i = 0; i < nlist; i++)
283
4d42b8910d7e Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents: 281
diff changeset
34 {
4d42b8910d7e Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents: 281
diff changeset
35 #ifdef DM_USE_SIMD
4d42b8910d7e Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents: 281
diff changeset
36 asm("movups %2, %%xmm1\n"
4d42b8910d7e Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents: 281
diff changeset
37 "movups %1, %%xmm2\n"
4d42b8910d7e Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents: 281
diff changeset
38 "addps %%xmm2, %%xmm1\n"
4d42b8910d7e Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents: 281
diff changeset
39 "movups %%xmm1, %0\n"
4d42b8910d7e Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents: 281
diff changeset
40 : "=m" (dst[i])
4d42b8910d7e Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents: 281
diff changeset
41 : "m" (src1[i]), "m" (src2[i])
4d42b8910d7e Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents: 281
diff changeset
42 : "memory", "%xmm1", "%xmm2");
4d42b8910d7e Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents: 281
diff changeset
43 #else
269
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
44 dm_vector_add_r(dst + i, src1 + i, src2 + i);
283
4d42b8910d7e Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents: 281
diff changeset
45 #endif
4d42b8910d7e Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents: 281
diff changeset
46 }
269
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
47 }
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
48
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
49
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
50 void dm_vector_sub_n(DMVector *dst, const DMVector *src, const int nlist)
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
51 {
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
52 int i;
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
53 for (i = 0; i < nlist; i++)
283
4d42b8910d7e Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents: 281
diff changeset
54 {
4d42b8910d7e Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents: 281
diff changeset
55 #ifdef DM_USE_SIMD
4d42b8910d7e Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents: 281
diff changeset
56 asm("movups %2, %%xmm1\n"
4d42b8910d7e Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents: 281
diff changeset
57 "movups %1, %%xmm2\n"
4d42b8910d7e Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents: 281
diff changeset
58 "subps %%xmm2, %%xmm1\n"
4d42b8910d7e Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents: 281
diff changeset
59 "movups %%xmm1, %0\n"
4d42b8910d7e Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents: 281
diff changeset
60 : "=m" (dst[i])
4d42b8910d7e Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents: 281
diff changeset
61 : "m" (dst[i]), "m" (src[i])
4d42b8910d7e Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents: 281
diff changeset
62 : "memory", "%xmm1", "%xmm2");
4d42b8910d7e Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents: 281
diff changeset
63 #else
269
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
64 dm_vector_add(dst + i, src + i);
283
4d42b8910d7e Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents: 281
diff changeset
65 #endif
4d42b8910d7e Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents: 281
diff changeset
66 }
269
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
67 }
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
68
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
69
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
70 void dm_vector_sub_r_n(DMVector *dst, const DMVector *src1, const DMVector *src2, const int nlist)
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
71 {
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
72 int i;
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
73 for (i = 0; i < nlist; i++)
283
4d42b8910d7e Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents: 281
diff changeset
74 {
4d42b8910d7e Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents: 281
diff changeset
75 #ifdef DM_USE_SIMD
4d42b8910d7e Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents: 281
diff changeset
76 asm("movups %2, %%xmm1\n"
4d42b8910d7e Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents: 281
diff changeset
77 "movups %1, %%xmm2\n"
4d42b8910d7e Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents: 281
diff changeset
78 "subps %%xmm2, %%xmm1\n"
4d42b8910d7e Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents: 281
diff changeset
79 "movups %%xmm1, %0\n"
4d42b8910d7e Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents: 281
diff changeset
80 : "=m" (dst[i])
4d42b8910d7e Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents: 281
diff changeset
81 : "m" (src1[i]), "m" (src2[i])
4d42b8910d7e Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents: 281
diff changeset
82 : "memory", "%xmm1", "%xmm2");
4d42b8910d7e Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents: 281
diff changeset
83 #else
269
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
84 dm_vector_sub_r(dst + i, src1 + i, src2 + i);
283
4d42b8910d7e Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents: 281
diff changeset
85 #endif
4d42b8910d7e Using SSE inline asm is not worth it in single operations, it hinders
Matti Hamalainen <ccr@tnsp.org>
parents: 281
diff changeset
86 }
269
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
87 }
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
88
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
89
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
90 /* Multiply given vector with a matrix
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
91 */
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
92 void dm_vector_mul_by_mat(DMVector *vd, const DMVector *vs, const DMMatrix *mat)
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
93 {
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
94 #ifdef DM_USE_SIMD
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
95 asm volatile(
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
96 "mov %1, %%edx\n"
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
97 "movups (%%edx), %%xmm4\n"
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
98 "movups 16(%%edx), %%xmm5\n"
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
99 "movups 32(%%edx), %%xmm6\n"
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
100 "movups 48(%%edx), %%xmm7\n"
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
101
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
102 // vector -> xmm0
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
103 "movups %2, %%xmm0\n"
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
104
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
105 // zero final result in xmm2
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
106 "xorps %%xmm2, %%xmm2\n"
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
107
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
108 // perform shuffle and multiply and add whole "column" "X"
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
109 "movups %%xmm0, %%xmm1\n"
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
110 "shufps $0x00, %%xmm1, %%xmm1\n"
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
111 "mulps %%xmm4, %%xmm1\n"
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
112 "addps %%xmm1, %%xmm2\n"
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
113
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
114 // Y
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
115 "movups %%xmm0, %%xmm1\n"
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
116 "shufps $0x55, %%xmm1, %%xmm1\n"
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
117 "mulps %%xmm5, %%xmm1\n"
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
118 "addps %%xmm1, %%xmm2\n"
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
119
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
120 // Z
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
121 "movups %%xmm0, %%xmm1\n"
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
122 "shufps $0xAA, %%xmm1, %%xmm1\n"
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
123 "mulps %%xmm6, %%xmm1\n"
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
124 "addps %%xmm1, %%xmm2\n"
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
125
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
126 // W
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
127 "movups %%xmm0, %%xmm1\n"
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
128 "shufps $0xFF, %%xmm1, %%xmm1\n"
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
129 "mulps %%xmm7, %%xmm1\n"
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
130 "addps %%xmm1, %%xmm2\n"
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
131
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
132 // Result ->
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
133 "movups %%xmm2, %0\n"
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
134 : "=m" (vd)
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
135 : "m" (mat), "m" (vs)
270
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
136 : "memory", "%edx", "%xmm0", "%xmm1", "%xmm2", "%xmm4", "%xmm5", "%xmm6", "%xmm7"
269
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
137 );
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
138 #else
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
139 vd->x = (vs->x * mat->m[0][0]) + (vs->y * mat->m[1][0]) + (vs->z * mat->m[2][0]);
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
140 vd->y = (vs->x * mat->m[0][1]) + (vs->y * mat->m[1][1]) + (vs->z * mat->m[2][1]);
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
141 vd->z = (vs->x * mat->m[0][2]) + (vs->y * mat->m[1][2]) + (vs->z * mat->m[2][2]);
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
142 #endif
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
143 }
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
144
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
145
0
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
146 /* Multiply list of given vectors with given matrix.
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
147 */
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
148 void dm_vector_mul_by_mat_n(DMVector *list, const int nlist, const DMMatrix *mat)
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
149 {
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
150 int i;
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
151
269
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
152 #ifdef DM_USE_SIMD
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
153 asm volatile(
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
154 "mov %0, %%edx\n"
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
155 "movups (%%edx), %%xmm4\n"
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
156 "movups 16(%%edx), %%xmm5\n"
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
157 "movups 32(%%edx), %%xmm6\n"
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
158 "movups 48(%%edx), %%xmm7\n"
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
159 :
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
160 : "m" (mat)
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
161 : "%edx", "%xmm4", "%xmm5", "%xmm6", "%xmm7"
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
162 );
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
163 #endif
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
164
0
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
165 for (i = 0; i < nlist; i++)
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
166 {
269
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
167 #ifdef DM_USE_SIMD
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
168 asm volatile
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
169 (
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
170 // list[i] -> xmm0
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
171 "movups %1, %%xmm0\n"
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
172
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
173 // zero final result in xmm2
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
174 "xorps %%xmm2, %%xmm2\n"
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
175
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
176 // perform shuffle and multiply and add whole "column" "X"
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
177 "movups %%xmm0, %%xmm1\n"
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
178 "shufps $0x00, %%xmm1, %%xmm1\n"
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
179 "mulps %%xmm4, %%xmm1\n"
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
180 "addps %%xmm1, %%xmm2\n"
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
181
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
182 // Y
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
183 "movups %%xmm0, %%xmm1\n"
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
184 "shufps $0x55, %%xmm1, %%xmm1\n"
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
185 "mulps %%xmm5, %%xmm1\n"
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
186 "addps %%xmm1, %%xmm2\n"
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
187
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
188 // Z
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
189 "movups %%xmm0, %%xmm1\n"
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
190 "shufps $0xAA, %%xmm1, %%xmm1\n"
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
191 "mulps %%xmm6, %%xmm1\n"
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
192 "addps %%xmm1, %%xmm2\n"
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
193
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
194 // W
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
195 "movups %%xmm0, %%xmm1\n"
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
196 "shufps $0xFF, %%xmm1, %%xmm1\n"
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
197 "mulps %%xmm7, %%xmm1\n"
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
198 "addps %%xmm1, %%xmm2\n"
280
47f774734b88 Enable W component manipulation.
Matti Hamalainen <ccr@tnsp.org>
parents: 279
diff changeset
199
269
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
200 // Result ->
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
201 "movups %%xmm2, %0\n"
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
202 : "=m" (list[i])
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
203 : "m" (list[i])
270
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
204 : "memory", "%xmm0", "%xmm1", "%xmm2", "%xmm4", "%xmm5", "%xmm6", "%xmm7");
269
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
205 #else
0
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
206 DMVector q;
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
207 memcpy(&q, &list[i], sizeof(DMVector));
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
208
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
209 list[i].x = (q.x * mat->m[0][0]) + (q.y * mat->m[1][0]) + (q.z * mat->m[2][0]);
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
210 list[i].y = (q.x * mat->m[0][1]) + (q.y * mat->m[1][1]) + (q.z * mat->m[2][1]);
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
211 list[i].z = (q.x * mat->m[0][2]) + (q.y * mat->m[1][2]) + (q.z * mat->m[2][2]);
269
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
212 #endif
0
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
213 }
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
214 }
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
215
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
216
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
217 /* Set matrix to unit-matrix
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
218 */
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
219 void dm_matrix_unit(DMMatrix *mat)
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
220 {
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
221 memset(mat, 0, sizeof(DMMatrix));
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
222 mat->m[0][0] = 1.0f;
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
223 mat->m[1][1] = 1.0f;
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
224 mat->m[2][2] = 1.0f;
279
5acc1232c8c0 Set the last 1 of the unit matrix as they are now 4x4 instead of 3x3.
Matti Hamalainen <ccr@tnsp.org>
parents: 270
diff changeset
225 mat->m[3][3] = 1.0f;
0
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
226 }
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
227
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
228
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
229 /* Transpose the matrix mat2 to mat1
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
230 */
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
231 void dm_matrix_transpose(DMMatrix *mat1, const DMMatrix *mat2)
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
232 {
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
233 int i, j;
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
234
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
235 for (i = 0; i < DM_MATRIX_SIZE; i++)
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
236 for (j = 0; j < DM_MATRIX_SIZE; j++)
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
237 mat1->m[i][j] = mat2->m[j][i];
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
238 }
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
239
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
240
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
241 /* Multiply matrices mat1 and mat2, putting result into dst
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
242 */
269
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
243 void dm_matrix_mul_r(DMMatrix *dst, const DMMatrix *mat1, const DMMatrix *mat2)
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
244 {
270
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
245 #ifdef DM_USE_SIMD
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
246 asm volatile(
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
247 "mov %1, %%ebx\n"
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
248 "mov %2, %%edx\n"
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
249
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
250 // --------------------------------------------------
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
251
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
252 // 0
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
253 "movups (%%ebx), %%xmm0\n" // mat1[0]
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
254 "movups (%%edx), %%xmm1\n" // mat2[0]
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
255 "shufps $0x00, %%xmm1, %%xmm1\n" // mat2[0][0]
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
256 "mulps %%xmm0, %%xmm1\n"
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
257 "movups %%xmm1, %%xmm3\n"
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
258
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
259 // 1
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
260 "movups 16(%%ebx), %%xmm0\n" // mat1[0]
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
261 "movups (%%edx), %%xmm1\n" // mat2[0]
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
262 "shufps $0x55, %%xmm1, %%xmm1\n" // mat2[0][1]
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
263 "mulps %%xmm0, %%xmm1\n"
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
264 "addps %%xmm1, %%xmm3\n"
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
265
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
266 // 2
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
267 "movups 32(%%ebx), %%xmm0\n" // mat1[0]
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
268 "movups (%%edx), %%xmm1\n" // mat2[0]
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
269 "shufps $0xAA, %%xmm1, %%xmm1\n" // mat2[0][2]
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
270 "mulps %%xmm0, %%xmm1\n"
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
271 "addps %%xmm1, %%xmm3\n"
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
272
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
273 // 3
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
274 "movups 48(%%ebx), %%xmm0\n" // mat1[0]
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
275 "movups (%%edx), %%xmm1\n" // mat2[0]
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
276 "shufps $0xFF, %%xmm1, %%xmm1\n" // mat2[0][3]
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
277 "mulps %%xmm0, %%xmm1\n"
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
278 "addps %%xmm1, %%xmm3\n"
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
279
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
280 "mov %0, %%ebx\n"
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
281 "movups %%xmm3, (%%ebx)\n"
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
282
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
283 // --------------------------------------------------
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
284
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
285 "mov %1, %%ebx\n"
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
286
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
287 // 0
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
288 "movups (%%ebx), %%xmm0\n" // mat1[0]
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
289 "movups 16(%%edx), %%xmm1\n" // mat2[1]
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
290 "shufps $0x00, %%xmm1, %%xmm1\n" // mat2[0][0]
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
291 "mulps %%xmm0, %%xmm1\n"
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
292 "movups %%xmm1, %%xmm3\n"
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
293
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
294 // 1
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
295 "movups 16(%%ebx), %%xmm0\n" // mat1[0]
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
296 "movups 16(%%edx), %%xmm1\n" // mat2[1]
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
297 "shufps $0x55, %%xmm1, %%xmm1\n" // mat2[0][1]
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
298 "mulps %%xmm0, %%xmm1\n"
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
299 "addps %%xmm1, %%xmm3\n"
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
300
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
301 // 2
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
302 "movups 32(%%ebx), %%xmm0\n" // mat1[0]
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
303 "movups 16(%%edx), %%xmm1\n" // mat2[1]
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
304 "shufps $0xAA, %%xmm1, %%xmm1\n" // mat2[0][2]
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
305 "mulps %%xmm0, %%xmm1\n"
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
306 "addps %%xmm1, %%xmm3\n"
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
307
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
308 // 3
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
309 "movups 48(%%ebx), %%xmm0\n" // mat1[0]
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
310 "movups 16(%%edx), %%xmm1\n" // mat2[1]
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
311 "shufps $0xFF, %%xmm1, %%xmm1\n" // mat2[0][3]
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
312 "mulps %%xmm0, %%xmm1\n"
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
313 "addps %%xmm1, %%xmm3\n"
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
314
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
315 "mov %0, %%ebx\n"
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
316 "movups %%xmm3, 16(%%ebx)\n"
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
317
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
318 // --------------------------------------------------
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
319
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
320 "mov %1, %%ebx\n"
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
321
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
322 // 0
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
323 "movups (%%ebx), %%xmm0\n" // mat1[0]
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
324 "movups 32(%%edx), %%xmm1\n" // mat2[1]
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
325 "shufps $0x00, %%xmm1, %%xmm1\n" // mat2[0][0]
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
326 "mulps %%xmm0, %%xmm1\n"
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
327 "movups %%xmm1, %%xmm3\n"
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
328
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
329 // 1
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
330 "movups 16(%%ebx), %%xmm0\n" // mat1[0]
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
331 "movups 32(%%edx), %%xmm1\n" // mat2[1]
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
332 "shufps $0x55, %%xmm1, %%xmm1\n" // mat2[0][1]
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
333 "mulps %%xmm0, %%xmm1\n"
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
334 "addps %%xmm1, %%xmm3\n"
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
335
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
336 // 2
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
337 "movups 32(%%ebx), %%xmm0\n" // mat1[0]
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
338 "movups 32(%%edx), %%xmm1\n" // mat2[1]
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
339 "shufps $0xAA, %%xmm1, %%xmm1\n" // mat2[0][2]
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
340 "mulps %%xmm0, %%xmm1\n"
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
341 "addps %%xmm1, %%xmm3\n"
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
342
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
343 // 3
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
344 "movups 48(%%ebx), %%xmm0\n" // mat1[0]
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
345 "movups 32(%%edx), %%xmm1\n" // mat2[1]
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
346 "shufps $0xFF, %%xmm1, %%xmm1\n" // mat2[0][3]
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
347 "mulps %%xmm0, %%xmm1\n"
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
348 "addps %%xmm1, %%xmm3\n"
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
349
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
350 "mov %0, %%ebx\n"
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
351 "movups %%xmm3, 32(%%ebx)\n"
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
352
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
353 // --------------------------------------------------
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
354
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
355 "mov %1, %%ebx\n"
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
356
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
357 // 0
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
358 "movups (%%ebx), %%xmm0\n" // mat1[0]
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
359 "movups 48(%%edx), %%xmm1\n" // mat2[1]
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
360 "shufps $0x00, %%xmm1, %%xmm1\n" // mat2[0][0]
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
361 "mulps %%xmm0, %%xmm1\n"
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
362 "movups %%xmm1, %%xmm3\n"
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
363
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
364 // 1
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
365 "movups 16(%%ebx), %%xmm0\n" // mat1[0]
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
366 "movups 48(%%edx), %%xmm1\n" // mat2[1]
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
367 "shufps $0x55, %%xmm1, %%xmm1\n" // mat2[0][1]
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
368 "mulps %%xmm0, %%xmm1\n"
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
369 "addps %%xmm1, %%xmm3\n"
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
370
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
371 // 2
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
372 "movups 32(%%ebx), %%xmm0\n" // mat1[0]
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
373 "movups 48(%%edx), %%xmm1\n" // mat2[1]
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
374 "shufps $0xAA, %%xmm1, %%xmm1\n" // mat2[0][2]
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
375 "mulps %%xmm0, %%xmm1\n"
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
376 "addps %%xmm1, %%xmm3\n"
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
377
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
378 // 3
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
379 "movups 48(%%ebx), %%xmm0\n" // mat1[0]
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
380 "movups 48(%%edx), %%xmm1\n" // mat2[1]
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
381 "shufps $0xFF, %%xmm1, %%xmm1\n" // mat2[0][3]
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
382 "mulps %%xmm0, %%xmm1\n"
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
383 "addps %%xmm1, %%xmm3\n"
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
384
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
385 "mov %0, %%ebx\n"
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
386 "movups %%xmm3, 48(%%ebx)\n"
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
387
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
388 : "=m" (dst)
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
389 : "m" (mat1), "m" (mat2)
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
390 : "memory", "%edx", "%ebx", "%xmm0", "%xmm2", "%xmm3"
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
391 );
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
392 #else
269
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
393 int i, j;
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
394 for (i = 0; i < DM_MATRIX_SIZE; i++)
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
395 for (j = 0; j < DM_MATRIX_SIZE; j++)
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
396 dst->m[i][j] =
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
397 (mat1->m[i][0] * mat2->m[0][j]) +
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
398 (mat1->m[i][1] * mat2->m[1][j]) +
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
399 (mat1->m[i][2] * mat2->m[2][j]);
270
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
400 #endif
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
401 }
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
402
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
403
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
404 void dm_matrix_mul(DMMatrix *mat1, const DMMatrix *mat2)
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
405 {
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
406 DMMatrix tmpM;
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
407 dm_matrix_mul_r(&tmpM, mat1, mat2);
89a05a5e7a82 Add (untested) SSE asm version of matrix product (matrix x matrix multiplication).
Matti Hamalainen <ccr@tnsp.org>
parents: 269
diff changeset
408 memcpy(mat1, &tmpM, sizeof(DMMatrix));
269
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
409 }
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
410
159264c27929 Add some new vector and matrix operations, and introduce some SSE inline
Matti Hamalainen <ccr@tnsp.org>
parents: 239
diff changeset
411
0
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
412 /* Multiply given list of matrices (size of nMatrices units) with given matrix.
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
413 */
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
414 void dm_matrix_mul_n(DMMatrix * list, const int nlist, const DMMatrix *mat)
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
415 {
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
416 int i;
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
417 for (i = 0; i < nlist; i++)
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
418 dm_matrix_mul(&list[i], mat);
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
419 }
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
420
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
421
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
422 /* Optimized rotation matrix creation
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
423 */
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
424 void dm_matrix_rot(DMMatrix *mat,
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
425 const DMFloat sx, const DMFloat sy, const DMFloat sz,
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
426 const DMFloat cx, const DMFloat cy, const DMFloat cz)
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
427 {
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
428 const DMFloat
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
429 q = cx * sz,
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
430 l = cx * cz,
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
431 i = sx * sz,
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
432 j = sx * cz;
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
433
281
9ba4f25abbce Fix rotation matrix creation.
Matti Hamalainen <ccr@tnsp.org>
parents: 280
diff changeset
434 memset(mat, 0, sizeof(DMMatrix));
9ba4f25abbce Fix rotation matrix creation.
Matti Hamalainen <ccr@tnsp.org>
parents: 280
diff changeset
435
0
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
436 mat->m[0][0] = cy * cz;
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
437 mat->m[0][1] = cy * sz;
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
438 mat->m[0][2] = -sy;
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
439
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
440
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
441 mat->m[1][0] = (sy * j) - q;
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
442 mat->m[1][1] = (sy * i) + l;
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
443 mat->m[1][2] = sx * cy;
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
444
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
445
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
446 mat->m[2][0] = (sy * l) + i;
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
447 mat->m[2][1] = (sy * q) - j;
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
448 mat->m[2][2] = cx * cy;
281
9ba4f25abbce Fix rotation matrix creation.
Matti Hamalainen <ccr@tnsp.org>
parents: 280
diff changeset
449
9ba4f25abbce Fix rotation matrix creation.
Matti Hamalainen <ccr@tnsp.org>
parents: 280
diff changeset
450 mat->m[3][3] = 1.0f;
0
32250b436bca Initial re-import.
Matti Hamalainen <ccr@tnsp.org>
parents:
diff changeset
451 }