dmlib: src/dmvecmat.c comparison

comparison src/dmvecmat.c @ 1306:696c58784635

Remove SSE2 SIMD vector math paths, apparently they no longer work (segfault).. maybe a 32bit/64bit issue.

author	Matti Hamalainen <ccr@tnsp.org>
date	Sat, 19 Aug 2017 13:02:34 +0300
parents	848a88ce7a57
children	69a5af2eb1ea

comparison

equal deleted inserted replaced

-:582d49915059
+:696c58784635
 void dm_vector_add_n(DMVector *dst, const DMVector *src, const int nlist)
 {
 int i;
 for (i = 0; i < nlist; i++)
 {
-#ifdef DM_USE_SIMD
-asm("movups      %2,     %%xmm1\n"
-"movups      %1,     %%xmm2\n"
-"addps       %%xmm2, %%xmm1\n"
-"movups      %%xmm1, %0\n"
-: "=m" (dst[i])
-: "m" (dst[i]), "m" (src[i])
-: "memory", "%xmm1", "%xmm2");
-#else
 dm_vector_add(dst + i, src + i);
-#endif
 }
 }
 void dm_vector_add_r_n(DMVector *dst, const DMVector *src1, const DMVector *src2, const int nlist)
 {
 int i;
 for (i = 0; i < nlist; i++)
 {
-#ifdef DM_USE_SIMD
-asm("movups      %2,     %%xmm1\n"
-"movups      %1,     %%xmm2\n"
-"addps       %%xmm2, %%xmm1\n"
-"movups      %%xmm1, %0\n"
-: "=m" (dst[i])
-: "m" (src1[i]), "m" (src2[i])
-: "memory", "%xmm1", "%xmm2");
-#else
 dm_vector_add_r(dst + i, src1 + i, src2 + i);
-#endif
 }
 }
 void dm_vector_sub_n(DMVector *dst, const DMVector *src, const int nlist)
 {
 int i;
 for (i = 0; i < nlist; i++)
 {
-#ifdef DM_USE_SIMD
-asm("movups      %2,     %%xmm1\n"
-"movups      %1,     %%xmm2\n"
-"subps       %%xmm2, %%xmm1\n"
-"movups      %%xmm1, %0\n"
-: "=m" (dst[i])
-: "m" (dst[i]), "m" (src[i])
-: "memory", "%xmm1", "%xmm2");
-#else
 dm_vector_add(dst + i, src + i);
-#endif
 }
 }
 void dm_vector_sub_r_n(DMVector *dst, const DMVector *src1, const DMVector *src2, const int nlist)
 {
 int i;
 for (i = 0; i < nlist; i++)
 {
-#ifdef DM_USE_SIMD
-asm("movups      %2,     %%xmm1\n"
-"movups      %1,     %%xmm2\n"
-"subps       %%xmm2, %%xmm1\n"
-"movups      %%xmm1, %0\n"
-: "=m" (dst[i])
-: "m" (src1[i]), "m" (src2[i])
-: "memory", "%xmm1", "%xmm2");
-#else
 dm_vector_sub_r(dst + i, src1 + i, src2 + i);
-#endif
 }
 }
 /* Multiply given vector with a matrix
 */
 void dm_vector_mul_by_mat(DMVector *vd, const DMVector *vs, const DMMatrix *mat)
 {
-#ifdef DM_USE_SIMD
-asm volatile(
-"mov           %1,        %%edx\n"
-"movups        (%%edx),   %%xmm4\n"
-"movups      16(%%edx),   %%xmm5\n"
-"movups      32(%%edx),   %%xmm6\n"
-"movups      48(%%edx),   %%xmm7\n"
-// vector -> xmm0
-"movups      %2,     %%xmm0\n"
-// zero final result in xmm2
-"xorps       %%xmm2, %%xmm2\n"
-// perform shuffle and multiply and add whole "column" "X"
-"movups      %%xmm0, %%xmm1\n"
-"shufps      $0x00,  %%xmm1, %%xmm1\n"
-"mulps       %%xmm4, %%xmm1\n"
-"addps       %%xmm1, %%xmm2\n"
-// Y
-"movups      %%xmm0, %%xmm1\n"
-"shufps      $0x55,  %%xmm1, %%xmm1\n"
-"mulps       %%xmm5, %%xmm1\n"
-"addps       %%xmm1, %%xmm2\n"
-// Z
-"movups      %%xmm0, %%xmm1\n"
-"shufps      $0xAA,  %%xmm1, %%xmm1\n"
-"mulps       %%xmm6, %%xmm1\n"
-"addps       %%xmm1, %%xmm2\n"
-// W
-"movups      %%xmm0, %%xmm1\n"
-"shufps      $0xFF,  %%xmm1, %%xmm1\n"
-"mulps       %%xmm7, %%xmm1\n"
-"addps       %%xmm1, %%xmm2\n"
-// Result ->
-"movups      %%xmm2, %0\n"
-: "=m" (vd)
-: "m" (mat), "m" (vs)
-: "memory", "%edx", "%xmm0", "%xmm1", "%xmm2", "%xmm4", "%xmm5", "%xmm6", "%xmm7"
-);
-#else
 vd->x = (vs->x * mat->m[0][0]) + (vs->y * mat->m[1][0]) + (vs->z * mat->m[2][0]);
 vd->y = (vs->x * mat->m[0][1]) + (vs->y * mat->m[1][1]) + (vs->z * mat->m[2][1]);
 vd->z = (vs->x * mat->m[0][2]) + (vs->y * mat->m[1][2]) + (vs->z * mat->m[2][2]);
-#endif
 }
 /* Multiply list of given vectors with given matrix.
 */
 void dm_vector_mul_by_mat_n(DMVector *list, const int nlist, const DMMatrix *mat)
 {
 int i;
-#ifdef DM_USE_SIMD
-asm volatile(
-"mov           %0,        %%edx\n"
-"movups        (%%edx),   %%xmm4\n"
-"movups      16(%%edx),   %%xmm5\n"
-"movups      32(%%edx),   %%xmm6\n"
-"movups      48(%%edx),   %%xmm7\n"
-:
-: "m" (mat)
-: "%edx", "%xmm4", "%xmm5", "%xmm6", "%xmm7"
-);
-#endif
 for (i = 0; i < nlist; i++)
 {
-#ifdef DM_USE_SIMD
-asm volatile
-(
-// list[i] -> xmm0
-"movups      %1,     %%xmm0\n"
-// zero final result in xmm2
-"xorps       %%xmm2, %%xmm2\n"
-// perform shuffle and multiply and add whole "column" "X"
-"movups      %%xmm0, %%xmm1\n"
-"shufps      $0x00,  %%xmm1, %%xmm1\n"
-"mulps       %%xmm4, %%xmm1\n"
-"addps       %%xmm1, %%xmm2\n"
-// Y
-"movups      %%xmm0, %%xmm1\n"
-"shufps      $0x55,  %%xmm1, %%xmm1\n"
-"mulps       %%xmm5, %%xmm1\n"
-"addps       %%xmm1, %%xmm2\n"
-// Z
-"movups      %%xmm0, %%xmm1\n"
-"shufps      $0xAA,  %%xmm1, %%xmm1\n"
-"mulps       %%xmm6, %%xmm1\n"
-"addps       %%xmm1, %%xmm2\n"
-// W
-"movups      %%xmm0, %%xmm1\n"
-"shufps      $0xFF,  %%xmm1, %%xmm1\n"
-"mulps       %%xmm7, %%xmm1\n"
-"addps       %%xmm1, %%xmm2\n"
-// Result ->
-"movups      %%xmm2, %0\n"
-: "=m" (list[i])
-: "m" (list[i])
-: "memory", "%xmm0", "%xmm1", "%xmm2", "%xmm4", "%xmm5", "%xmm6", "%xmm7");
-#else
 DMVector q;
 memcpy(&q, &list[i], sizeof(DMVector));
 list[i].x = (q.x * mat->m[0][0]) + (q.y * mat->m[1][0]) + (q.z * mat->m[2][0]);
 list[i].y = (q.x * mat->m[0][1]) + (q.y * mat->m[1][1]) + (q.z * mat->m[2][1]);
 list[i].z = (q.x * mat->m[0][2]) + (q.y * mat->m[1][2]) + (q.z * mat->m[2][2]);
-#endif
 }
 }
 /* Set matrix to unit-matrix
 /* Multiply matrices mat1 and mat2, putting result into mat1
 */
 void dm_matrix_mul_r(DMMatrix *dst, const DMMatrix *mat1, const DMMatrix *mat2)
 {
-#ifdef DM_USE_SIMD
-asm volatile(
-"mov           %1,        %%ebx\n"
-"mov           %2,        %%edx\n"
-// --------------------------------------------------
-// 0
-"movups        (%%ebx),   %%xmm0\n" // mat1[0]
-"movups        (%%edx),   %%xmm1\n" // mat2[0]
-"shufps        $0x00, %%xmm1, %%xmm1\n" // mat2[0][0]
-"mulps         %%xmm0,    %%xmm1\n"
-"movups        %%xmm1,    %%xmm3\n"
-// 1
-"movups        16(%%ebx), %%xmm0\n" // mat1[0]
-"movups        (%%edx),   %%xmm1\n" // mat2[0]
-"shufps        $0x55, %%xmm1, %%xmm1\n" // mat2[0][1]
-"mulps         %%xmm0,    %%xmm1\n"
-"addps         %%xmm1,    %%xmm3\n"
-// 2
-"movups        32(%%ebx), %%xmm0\n" // mat1[0]
-"movups        (%%edx),   %%xmm1\n" // mat2[0]
-"shufps        $0xAA, %%xmm1, %%xmm1\n" // mat2[0][2]
-"mulps         %%xmm0,    %%xmm1\n"
-"addps         %%xmm1,    %%xmm3\n"
-// 3
-"movups        48(%%ebx), %%xmm0\n" // mat1[0]
-"movups        (%%edx),   %%xmm1\n" // mat2[0]
-"shufps        $0xFF, %%xmm1, %%xmm1\n" // mat2[0][3]
-"mulps         %%xmm0,    %%xmm1\n"
-"addps         %%xmm1,    %%xmm3\n"
-"mov           %0,        %%ebx\n"
-"movups        %%xmm3,    (%%ebx)\n"
-// --------------------------------------------------
-"mov           %1,        %%ebx\n"
-// 0
-"movups        (%%ebx),   %%xmm0\n" // mat1[0]
-"movups        16(%%edx), %%xmm1\n" // mat2[1]
-"shufps        $0x00, %%xmm1, %%xmm1\n" // mat2[0][0]
-"mulps         %%xmm0,    %%xmm1\n"
-"movups        %%xmm1,    %%xmm3\n"
-// 1
-"movups        16(%%ebx), %%xmm0\n" // mat1[0]
-"movups        16(%%edx), %%xmm1\n" // mat2[1]
-"shufps        $0x55, %%xmm1, %%xmm1\n" // mat2[0][1]
-"mulps         %%xmm0,    %%xmm1\n"
-"addps         %%xmm1,    %%xmm3\n"
-// 2
-"movups        32(%%ebx), %%xmm0\n" // mat1[0]
-"movups        16(%%edx), %%xmm1\n" // mat2[1]
-"shufps        $0xAA, %%xmm1, %%xmm1\n" // mat2[0][2]
-"mulps         %%xmm0,    %%xmm1\n"
-"addps         %%xmm1,    %%xmm3\n"
-// 3
-"movups        48(%%ebx), %%xmm0\n" // mat1[0]
-"movups        16(%%edx), %%xmm1\n" // mat2[1]
-"shufps        $0xFF, %%xmm1, %%xmm1\n" // mat2[0][3]
-"mulps         %%xmm0,    %%xmm1\n"
-"addps         %%xmm1,    %%xmm3\n"
-"mov           %0,        %%ebx\n"
-"movups        %%xmm3,    16(%%ebx)\n"
-// --------------------------------------------------
-"mov           %1,        %%ebx\n"
-// 0
-"movups        (%%ebx),   %%xmm0\n" // mat1[0]
-"movups        32(%%edx), %%xmm1\n" // mat2[1]
-"shufps        $0x00, %%xmm1, %%xmm1\n" // mat2[0][0]
-"mulps         %%xmm0,    %%xmm1\n"
-"movups        %%xmm1,    %%xmm3\n"
-// 1
-"movups        16(%%ebx), %%xmm0\n" // mat1[0]
-"movups        32(%%edx), %%xmm1\n" // mat2[1]
-"shufps        $0x55, %%xmm1, %%xmm1\n" // mat2[0][1]
-"mulps         %%xmm0,    %%xmm1\n"
-"addps         %%xmm1,    %%xmm3\n"
-// 2
-"movups        32(%%ebx), %%xmm0\n" // mat1[0]
-"movups        32(%%edx), %%xmm1\n" // mat2[1]
-"shufps        $0xAA, %%xmm1, %%xmm1\n" // mat2[0][2]
-"mulps         %%xmm0,    %%xmm1\n"
-"addps         %%xmm1,    %%xmm3\n"
-// 3
-"movups        48(%%ebx), %%xmm0\n" // mat1[0]
-"movups        32(%%edx), %%xmm1\n" // mat2[1]
-"shufps        $0xFF, %%xmm1, %%xmm1\n" // mat2[0][3]
-"mulps         %%xmm0,    %%xmm1\n"
-"addps         %%xmm1,    %%xmm3\n"
-"mov           %0,        %%ebx\n"
-"movups        %%xmm3,    32(%%ebx)\n"
-// --------------------------------------------------
-"mov           %1,        %%ebx\n"
-// 0
-"movups        (%%ebx),   %%xmm0\n" // mat1[0]
-"movups        48(%%edx), %%xmm1\n" // mat2[1]
-"shufps        $0x00, %%xmm1, %%xmm1\n" // mat2[0][0]
-"mulps         %%xmm0,    %%xmm1\n"
-"movups        %%xmm1,    %%xmm3\n"
-// 1
-"movups        16(%%ebx), %%xmm0\n" // mat1[0]
-"movups        48(%%edx), %%xmm1\n" // mat2[1]
-"shufps        $0x55, %%xmm1, %%xmm1\n" // mat2[0][1]
-"mulps         %%xmm0,    %%xmm1\n"
-"addps         %%xmm1,    %%xmm3\n"
-// 2
-"movups        32(%%ebx), %%xmm0\n" // mat1[0]
-"movups        48(%%edx), %%xmm1\n" // mat2[1]
-"shufps        $0xAA, %%xmm1, %%xmm1\n" // mat2[0][2]
-"mulps         %%xmm0,    %%xmm1\n"
-"addps         %%xmm1,    %%xmm3\n"
-// 3
-"movups        48(%%ebx), %%xmm0\n" // mat1[0]
-"movups        48(%%edx), %%xmm1\n" // mat2[1]
-"shufps        $0xFF, %%xmm1, %%xmm1\n" // mat2[0][3]
-"mulps         %%xmm0,    %%xmm1\n"
-"addps         %%xmm1,    %%xmm3\n"
-"mov           %0,        %%ebx\n"
-"movups        %%xmm3,    48(%%ebx)\n"
-: "=m" (dst)
-: "m" (mat1), "m" (mat2)
-: "memory", "%edx", "%ebx", "%xmm0", "%xmm2", "%xmm3"
-);
-#else
 int i, j;
 for (i = 0; i < DM_MATRIX_SIZE; i++)
 for (j = 0; j < DM_MATRIX_SIZE; j++)
 dst->m[i][j] =
 (mat1->m[i][0] * mat2->m[0][j]) +
 (mat1->m[i][1] * mat2->m[1][j]) +
 (mat1->m[i][2] * mat2->m[2][j]);
-#endif
 }
 void dm_matrix_mul(DMMatrix *mat1, const DMMatrix *mat2)
 {
 mat->m[2][0] = (sy * l) + i;
 mat->m[2][1] = (sy * q) - j;
 mat->m[2][2] = cx * cy;
 }

Mercurial > hg > dmlib

comparison src/dmvecmat.c @ 1306:696c58784635