# HG changeset patch # User Matti Hamalainen # Date 1503136954 -10800 # Node ID 696c58784635518efff829c37320bc8b1b2a57be # Parent 582d49915059f8baacdb78c1a2cd8c2c7af65dd7 Remove SSE2 SIMD vector math paths, apparently they no longer work (segfault).. maybe a 32bit/64bit issue. diff -r 582d49915059 -r 696c58784635 src/dmvecmat.c --- a/src/dmvecmat.c Sat Aug 19 12:45:24 2017 +0300 +++ b/src/dmvecmat.c Sat Aug 19 13:02:34 2017 +0300 @@ -12,17 +12,7 @@ int i; for (i = 0; i < nlist; i++) { -#ifdef DM_USE_SIMD - asm("movups %2, %%xmm1\n" - "movups %1, %%xmm2\n" - "addps %%xmm2, %%xmm1\n" - "movups %%xmm1, %0\n" - : "=m" (dst[i]) - : "m" (dst[i]), "m" (src[i]) - : "memory", "%xmm1", "%xmm2"); -#else dm_vector_add(dst + i, src + i); -#endif } } @@ -32,17 +22,7 @@ int i; for (i = 0; i < nlist; i++) { -#ifdef DM_USE_SIMD - asm("movups %2, %%xmm1\n" - "movups %1, %%xmm2\n" - "addps %%xmm2, %%xmm1\n" - "movups %%xmm1, %0\n" - : "=m" (dst[i]) - : "m" (src1[i]), "m" (src2[i]) - : "memory", "%xmm1", "%xmm2"); -#else dm_vector_add_r(dst + i, src1 + i, src2 + i); -#endif } } @@ -52,17 +32,7 @@ int i; for (i = 0; i < nlist; i++) { -#ifdef DM_USE_SIMD - asm("movups %2, %%xmm1\n" - "movups %1, %%xmm2\n" - "subps %%xmm2, %%xmm1\n" - "movups %%xmm1, %0\n" - : "=m" (dst[i]) - : "m" (dst[i]), "m" (src[i]) - : "memory", "%xmm1", "%xmm2"); -#else dm_vector_add(dst + i, src + i); -#endif } } @@ -72,17 +42,7 @@ int i; for (i = 0; i < nlist; i++) { -#ifdef DM_USE_SIMD - asm("movups %2, %%xmm1\n" - "movups %1, %%xmm2\n" - "subps %%xmm2, %%xmm1\n" - "movups %%xmm1, %0\n" - : "=m" (dst[i]) - : "m" (src1[i]), "m" (src2[i]) - : "memory", "%xmm1", "%xmm2"); -#else dm_vector_sub_r(dst + i, src1 + i, src2 + i); -#endif } } @@ -91,55 +51,9 @@ */ void dm_vector_mul_by_mat(DMVector *vd, const DMVector *vs, const DMMatrix *mat) { -#ifdef DM_USE_SIMD - asm volatile( - "mov %1, %%edx\n" - "movups (%%edx), %%xmm4\n" - "movups 16(%%edx), %%xmm5\n" - "movups 32(%%edx), %%xmm6\n" - "movups 48(%%edx), %%xmm7\n" - - // vector -> xmm0 - "movups %2, %%xmm0\n" - - // zero final result in xmm2 - "xorps %%xmm2, %%xmm2\n" - - // perform shuffle and multiply and add whole "column" "X" - "movups %%xmm0, %%xmm1\n" - "shufps $0x00, %%xmm1, %%xmm1\n" - "mulps %%xmm4, %%xmm1\n" - "addps %%xmm1, %%xmm2\n" - - // Y - "movups %%xmm0, %%xmm1\n" - "shufps $0x55, %%xmm1, %%xmm1\n" - "mulps %%xmm5, %%xmm1\n" - "addps %%xmm1, %%xmm2\n" - - // Z - "movups %%xmm0, %%xmm1\n" - "shufps $0xAA, %%xmm1, %%xmm1\n" - "mulps %%xmm6, %%xmm1\n" - "addps %%xmm1, %%xmm2\n" - - // W - "movups %%xmm0, %%xmm1\n" - "shufps $0xFF, %%xmm1, %%xmm1\n" - "mulps %%xmm7, %%xmm1\n" - "addps %%xmm1, %%xmm2\n" - - // Result -> - "movups %%xmm2, %0\n" - : "=m" (vd) - : "m" (mat), "m" (vs) - : "memory", "%edx", "%xmm0", "%xmm1", "%xmm2", "%xmm4", "%xmm5", "%xmm6", "%xmm7" - ); -#else vd->x = (vs->x * mat->m[0][0]) + (vs->y * mat->m[1][0]) + (vs->z * mat->m[2][0]); vd->y = (vs->x * mat->m[0][1]) + (vs->y * mat->m[1][1]) + (vs->z * mat->m[2][1]); vd->z = (vs->x * mat->m[0][2]) + (vs->y * mat->m[1][2]) + (vs->z * mat->m[2][2]); -#endif } @@ -149,67 +63,14 @@ { int i; -#ifdef DM_USE_SIMD - asm volatile( - "mov %0, %%edx\n" - "movups (%%edx), %%xmm4\n" - "movups 16(%%edx), %%xmm5\n" - "movups 32(%%edx), %%xmm6\n" - "movups 48(%%edx), %%xmm7\n" - : - : "m" (mat) - : "%edx", "%xmm4", "%xmm5", "%xmm6", "%xmm7" - ); -#endif - for (i = 0; i < nlist; i++) { -#ifdef DM_USE_SIMD - asm volatile - ( - // list[i] -> xmm0 - "movups %1, %%xmm0\n" - - // zero final result in xmm2 - "xorps %%xmm2, %%xmm2\n" - - // perform shuffle and multiply and add whole "column" "X" - "movups %%xmm0, %%xmm1\n" - "shufps $0x00, %%xmm1, %%xmm1\n" - "mulps %%xmm4, %%xmm1\n" - "addps %%xmm1, %%xmm2\n" - - // Y - "movups %%xmm0, %%xmm1\n" - "shufps $0x55, %%xmm1, %%xmm1\n" - "mulps %%xmm5, %%xmm1\n" - "addps %%xmm1, %%xmm2\n" - - // Z - "movups %%xmm0, %%xmm1\n" - "shufps $0xAA, %%xmm1, %%xmm1\n" - "mulps %%xmm6, %%xmm1\n" - "addps %%xmm1, %%xmm2\n" - - // W - "movups %%xmm0, %%xmm1\n" - "shufps $0xFF, %%xmm1, %%xmm1\n" - "mulps %%xmm7, %%xmm1\n" - "addps %%xmm1, %%xmm2\n" - - // Result -> - "movups %%xmm2, %0\n" - : "=m" (list[i]) - : "m" (list[i]) - : "memory", "%xmm0", "%xmm1", "%xmm2", "%xmm4", "%xmm5", "%xmm6", "%xmm7"); -#else DMVector q; memcpy(&q, &list[i], sizeof(DMVector)); list[i].x = (q.x * mat->m[0][0]) + (q.y * mat->m[1][0]) + (q.z * mat->m[2][0]); list[i].y = (q.x * mat->m[0][1]) + (q.y * mat->m[1][1]) + (q.z * mat->m[2][1]); list[i].z = (q.x * mat->m[0][2]) + (q.y * mat->m[1][2]) + (q.z * mat->m[2][2]); -#endif } } @@ -242,154 +103,6 @@ */ void dm_matrix_mul_r(DMMatrix *dst, const DMMatrix *mat1, const DMMatrix *mat2) { -#ifdef DM_USE_SIMD - asm volatile( - "mov %1, %%ebx\n" - "mov %2, %%edx\n" - - // -------------------------------------------------- - - // 0 - "movups (%%ebx), %%xmm0\n" // mat1[0] - "movups (%%edx), %%xmm1\n" // mat2[0] - "shufps $0x00, %%xmm1, %%xmm1\n" // mat2[0][0] - "mulps %%xmm0, %%xmm1\n" - "movups %%xmm1, %%xmm3\n" - - // 1 - "movups 16(%%ebx), %%xmm0\n" // mat1[0] - "movups (%%edx), %%xmm1\n" // mat2[0] - "shufps $0x55, %%xmm1, %%xmm1\n" // mat2[0][1] - "mulps %%xmm0, %%xmm1\n" - "addps %%xmm1, %%xmm3\n" - - // 2 - "movups 32(%%ebx), %%xmm0\n" // mat1[0] - "movups (%%edx), %%xmm1\n" // mat2[0] - "shufps $0xAA, %%xmm1, %%xmm1\n" // mat2[0][2] - "mulps %%xmm0, %%xmm1\n" - "addps %%xmm1, %%xmm3\n" - - // 3 - "movups 48(%%ebx), %%xmm0\n" // mat1[0] - "movups (%%edx), %%xmm1\n" // mat2[0] - "shufps $0xFF, %%xmm1, %%xmm1\n" // mat2[0][3] - "mulps %%xmm0, %%xmm1\n" - "addps %%xmm1, %%xmm3\n" - - "mov %0, %%ebx\n" - "movups %%xmm3, (%%ebx)\n" - - // -------------------------------------------------- - - "mov %1, %%ebx\n" - - // 0 - "movups (%%ebx), %%xmm0\n" // mat1[0] - "movups 16(%%edx), %%xmm1\n" // mat2[1] - "shufps $0x00, %%xmm1, %%xmm1\n" // mat2[0][0] - "mulps %%xmm0, %%xmm1\n" - "movups %%xmm1, %%xmm3\n" - - // 1 - "movups 16(%%ebx), %%xmm0\n" // mat1[0] - "movups 16(%%edx), %%xmm1\n" // mat2[1] - "shufps $0x55, %%xmm1, %%xmm1\n" // mat2[0][1] - "mulps %%xmm0, %%xmm1\n" - "addps %%xmm1, %%xmm3\n" - - // 2 - "movups 32(%%ebx), %%xmm0\n" // mat1[0] - "movups 16(%%edx), %%xmm1\n" // mat2[1] - "shufps $0xAA, %%xmm1, %%xmm1\n" // mat2[0][2] - "mulps %%xmm0, %%xmm1\n" - "addps %%xmm1, %%xmm3\n" - - // 3 - "movups 48(%%ebx), %%xmm0\n" // mat1[0] - "movups 16(%%edx), %%xmm1\n" // mat2[1] - "shufps $0xFF, %%xmm1, %%xmm1\n" // mat2[0][3] - "mulps %%xmm0, %%xmm1\n" - "addps %%xmm1, %%xmm3\n" - - "mov %0, %%ebx\n" - "movups %%xmm3, 16(%%ebx)\n" - - // -------------------------------------------------- - - "mov %1, %%ebx\n" - - // 0 - "movups (%%ebx), %%xmm0\n" // mat1[0] - "movups 32(%%edx), %%xmm1\n" // mat2[1] - "shufps $0x00, %%xmm1, %%xmm1\n" // mat2[0][0] - "mulps %%xmm0, %%xmm1\n" - "movups %%xmm1, %%xmm3\n" - - // 1 - "movups 16(%%ebx), %%xmm0\n" // mat1[0] - "movups 32(%%edx), %%xmm1\n" // mat2[1] - "shufps $0x55, %%xmm1, %%xmm1\n" // mat2[0][1] - "mulps %%xmm0, %%xmm1\n" - "addps %%xmm1, %%xmm3\n" - - // 2 - "movups 32(%%ebx), %%xmm0\n" // mat1[0] - "movups 32(%%edx), %%xmm1\n" // mat2[1] - "shufps $0xAA, %%xmm1, %%xmm1\n" // mat2[0][2] - "mulps %%xmm0, %%xmm1\n" - "addps %%xmm1, %%xmm3\n" - - // 3 - "movups 48(%%ebx), %%xmm0\n" // mat1[0] - "movups 32(%%edx), %%xmm1\n" // mat2[1] - "shufps $0xFF, %%xmm1, %%xmm1\n" // mat2[0][3] - "mulps %%xmm0, %%xmm1\n" - "addps %%xmm1, %%xmm3\n" - - "mov %0, %%ebx\n" - "movups %%xmm3, 32(%%ebx)\n" - - // -------------------------------------------------- - - "mov %1, %%ebx\n" - - // 0 - "movups (%%ebx), %%xmm0\n" // mat1[0] - "movups 48(%%edx), %%xmm1\n" // mat2[1] - "shufps $0x00, %%xmm1, %%xmm1\n" // mat2[0][0] - "mulps %%xmm0, %%xmm1\n" - "movups %%xmm1, %%xmm3\n" - - // 1 - "movups 16(%%ebx), %%xmm0\n" // mat1[0] - "movups 48(%%edx), %%xmm1\n" // mat2[1] - "shufps $0x55, %%xmm1, %%xmm1\n" // mat2[0][1] - "mulps %%xmm0, %%xmm1\n" - "addps %%xmm1, %%xmm3\n" - - // 2 - "movups 32(%%ebx), %%xmm0\n" // mat1[0] - "movups 48(%%edx), %%xmm1\n" // mat2[1] - "shufps $0xAA, %%xmm1, %%xmm1\n" // mat2[0][2] - "mulps %%xmm0, %%xmm1\n" - "addps %%xmm1, %%xmm3\n" - - // 3 - "movups 48(%%ebx), %%xmm0\n" // mat1[0] - "movups 48(%%edx), %%xmm1\n" // mat2[1] - "shufps $0xFF, %%xmm1, %%xmm1\n" // mat2[0][3] - "mulps %%xmm0, %%xmm1\n" - "addps %%xmm1, %%xmm3\n" - - "mov %0, %%ebx\n" - "movups %%xmm3, 48(%%ebx)\n" - - : "=m" (dst) - : "m" (mat1), "m" (mat2) - : "memory", "%edx", "%ebx", "%xmm0", "%xmm2", "%xmm3" - ); -#else int i, j; for (i = 0; i < DM_MATRIX_SIZE; i++) for (j = 0; j < DM_MATRIX_SIZE; j++) @@ -397,7 +110,6 @@ (mat1->m[i][0] * mat2->m[0][j]) + (mat1->m[i][1] * mat2->m[1][j]) + (mat1->m[i][2] * mat2->m[2][j]); -#endif } @@ -452,5 +164,4 @@ mat->m[2][0] = (sy * l) + i; mat->m[2][1] = (sy * q) - j; mat->m[2][2] = cx * cy; - }