# HG changeset patch
# User Matti Hamalainen
# Date 1349876143 -10800
# Node ID 159264c2792988735264068c3c74eefda2965aa0
# Parent  f07ddc4982c661f6f22324ec768971442ecef5a2
Add some new vector and matrix operations, and introduce some SSE inline assembler optimized versions.

diff -r f07ddc4982c6 -r 159264c27929 dmvecmat.c
--- a/dmvecmat.c	Wed Oct 10 14:08:39 2012 +0300
+++ b/dmvecmat.c	Wed Oct 10 16:35:43 2012 +0300
@@ -7,20 +7,176 @@
 #include "dmvecmat.h"
 
 
+/* Apply dm_vector_add() to each (dst[i], src[i]) pair, i = 0 .. nlist-1
+ */
+void dm_vector_add_n(DMVector *dst, const DMVector *src, const int nlist)
+{
+    int i;
+    for (i = 0; i < nlist; i++)
+        dm_vector_add(dst + i, src + i);
+}
+
+
+/* Apply dm_vector_add_r() to each (dst[i], src1[i], src2[i]) triple
+ */
+void dm_vector_add_r_n(DMVector *dst, const DMVector *src1, const DMVector *src2, const int nlist)
+{
+    int i;
+    for (i = 0; i < nlist; i++)
+        dm_vector_add_r(dst + i, src1 + i, src2 + i);
+}
+
+
+/* Apply dm_vector_sub() to each (dst[i], src[i]) pair, i = 0 .. nlist-1
+ */
+void dm_vector_sub_n(DMVector *dst, const DMVector *src, const int nlist)
+{
+    int i;
+    for (i = 0; i < nlist; i++)
+        dm_vector_sub(dst + i, src + i);
+}
+
+
+/* Apply dm_vector_sub_r() to each (dst[i], src1[i], src2[i]) triple
+ */
+void dm_vector_sub_r_n(DMVector *dst, const DMVector *src1, const DMVector *src2, const int nlist)
+{
+    int i;
+    for (i = 0; i < nlist; i++)
+        dm_vector_sub_r(dst + i, src1 + i, src2 + i);
+}
+
+
+/* Multiply vector vs with matrix mat and store the result in vd
+ */
+void dm_vector_mul_by_mat(DMVector *vd, const DMVector *vs, const DMMatrix *mat)
+{
+#ifdef DM_USE_SIMD
+    asm volatile(
+        "mov %1, %%edx\n" // NOTE(review): 32-bit pointer load, assumes ia32
+        "movups (%%edx), %%xmm4\n"
+        "movups 16(%%edx), %%xmm5\n"
+        "movups 32(%%edx), %%xmm6\n"
+        "movups 48(%%edx), %%xmm7\n"
+
+        // vector -> xmm0
+        "movups %2, %%xmm0\n"
+
+        // zero final result in xmm2
+        "xorps %%xmm2, %%xmm2\n"
+
+        // perform shuffle and multiply and add whole "column" "X"
+        "movups %%xmm0, %%xmm1\n"
+        "shufps $0x00, %%xmm1, %%xmm1\n"
+        "mulps %%xmm4, %%xmm1\n"
+        "addps %%xmm1, %%xmm2\n"
+
+        // Y
+        "movups %%xmm0, %%xmm1\n"
+        "shufps $0x55, %%xmm1, %%xmm1\n"
+        "mulps %%xmm5, %%xmm1\n"
+        "addps %%xmm1, %%xmm2\n"
+
+        // Z
+        "movups %%xmm0, %%xmm1\n"
+        "shufps $0xAA, %%xmm1, %%xmm1\n"
+        "mulps %%xmm6, %%xmm1\n"
+        "addps %%xmm1, %%xmm2\n"
+
+#if 0
+        // W
+        "movups %%xmm0, %%xmm1\n"
+        "shufps $0xFF, %%xmm1, %%xmm1\n"
+        "mulps %%xmm7, %%xmm1\n"
+        "addps %%xmm1, %%xmm2\n"
+#endif
+
+        // Result ->
+        "movups %%xmm2, %0\n"
+        : "=m" (*vd)             // store into the vector, not over the pointer variable
+        : "m" (mat), "m" (*vs)   // %1: pointer variable (read by mov), %2: vector data
+        : "memory", "%edx", "%xmm0", "%xmm1", "%xmm2", "%xmm4", "%xmm5", "%xmm6", "%xmm7"
+        );
+#else
+    vd->x = (vs->x * mat->m[0][0]) + (vs->y * mat->m[1][0]) + (vs->z * mat->m[2][0]);
+    vd->y = (vs->x * mat->m[0][1]) + (vs->y * mat->m[1][1]) + (vs->z * mat->m[2][1]);
+    vd->z = (vs->x * mat->m[0][2]) + (vs->y * mat->m[1][2]) + (vs->z * mat->m[2][2]);
+#endif
+}
+
+
 /* Multiply list of given vectors with given matrix.
  */
 void dm_vector_mul_by_mat_n(DMVector *list, const int nlist, const DMMatrix *mat)
 {
     int i;
 
+#ifdef DM_USE_SIMD
+    // Load the four matrix rows into xmm4-xmm7 once, ahead of the loop.
+    // NOTE(review): the asm statement inside the loop reads these
+    // registers, but nothing tells the compiler to keep them intact
+    // between the two asm statements -- confirm the generated code.
+    asm volatile(
+        "mov %0, %%edx\n"
+        "movups (%%edx), %%xmm4\n"
+        "movups 16(%%edx), %%xmm5\n"
+        "movups 32(%%edx), %%xmm6\n"
+        "movups 48(%%edx), %%xmm7\n"
+        :
+        : "m" (mat)
+        : "%edx", "%xmm4", "%xmm5", "%xmm6", "%xmm7"
+        );
+#endif
+
     for (i = 0; i < nlist; i++)
     {
+#ifdef DM_USE_SIMD
+        asm volatile
+            (
+            // list[i] -> xmm0
+            "movups %1, %%xmm0\n"
+
+            // zero final result in xmm2
+            "xorps %%xmm2, %%xmm2\n"
+
+            // perform shuffle and multiply and add whole "column" "X"
+            "movups %%xmm0, %%xmm1\n"
+            "shufps $0x00, %%xmm1, %%xmm1\n"
+            "mulps %%xmm4, %%xmm1\n"
+            "addps %%xmm1, %%xmm2\n"
+
+            // Y
+            "movups %%xmm0, %%xmm1\n"
+            "shufps $0x55, %%xmm1, %%xmm1\n"
+            "mulps %%xmm5, %%xmm1\n"
+            "addps %%xmm1, %%xmm2\n"
+
+            // Z
+            "movups %%xmm0, %%xmm1\n"
+            "shufps $0xAA, %%xmm1, %%xmm1\n"
+            "mulps %%xmm6, %%xmm1\n"
+            "addps %%xmm1, %%xmm2\n"
+
+#if 0
+            // W
+            "movups %%xmm0, %%xmm1\n"
+            "shufps $0xFF, %%xmm1, %%xmm1\n"
+            "mulps %%xmm7, %%xmm1\n"
+            "addps %%xmm1, %%xmm2\n"
+#endif
+            // Result ->
+            "movups %%xmm2, %0\n"
+            : "=m" (list[i])
+            : "m" (list[i])
+            : "memory", "%xmm0", "%xmm1", "%xmm2", "%xmm4", "%xmm5", "%xmm6", "%xmm7");
+#else
         DMVector q;
         memcpy(&q, &list[i], sizeof(DMVector));
 
         list[i].x = (q.x * mat->m[0][0]) + (q.y * mat->m[1][0]) + (q.z * mat->m[2][0]);
         list[i].y = (q.x * mat->m[0][1]) + (q.y * mat->m[1][1]) + (q.z * mat->m[2][1]);
         list[i].z = (q.x * mat->m[0][2]) + (q.y * mat->m[1][2]) + (q.z * mat->m[2][2]);
+#endif
     }
 }
 
@@ -66,6 +222,21 @@
 }
 
 
+/* Write the product of mat1 and mat2 into dst; each element sums terms 0..2
+ */
+void dm_matrix_mul_r(DMMatrix *dst, const DMMatrix *mat1, const DMMatrix *mat2)
+{
+    int i, j;
+
+    for (i = 0; i < DM_MATRIX_SIZE; i++)
+        for (j = 0; j < DM_MATRIX_SIZE; j++)
+            dst->m[i][j] =
+                (mat1->m[i][0] * mat2->m[0][j]) +
+                (mat1->m[i][1] * mat2->m[1][j]) +
+                (mat1->m[i][2] * mat2->m[2][j]);
+}
+
+
 /* Multiply given list of matrices (size of nMatrices units) with given matrix.
  */
 void dm_matrix_mul_n(DMMatrix * list, const int nlist, const DMMatrix *mat)
diff -r f07ddc4982c6 -r 159264c27929 dmvecmat.h
--- a/dmvecmat.h	Wed Oct 10 14:08:39 2012 +0300
+++ b/dmvecmat.h	Wed Oct 10 16:35:43 2012 +0300
@@ -26,12 +26,20 @@
     DMFloat m[DM_MATRIX_SIZE][DM_MATRIX_SIZE];
 } DMMatrix;
 
+
+void dm_vector_add_n(DMVector *dst, const DMVector *src, const int nlist);
+void dm_vector_add_r_n(DMVector *dst, const DMVector *src1, const DMVector *src2, const int nlist);
+void dm_vector_sub_n(DMVector *dst, const DMVector *src, const int nlist);
+void dm_vector_sub_r_n(DMVector *dst, const DMVector *src1, const DMVector *src2, const int nlist);
+
+void dm_vector_mul_by_mat(DMVector *vd, const DMVector *vs, const DMMatrix *mat);
 void dm_vector_mul_by_mat_n(DMVector *list, const int nlist, const DMMatrix *mat);
 
 
 void dm_matrix_unit(DMMatrix *mat);
 void dm_matrix_transpose(DMMatrix *mat1, const DMMatrix *mat2);
 void dm_matrix_mul(DMMatrix *mat1, const DMMatrix *mat2);
+void dm_matrix_mul_r(DMMatrix *dst, const DMMatrix *mat1, const DMMatrix *mat2);
 void dm_matrix_mul_n(DMMatrix *list, const int nlist, const DMMatrix *mat);
 
 void dm_matrix_rot(DMMatrix *mat,
@@ -39,14 +47,14 @@
     const DMFloat cx, const DMFloat cy, const DMFloat cz);
 
 
+/* Basic vector operations
+ */
 static inline void dm_vector_copy(DMVector *vd, const DMVector *vs)
 {
     memcpy(vd, vs, sizeof(DMVector));
 }
 
 
-/* Basic vector operations
- */
 static inline void dm_vector_add(DMVector *vr, const DMVector *v2)
 {
 #ifdef DM_USE_SIMD
@@ -171,16 +179,6 @@
 }
 
 
-/* Multiply given vector with a matrix
- */
-static inline void dm_vector_mul_by_mat(DMVector *vd, const DMVector *vs, const DMMatrix *mat)
-{
-    vd->x = (vs->x * mat->m[0][0]) + (vs->y * mat->m[1][0]) + (vs->z * mat->m[2][0]);
-    vd->y = (vs->x * mat->m[0][1]) + (vs->y * mat->m[1][1]) + (vs->z * mat->m[2][1]);
-    vd->z = (vs->x * mat->m[0][2]) + (vs->y * mat->m[1][2]) + (vs->z * mat->m[2][2]);
-}
-
-
 /* Make rotation matrix from given angles (radians)
  */
 static inline void dm_matrix_rot_a(DMMatrix *mat, const DMFloat ax, const DMFloat ay, const DMFloat az)