Mercurial > hg > dmlib
view dmvecmat.c @ 570:a26636faa6b7
Update copyright.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Sat, 05 Jan 2013 19:58:23 +0200 |
parents | 1994cc78ce6c |
children |
line wrap: on
line source
/* * DMLib * -- Vector and matrix functions * Programmed and designed by Matti 'ccr' Hamalainen * (C) Copyright 2011-2012 Tecnic Software productions (TNSP) */ #include "dmvecmat.h" void dm_vector_add_n(DMVector *dst, const DMVector *src, const int nlist) { int i; for (i = 0; i < nlist; i++) { #ifdef DM_USE_SIMD asm("movups %2, %%xmm1\n" "movups %1, %%xmm2\n" "addps %%xmm2, %%xmm1\n" "movups %%xmm1, %0\n" : "=m" (dst[i]) : "m" (dst[i]), "m" (src[i]) : "memory", "%xmm1", "%xmm2"); #else dm_vector_add(dst + i, src + i); #endif } } void dm_vector_add_r_n(DMVector *dst, const DMVector *src1, const DMVector *src2, const int nlist) { int i; for (i = 0; i < nlist; i++) { #ifdef DM_USE_SIMD asm("movups %2, %%xmm1\n" "movups %1, %%xmm2\n" "addps %%xmm2, %%xmm1\n" "movups %%xmm1, %0\n" : "=m" (dst[i]) : "m" (src1[i]), "m" (src2[i]) : "memory", "%xmm1", "%xmm2"); #else dm_vector_add_r(dst + i, src1 + i, src2 + i); #endif } } void dm_vector_sub_n(DMVector *dst, const DMVector *src, const int nlist) { int i; for (i = 0; i < nlist; i++) { #ifdef DM_USE_SIMD asm("movups %2, %%xmm1\n" "movups %1, %%xmm2\n" "subps %%xmm2, %%xmm1\n" "movups %%xmm1, %0\n" : "=m" (dst[i]) : "m" (dst[i]), "m" (src[i]) : "memory", "%xmm1", "%xmm2"); #else dm_vector_add(dst + i, src + i); #endif } } void dm_vector_sub_r_n(DMVector *dst, const DMVector *src1, const DMVector *src2, const int nlist) { int i; for (i = 0; i < nlist; i++) { #ifdef DM_USE_SIMD asm("movups %2, %%xmm1\n" "movups %1, %%xmm2\n" "subps %%xmm2, %%xmm1\n" "movups %%xmm1, %0\n" : "=m" (dst[i]) : "m" (src1[i]), "m" (src2[i]) : "memory", "%xmm1", "%xmm2"); #else dm_vector_sub_r(dst + i, src1 + i, src2 + i); #endif } } /* Multiply given vector with a matrix */ void dm_vector_mul_by_mat(DMVector *vd, const DMVector *vs, const DMMatrix *mat) { #ifdef DM_USE_SIMD asm volatile( "mov %1, %%edx\n" "movups (%%edx), %%xmm4\n" "movups 16(%%edx), %%xmm5\n" "movups 32(%%edx), %%xmm6\n" "movups 48(%%edx), %%xmm7\n" // vector -> xmm0 "movups %2, %%xmm0\n" // zero final result in xmm2 "xorps %%xmm2, %%xmm2\n" // perform shuffle and multiply and add whole "column" "X" "movups %%xmm0, %%xmm1\n" "shufps $0x00, %%xmm1, %%xmm1\n" "mulps %%xmm4, %%xmm1\n" "addps %%xmm1, %%xmm2\n" // Y "movups %%xmm0, %%xmm1\n" "shufps $0x55, %%xmm1, %%xmm1\n" "mulps %%xmm5, %%xmm1\n" "addps %%xmm1, %%xmm2\n" // Z "movups %%xmm0, %%xmm1\n" "shufps $0xAA, %%xmm1, %%xmm1\n" "mulps %%xmm6, %%xmm1\n" "addps %%xmm1, %%xmm2\n" // W "movups %%xmm0, %%xmm1\n" "shufps $0xFF, %%xmm1, %%xmm1\n" "mulps %%xmm7, %%xmm1\n" "addps %%xmm1, %%xmm2\n" // Result -> "movups %%xmm2, %0\n" : "=m" (vd) : "m" (mat), "m" (vs) : "memory", "%edx", "%xmm0", "%xmm1", "%xmm2", "%xmm4", "%xmm5", "%xmm6", "%xmm7" ); #else vd->x = (vs->x * mat->m[0][0]) + (vs->y * mat->m[1][0]) + (vs->z * mat->m[2][0]); vd->y = (vs->x * mat->m[0][1]) + (vs->y * mat->m[1][1]) + (vs->z * mat->m[2][1]); vd->z = (vs->x * mat->m[0][2]) + (vs->y * mat->m[1][2]) + (vs->z * mat->m[2][2]); #endif } /* Multiply list of given vectors with given matrix. */ void dm_vector_mul_by_mat_n(DMVector *list, const int nlist, const DMMatrix *mat) { int i; #ifdef DM_USE_SIMD asm volatile( "mov %0, %%edx\n" "movups (%%edx), %%xmm4\n" "movups 16(%%edx), %%xmm5\n" "movups 32(%%edx), %%xmm6\n" "movups 48(%%edx), %%xmm7\n" : : "m" (mat) : "%edx", "%xmm4", "%xmm5", "%xmm6", "%xmm7" ); #endif for (i = 0; i < nlist; i++) { #ifdef DM_USE_SIMD asm volatile ( // list[i] -> xmm0 "movups %1, %%xmm0\n" // zero final result in xmm2 "xorps %%xmm2, %%xmm2\n" // perform shuffle and multiply and add whole "column" "X" "movups %%xmm0, %%xmm1\n" "shufps $0x00, %%xmm1, %%xmm1\n" "mulps %%xmm4, %%xmm1\n" "addps %%xmm1, %%xmm2\n" // Y "movups %%xmm0, %%xmm1\n" "shufps $0x55, %%xmm1, %%xmm1\n" "mulps %%xmm5, %%xmm1\n" "addps %%xmm1, %%xmm2\n" // Z "movups %%xmm0, %%xmm1\n" "shufps $0xAA, %%xmm1, %%xmm1\n" "mulps %%xmm6, %%xmm1\n" "addps %%xmm1, %%xmm2\n" // W "movups %%xmm0, %%xmm1\n" "shufps $0xFF, %%xmm1, %%xmm1\n" "mulps %%xmm7, %%xmm1\n" "addps %%xmm1, %%xmm2\n" // Result -> "movups %%xmm2, %0\n" : "=m" (list[i]) : "m" (list[i]) : "memory", "%xmm0", "%xmm1", "%xmm2", "%xmm4", "%xmm5", "%xmm6", "%xmm7"); #else DMVector q; memcpy(&q, &list[i], sizeof(DMVector)); list[i].x = (q.x * mat->m[0][0]) + (q.y * mat->m[1][0]) + (q.z * mat->m[2][0]); list[i].y = (q.x * mat->m[0][1]) + (q.y * mat->m[1][1]) + (q.z * mat->m[2][1]); list[i].z = (q.x * mat->m[0][2]) + (q.y * mat->m[1][2]) + (q.z * mat->m[2][2]); #endif } } /* Set matrix to unit-matrix */ void dm_matrix_unit(DMMatrix *mat) { memset(mat, 0, sizeof(DMMatrix)); mat->m[0][0] = 1.0f; mat->m[1][1] = 1.0f; mat->m[2][2] = 1.0f; mat->m[3][3] = 1.0f; } /* Transpose the matrix mat2 to mat1 */ void dm_matrix_transpose(DMMatrix *mat1, const DMMatrix *mat2) { int i, j; for (i = 0; i < DM_MATRIX_SIZE; i++) for (j = 0; j < DM_MATRIX_SIZE; j++) mat1->m[i][j] = mat2->m[j][i]; } /* Multiply matrices mat1 and mat2, putting result into mat1 */ void dm_matrix_mul_r(DMMatrix *dst, const DMMatrix *mat1, const DMMatrix *mat2) { #ifdef DM_USE_SIMD asm volatile( "mov %1, %%ebx\n" "mov %2, %%edx\n" // -------------------------------------------------- // 0 "movups (%%ebx), %%xmm0\n" // mat1[0] "movups (%%edx), %%xmm1\n" // mat2[0] "shufps $0x00, %%xmm1, %%xmm1\n" // mat2[0][0] "mulps %%xmm0, %%xmm1\n" "movups %%xmm1, %%xmm3\n" // 1 "movups 16(%%ebx), %%xmm0\n" // mat1[0] "movups (%%edx), %%xmm1\n" // mat2[0] "shufps $0x55, %%xmm1, %%xmm1\n" // mat2[0][1] "mulps %%xmm0, %%xmm1\n" "addps %%xmm1, %%xmm3\n" // 2 "movups 32(%%ebx), %%xmm0\n" // mat1[0] "movups (%%edx), %%xmm1\n" // mat2[0] "shufps $0xAA, %%xmm1, %%xmm1\n" // mat2[0][2] "mulps %%xmm0, %%xmm1\n" "addps %%xmm1, %%xmm3\n" // 3 "movups 48(%%ebx), %%xmm0\n" // mat1[0] "movups (%%edx), %%xmm1\n" // mat2[0] "shufps $0xFF, %%xmm1, %%xmm1\n" // mat2[0][3] "mulps %%xmm0, %%xmm1\n" "addps %%xmm1, %%xmm3\n" "mov %0, %%ebx\n" "movups %%xmm3, (%%ebx)\n" // -------------------------------------------------- "mov %1, %%ebx\n" // 0 "movups (%%ebx), %%xmm0\n" // mat1[0] "movups 16(%%edx), %%xmm1\n" // mat2[1] "shufps $0x00, %%xmm1, %%xmm1\n" // mat2[0][0] "mulps %%xmm0, %%xmm1\n" "movups %%xmm1, %%xmm3\n" // 1 "movups 16(%%ebx), %%xmm0\n" // mat1[0] "movups 16(%%edx), %%xmm1\n" // mat2[1] "shufps $0x55, %%xmm1, %%xmm1\n" // mat2[0][1] "mulps %%xmm0, %%xmm1\n" "addps %%xmm1, %%xmm3\n" // 2 "movups 32(%%ebx), %%xmm0\n" // mat1[0] "movups 16(%%edx), %%xmm1\n" // mat2[1] "shufps $0xAA, %%xmm1, %%xmm1\n" // mat2[0][2] "mulps %%xmm0, %%xmm1\n" "addps %%xmm1, %%xmm3\n" // 3 "movups 48(%%ebx), %%xmm0\n" // mat1[0] "movups 16(%%edx), %%xmm1\n" // mat2[1] "shufps $0xFF, %%xmm1, %%xmm1\n" // mat2[0][3] "mulps %%xmm0, %%xmm1\n" "addps %%xmm1, %%xmm3\n" "mov %0, %%ebx\n" "movups %%xmm3, 16(%%ebx)\n" // -------------------------------------------------- "mov %1, %%ebx\n" // 0 "movups (%%ebx), %%xmm0\n" // mat1[0] "movups 32(%%edx), %%xmm1\n" // mat2[1] "shufps $0x00, %%xmm1, %%xmm1\n" // mat2[0][0] "mulps %%xmm0, %%xmm1\n" "movups %%xmm1, %%xmm3\n" // 1 "movups 16(%%ebx), %%xmm0\n" // mat1[0] "movups 32(%%edx), %%xmm1\n" // mat2[1] "shufps $0x55, %%xmm1, %%xmm1\n" // mat2[0][1] "mulps %%xmm0, %%xmm1\n" "addps %%xmm1, %%xmm3\n" // 2 "movups 32(%%ebx), %%xmm0\n" // mat1[0] "movups 32(%%edx), %%xmm1\n" // mat2[1] "shufps $0xAA, %%xmm1, %%xmm1\n" // mat2[0][2] "mulps %%xmm0, %%xmm1\n" "addps %%xmm1, %%xmm3\n" // 3 "movups 48(%%ebx), %%xmm0\n" // mat1[0] "movups 32(%%edx), %%xmm1\n" // mat2[1] "shufps $0xFF, %%xmm1, %%xmm1\n" // mat2[0][3] "mulps %%xmm0, %%xmm1\n" "addps %%xmm1, %%xmm3\n" "mov %0, %%ebx\n" "movups %%xmm3, 32(%%ebx)\n" // -------------------------------------------------- "mov %1, %%ebx\n" // 0 "movups (%%ebx), %%xmm0\n" // mat1[0] "movups 48(%%edx), %%xmm1\n" // mat2[1] "shufps $0x00, %%xmm1, %%xmm1\n" // mat2[0][0] "mulps %%xmm0, %%xmm1\n" "movups %%xmm1, %%xmm3\n" // 1 "movups 16(%%ebx), %%xmm0\n" // mat1[0] "movups 48(%%edx), %%xmm1\n" // mat2[1] "shufps $0x55, %%xmm1, %%xmm1\n" // mat2[0][1] "mulps %%xmm0, %%xmm1\n" "addps %%xmm1, %%xmm3\n" // 2 "movups 32(%%ebx), %%xmm0\n" // mat1[0] "movups 48(%%edx), %%xmm1\n" // mat2[1] "shufps $0xAA, %%xmm1, %%xmm1\n" // mat2[0][2] "mulps %%xmm0, %%xmm1\n" "addps %%xmm1, %%xmm3\n" // 3 "movups 48(%%ebx), %%xmm0\n" // mat1[0] "movups 48(%%edx), %%xmm1\n" // mat2[1] "shufps $0xFF, %%xmm1, %%xmm1\n" // mat2[0][3] "mulps %%xmm0, %%xmm1\n" "addps %%xmm1, %%xmm3\n" "mov %0, %%ebx\n" "movups %%xmm3, 48(%%ebx)\n" : "=m" (dst) : "m" (mat1), "m" (mat2) : "memory", "%edx", "%ebx", "%xmm0", "%xmm2", "%xmm3" ); #else int i, j; for (i = 0; i < DM_MATRIX_SIZE; i++) for (j = 0; j < DM_MATRIX_SIZE; j++) dst->m[i][j] = (mat1->m[i][0] * mat2->m[0][j]) + (mat1->m[i][1] * mat2->m[1][j]) + (mat1->m[i][2] * mat2->m[2][j]); #endif } void dm_matrix_mul(DMMatrix *mat1, const DMMatrix *mat2) { DMMatrix tmpM; dm_matrix_mul_r(&tmpM, mat1, mat2); memcpy(mat1, &tmpM, sizeof(DMMatrix)); } /* Multiply given list of matrices (size of nMatrices units) with given matrix. */ void dm_matrix_mul_n(DMMatrix * list, const int nlist, const DMMatrix *mat) { int i; for (i = 0; i < nlist; i++) dm_matrix_mul(&list[i], mat); } /* Optimized rotation matrix creation */ void dm_matrix_rot(DMMatrix *mat, const DMFloat sx, const DMFloat sy, const DMFloat sz, const DMFloat cx, const DMFloat cy, const DMFloat cz) { const DMFloat q = cx * sz, l = cx * cz, i = sx * sz, j = sx * cz; memset(mat, 0, sizeof(DMMatrix)); mat->m[0][0] = cy * cz; mat->m[0][1] = cy * sz; mat->m[0][2] = -sy; mat->m[1][0] = (sy * j) - q; mat->m[1][1] = (sy * i) + l; mat->m[1][2] = sx * cy; mat->m[2][0] = (sy * l) + i; mat->m[2][1] = (sy * q) - j; mat->m[2][2] = cx * cy; mat->m[3][3] = 1.0f; }