Mercurial > hg > dmlib
view dmvecmat.c @ 510:43ea59887c69
Start work on making C64 formats encoding possible by changing DMDecodeOps
to DMEncDecOps and adding fields and op enums for custom encode functions, renaming,
etc. Split generic op sanity checking into a separate function in
preparation for its use in generic encoding function.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Mon, 19 Nov 2012 15:06:01 +0200 |
parents | 1994cc78ce6c |
children |
line wrap: on
line source
/* * DMLib * -- Vector and matrix functions * Programmed and designed by Matti 'ccr' Hamalainen * (C) Copyright 2011-2012 Tecnic Software productions (TNSP) */ #include "dmvecmat.h" void dm_vector_add_n(DMVector *dst, const DMVector *src, const int nlist) { int i; for (i = 0; i < nlist; i++) { #ifdef DM_USE_SIMD asm("movups %2, %%xmm1\n" "movups %1, %%xmm2\n" "addps %%xmm2, %%xmm1\n" "movups %%xmm1, %0\n" : "=m" (dst[i]) : "m" (dst[i]), "m" (src[i]) : "memory", "%xmm1", "%xmm2"); #else dm_vector_add(dst + i, src + i); #endif } } void dm_vector_add_r_n(DMVector *dst, const DMVector *src1, const DMVector *src2, const int nlist) { int i; for (i = 0; i < nlist; i++) { #ifdef DM_USE_SIMD asm("movups %2, %%xmm1\n" "movups %1, %%xmm2\n" "addps %%xmm2, %%xmm1\n" "movups %%xmm1, %0\n" : "=m" (dst[i]) : "m" (src1[i]), "m" (src2[i]) : "memory", "%xmm1", "%xmm2"); #else dm_vector_add_r(dst + i, src1 + i, src2 + i); #endif } } void dm_vector_sub_n(DMVector *dst, const DMVector *src, const int nlist) { int i; for (i = 0; i < nlist; i++) { #ifdef DM_USE_SIMD asm("movups %2, %%xmm1\n" "movups %1, %%xmm2\n" "subps %%xmm2, %%xmm1\n" "movups %%xmm1, %0\n" : "=m" (dst[i]) : "m" (dst[i]), "m" (src[i]) : "memory", "%xmm1", "%xmm2"); #else dm_vector_add(dst + i, src + i); #endif } } void dm_vector_sub_r_n(DMVector *dst, const DMVector *src1, const DMVector *src2, const int nlist) { int i; for (i = 0; i < nlist; i++) { #ifdef DM_USE_SIMD asm("movups %2, %%xmm1\n" "movups %1, %%xmm2\n" "subps %%xmm2, %%xmm1\n" "movups %%xmm1, %0\n" : "=m" (dst[i]) : "m" (src1[i]), "m" (src2[i]) : "memory", "%xmm1", "%xmm2"); #else dm_vector_sub_r(dst + i, src1 + i, src2 + i); #endif } } /* Multiply given vector with a matrix */ void dm_vector_mul_by_mat(DMVector *vd, const DMVector *vs, const DMMatrix *mat) { #ifdef DM_USE_SIMD asm volatile( "mov %1, %%edx\n" "movups (%%edx), %%xmm4\n" "movups 16(%%edx), %%xmm5\n" "movups 32(%%edx), %%xmm6\n" "movups 48(%%edx), %%xmm7\n" // vector -> xmm0 "movups %2, %%xmm0\n" // zero final result in xmm2 "xorps %%xmm2, %%xmm2\n" // perform shuffle and multiply and add whole "column" "X" "movups %%xmm0, %%xmm1\n" "shufps $0x00, %%xmm1, %%xmm1\n" "mulps %%xmm4, %%xmm1\n" "addps %%xmm1, %%xmm2\n" // Y "movups %%xmm0, %%xmm1\n" "shufps $0x55, %%xmm1, %%xmm1\n" "mulps %%xmm5, %%xmm1\n" "addps %%xmm1, %%xmm2\n" // Z "movups %%xmm0, %%xmm1\n" "shufps $0xAA, %%xmm1, %%xmm1\n" "mulps %%xmm6, %%xmm1\n" "addps %%xmm1, %%xmm2\n" // W "movups %%xmm0, %%xmm1\n" "shufps $0xFF, %%xmm1, %%xmm1\n" "mulps %%xmm7, %%xmm1\n" "addps %%xmm1, %%xmm2\n" // Result -> "movups %%xmm2, %0\n" : "=m" (vd) : "m" (mat), "m" (vs) : "memory", "%edx", "%xmm0", "%xmm1", "%xmm2", "%xmm4", "%xmm5", "%xmm6", "%xmm7" ); #else vd->x = (vs->x * mat->m[0][0]) + (vs->y * mat->m[1][0]) + (vs->z * mat->m[2][0]); vd->y = (vs->x * mat->m[0][1]) + (vs->y * mat->m[1][1]) + (vs->z * mat->m[2][1]); vd->z = (vs->x * mat->m[0][2]) + (vs->y * mat->m[1][2]) + (vs->z * mat->m[2][2]); #endif } /* Multiply list of given vectors with given matrix. */ void dm_vector_mul_by_mat_n(DMVector *list, const int nlist, const DMMatrix *mat) { int i; #ifdef DM_USE_SIMD asm volatile( "mov %0, %%edx\n" "movups (%%edx), %%xmm4\n" "movups 16(%%edx), %%xmm5\n" "movups 32(%%edx), %%xmm6\n" "movups 48(%%edx), %%xmm7\n" : : "m" (mat) : "%edx", "%xmm4", "%xmm5", "%xmm6", "%xmm7" ); #endif for (i = 0; i < nlist; i++) { #ifdef DM_USE_SIMD asm volatile ( // list[i] -> xmm0 "movups %1, %%xmm0\n" // zero final result in xmm2 "xorps %%xmm2, %%xmm2\n" // perform shuffle and multiply and add whole "column" "X" "movups %%xmm0, %%xmm1\n" "shufps $0x00, %%xmm1, %%xmm1\n" "mulps %%xmm4, %%xmm1\n" "addps %%xmm1, %%xmm2\n" // Y "movups %%xmm0, %%xmm1\n" "shufps $0x55, %%xmm1, %%xmm1\n" "mulps %%xmm5, %%xmm1\n" "addps %%xmm1, %%xmm2\n" // Z "movups %%xmm0, %%xmm1\n" "shufps $0xAA, %%xmm1, %%xmm1\n" "mulps %%xmm6, %%xmm1\n" "addps %%xmm1, %%xmm2\n" // W "movups %%xmm0, %%xmm1\n" "shufps $0xFF, %%xmm1, %%xmm1\n" "mulps %%xmm7, %%xmm1\n" "addps %%xmm1, %%xmm2\n" // Result -> "movups %%xmm2, %0\n" : "=m" (list[i]) : "m" (list[i]) : "memory", "%xmm0", "%xmm1", "%xmm2", "%xmm4", "%xmm5", "%xmm6", "%xmm7"); #else DMVector q; memcpy(&q, &list[i], sizeof(DMVector)); list[i].x = (q.x * mat->m[0][0]) + (q.y * mat->m[1][0]) + (q.z * mat->m[2][0]); list[i].y = (q.x * mat->m[0][1]) + (q.y * mat->m[1][1]) + (q.z * mat->m[2][1]); list[i].z = (q.x * mat->m[0][2]) + (q.y * mat->m[1][2]) + (q.z * mat->m[2][2]); #endif } } /* Set matrix to unit-matrix */ void dm_matrix_unit(DMMatrix *mat) { memset(mat, 0, sizeof(DMMatrix)); mat->m[0][0] = 1.0f; mat->m[1][1] = 1.0f; mat->m[2][2] = 1.0f; mat->m[3][3] = 1.0f; } /* Transpose the matrix mat2 to mat1 */ void dm_matrix_transpose(DMMatrix *mat1, const DMMatrix *mat2) { int i, j; for (i = 0; i < DM_MATRIX_SIZE; i++) for (j = 0; j < DM_MATRIX_SIZE; j++) mat1->m[i][j] = mat2->m[j][i]; } /* Multiply matrices mat1 and mat2, putting result into mat1 */ void dm_matrix_mul_r(DMMatrix *dst, const DMMatrix *mat1, const DMMatrix *mat2) { #ifdef DM_USE_SIMD asm volatile( "mov %1, %%ebx\n" "mov %2, %%edx\n" // -------------------------------------------------- // 0 "movups (%%ebx), %%xmm0\n" // mat1[0] "movups (%%edx), %%xmm1\n" // mat2[0] "shufps $0x00, %%xmm1, %%xmm1\n" // mat2[0][0] "mulps %%xmm0, %%xmm1\n" "movups %%xmm1, %%xmm3\n" // 1 "movups 16(%%ebx), %%xmm0\n" // mat1[0] "movups (%%edx), %%xmm1\n" // mat2[0] "shufps $0x55, %%xmm1, %%xmm1\n" // mat2[0][1] "mulps %%xmm0, %%xmm1\n" "addps %%xmm1, %%xmm3\n" // 2 "movups 32(%%ebx), %%xmm0\n" // mat1[0] "movups (%%edx), %%xmm1\n" // mat2[0] "shufps $0xAA, %%xmm1, %%xmm1\n" // mat2[0][2] "mulps %%xmm0, %%xmm1\n" "addps %%xmm1, %%xmm3\n" // 3 "movups 48(%%ebx), %%xmm0\n" // mat1[0] "movups (%%edx), %%xmm1\n" // mat2[0] "shufps $0xFF, %%xmm1, %%xmm1\n" // mat2[0][3] "mulps %%xmm0, %%xmm1\n" "addps %%xmm1, %%xmm3\n" "mov %0, %%ebx\n" "movups %%xmm3, (%%ebx)\n" // -------------------------------------------------- "mov %1, %%ebx\n" // 0 "movups (%%ebx), %%xmm0\n" // mat1[0] "movups 16(%%edx), %%xmm1\n" // mat2[1] "shufps $0x00, %%xmm1, %%xmm1\n" // mat2[0][0] "mulps %%xmm0, %%xmm1\n" "movups %%xmm1, %%xmm3\n" // 1 "movups 16(%%ebx), %%xmm0\n" // mat1[0] "movups 16(%%edx), %%xmm1\n" // mat2[1] "shufps $0x55, %%xmm1, %%xmm1\n" // mat2[0][1] "mulps %%xmm0, %%xmm1\n" "addps %%xmm1, %%xmm3\n" // 2 "movups 32(%%ebx), %%xmm0\n" // mat1[0] "movups 16(%%edx), %%xmm1\n" // mat2[1] "shufps $0xAA, %%xmm1, %%xmm1\n" // mat2[0][2] "mulps %%xmm0, %%xmm1\n" "addps %%xmm1, %%xmm3\n" // 3 "movups 48(%%ebx), %%xmm0\n" // mat1[0] "movups 16(%%edx), %%xmm1\n" // mat2[1] "shufps $0xFF, %%xmm1, %%xmm1\n" // mat2[0][3] "mulps %%xmm0, %%xmm1\n" "addps %%xmm1, %%xmm3\n" "mov %0, %%ebx\n" "movups %%xmm3, 16(%%ebx)\n" // -------------------------------------------------- "mov %1, %%ebx\n" // 0 "movups (%%ebx), %%xmm0\n" // mat1[0] "movups 32(%%edx), %%xmm1\n" // mat2[1] "shufps $0x00, %%xmm1, %%xmm1\n" // mat2[0][0] "mulps %%xmm0, %%xmm1\n" "movups %%xmm1, %%xmm3\n" // 1 "movups 16(%%ebx), %%xmm0\n" // mat1[0] "movups 32(%%edx), %%xmm1\n" // mat2[1] "shufps $0x55, %%xmm1, %%xmm1\n" // mat2[0][1] "mulps %%xmm0, %%xmm1\n" "addps %%xmm1, %%xmm3\n" // 2 "movups 32(%%ebx), %%xmm0\n" // mat1[0] "movups 32(%%edx), %%xmm1\n" // mat2[1] "shufps $0xAA, %%xmm1, %%xmm1\n" // mat2[0][2] "mulps %%xmm0, %%xmm1\n" "addps %%xmm1, %%xmm3\n" // 3 "movups 48(%%ebx), %%xmm0\n" // mat1[0] "movups 32(%%edx), %%xmm1\n" // mat2[1] "shufps $0xFF, %%xmm1, %%xmm1\n" // mat2[0][3] "mulps %%xmm0, %%xmm1\n" "addps %%xmm1, %%xmm3\n" "mov %0, %%ebx\n" "movups %%xmm3, 32(%%ebx)\n" // -------------------------------------------------- "mov %1, %%ebx\n" // 0 "movups (%%ebx), %%xmm0\n" // mat1[0] "movups 48(%%edx), %%xmm1\n" // mat2[1] "shufps $0x00, %%xmm1, %%xmm1\n" // mat2[0][0] "mulps %%xmm0, %%xmm1\n" "movups %%xmm1, %%xmm3\n" // 1 "movups 16(%%ebx), %%xmm0\n" // mat1[0] "movups 48(%%edx), %%xmm1\n" // mat2[1] "shufps $0x55, %%xmm1, %%xmm1\n" // mat2[0][1] "mulps %%xmm0, %%xmm1\n" "addps %%xmm1, %%xmm3\n" // 2 "movups 32(%%ebx), %%xmm0\n" // mat1[0] "movups 48(%%edx), %%xmm1\n" // mat2[1] "shufps $0xAA, %%xmm1, %%xmm1\n" // mat2[0][2] "mulps %%xmm0, %%xmm1\n" "addps %%xmm1, %%xmm3\n" // 3 "movups 48(%%ebx), %%xmm0\n" // mat1[0] "movups 48(%%edx), %%xmm1\n" // mat2[1] "shufps $0xFF, %%xmm1, %%xmm1\n" // mat2[0][3] "mulps %%xmm0, %%xmm1\n" "addps %%xmm1, %%xmm3\n" "mov %0, %%ebx\n" "movups %%xmm3, 48(%%ebx)\n" : "=m" (dst) : "m" (mat1), "m" (mat2) : "memory", "%edx", "%ebx", "%xmm0", "%xmm2", "%xmm3" ); #else int i, j; for (i = 0; i < DM_MATRIX_SIZE; i++) for (j = 0; j < DM_MATRIX_SIZE; j++) dst->m[i][j] = (mat1->m[i][0] * mat2->m[0][j]) + (mat1->m[i][1] * mat2->m[1][j]) + (mat1->m[i][2] * mat2->m[2][j]); #endif } void dm_matrix_mul(DMMatrix *mat1, const DMMatrix *mat2) { DMMatrix tmpM; dm_matrix_mul_r(&tmpM, mat1, mat2); memcpy(mat1, &tmpM, sizeof(DMMatrix)); } /* Multiply given list of matrices (size of nMatrices units) with given matrix. */ void dm_matrix_mul_n(DMMatrix * list, const int nlist, const DMMatrix *mat) { int i; for (i = 0; i < nlist; i++) dm_matrix_mul(&list[i], mat); } /* Optimized rotation matrix creation */ void dm_matrix_rot(DMMatrix *mat, const DMFloat sx, const DMFloat sy, const DMFloat sz, const DMFloat cx, const DMFloat cy, const DMFloat cz) { const DMFloat q = cx * sz, l = cx * cz, i = sx * sz, j = sx * cz; memset(mat, 0, sizeof(DMMatrix)); mat->m[0][0] = cy * cz; mat->m[0][1] = cy * sz; mat->m[0][2] = -sy; mat->m[1][0] = (sy * j) - q; mat->m[1][1] = (sy * i) + l; mat->m[1][2] = sx * cy; mat->m[2][0] = (sy * l) + i; mat->m[2][1] = (sy * q) - j; mat->m[2][2] = cx * cy; mat->m[3][3] = 1.0f; }