Mercurial > hg > dmlib
changeset 283:4d42b8910d7e
Using SSE inline asm is not worth it in single operations; it hinders
compiler optimizations too much.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Thu, 11 Oct 2012 08:50:03 +0300 |
parents | 175328b20341 |
children | d47e8cafaa63 |
files | dmvecmat.c dmvecmat.h |
diffstat | 2 files changed, 48 insertions(+), 40 deletions(-) [+] |
line wrap: on
line diff
--- a/dmvecmat.c Thu Oct 11 08:08:46 2012 +0300 +++ b/dmvecmat.c Thu Oct 11 08:50:03 2012 +0300 @@ -11,7 +11,19 @@ { int i; for (i = 0; i < nlist; i++) + { +#ifdef DM_USE_SIMD + asm("movups %2, %%xmm1\n" + "movups %1, %%xmm2\n" + "addps %%xmm2, %%xmm1\n" + "movups %%xmm1, %0\n" + : "=m" (dst[i]) + : "m" (dst[i]), "m" (src[i]) + : "memory", "%xmm1", "%xmm2"); +#else dm_vector_add(dst + i, src + i); +#endif + } } @@ -19,7 +31,19 @@ { int i; for (i = 0; i < nlist; i++) + { +#ifdef DM_USE_SIMD + asm("movups %2, %%xmm1\n" + "movups %1, %%xmm2\n" + "addps %%xmm2, %%xmm1\n" + "movups %%xmm1, %0\n" + : "=m" (dst[i]) + : "m" (src1[i]), "m" (src2[i]) + : "memory", "%xmm1", "%xmm2"); +#else dm_vector_add_r(dst + i, src1 + i, src2 + i); +#endif + } } @@ -27,7 +51,19 @@ { int i; for (i = 0; i < nlist; i++) + { +#ifdef DM_USE_SIMD + asm("movups %2, %%xmm1\n" + "movups %1, %%xmm2\n" + "subps %%xmm2, %%xmm1\n" + "movups %%xmm1, %0\n" + : "=m" (dst[i]) + : "m" (dst[i]), "m" (src[i]) + : "memory", "%xmm1", "%xmm2"); +#else dm_vector_add(dst + i, src + i); +#endif + } } @@ -35,7 +71,19 @@ { int i; for (i = 0; i < nlist; i++) + { +#ifdef DM_USE_SIMD + asm("movups %2, %%xmm1\n" + "movups %1, %%xmm2\n" + "subps %%xmm2, %%xmm1\n" + "movups %%xmm1, %0\n" + : "=m" (dst[i]) + : "m" (src1[i]), "m" (src2[i]) + : "memory", "%xmm1", "%xmm2"); +#else dm_vector_sub_r(dst + i, src1 + i, src2 + i); +#endif + } }
--- a/dmvecmat.h Thu Oct 11 08:08:46 2012 +0300 +++ b/dmvecmat.h Thu Oct 11 08:50:03 2012 +0300 @@ -57,73 +57,33 @@ static inline void dm_vector_add(DMVector *vr, const DMVector *v2) { -#ifdef DM_USE_SIMD - asm("movups %2, %%xmm1\n" - "movups %1, %%xmm2\n" - "addps %%xmm2, %%xmm1\n" - "movups %%xmm1, %0\n" - : "=m" (*vr) - : "m" (*vr), "m" (*v2) - : "memory", "%xmm1", "%xmm2"); -#else vr->x += v2->x; vr->y += v2->y; vr->z += v2->z; -#endif } static inline void dm_vector_add_r(DMVector *vr, const DMVector *v1, const DMVector *v2) { -#ifdef DM_USE_SIMD - asm("movups %2, %%xmm1\n" - "movups %1, %%xmm2\n" - "addps %%xmm2, %%xmm1\n" - "movups %%xmm1, %0\n" - : "=m" (*vr) - : "m" (*v1), "m" (*v2) - : "memory", "%xmm1", "%xmm2"); -#else vr->x = v1->x + v2->x; vr->y = v1->y + v2->y; vr->z = v1->z + v2->z; -#endif } static inline void dm_vector_sub(DMVector *vr, const DMVector *v2) { -#ifdef DM_USE_SIMD - asm("movups %2, %%xmm1\n" - "movups %1, %%xmm2\n" - "subps %%xmm2, %%xmm1\n" - "movups %%xmm1, %0\n" - : "=m" (*vr) - : "m" (*vr), "m" (*v2) - : "memory", "%xmm1", "%xmm2"); -#else vr->x -= v2->x; vr->y -= v2->y; vr->z -= v2->z; -#endif } static inline void dm_vector_sub_r(DMVector *vr, const DMVector *v1, const DMVector *v2) { -#ifdef DM_USE_SIMD - asm("movups %2, %%xmm1\n" - "movups %1, %%xmm2\n" - "subps %%xmm2, %%xmm1\n" - "movups %%xmm1, %0\n" - : "=m" (*vr) - : "m" (*v1), "m" (*v2) - : "memory", "%xmm1", "%xmm2"); -#else vr->x = v1->x - v2->x; vr->y = v1->y - v2->y; vr->z = v1->z - v2->z; -#endif }