changeset 283:4d42b8910d7e

Using SSE inline asm is not worth it in single operations, it hinders compiler optimizations too much.
author Matti Hamalainen <ccr@tnsp.org>
date Thu, 11 Oct 2012 08:50:03 +0300
parents 175328b20341
children d47e8cafaa63
files dmvecmat.c dmvecmat.h
diffstat 2 files changed, 48 insertions(+), 40 deletions(-) [+]
line wrap: on
line diff
--- a/dmvecmat.c	Thu Oct 11 08:08:46 2012 +0300
+++ b/dmvecmat.c	Thu Oct 11 08:50:03 2012 +0300
@@ -11,7 +11,19 @@
 {
     int i;
     for (i = 0; i < nlist; i++)
+    {
+#ifdef DM_USE_SIMD
+    asm("movups      %2,     %%xmm1\n"
+        "movups      %1,     %%xmm2\n"
+        "addps       %%xmm2, %%xmm1\n"
+        "movups      %%xmm1, %0\n"
+        : "=m" (dst[i])
+        : "m" (dst[i]), "m" (src[i])
+        : "memory", "%xmm1", "%xmm2");
+#else
         dm_vector_add(dst + i, src + i);
+#endif
+    }
 }
 
 
@@ -19,7 +31,19 @@
 {
     int i;
     for (i = 0; i < nlist; i++)
+    {
+#ifdef DM_USE_SIMD
+    asm("movups      %2,     %%xmm1\n"
+        "movups      %1,     %%xmm2\n"
+        "addps       %%xmm2, %%xmm1\n"
+        "movups      %%xmm1, %0\n"
+        : "=m" (dst[i])
+        : "m" (src1[i]), "m" (src2[i])
+        : "memory", "%xmm1", "%xmm2");
+#else
         dm_vector_add_r(dst + i, src1 + i, src2 + i);
+#endif
+    }
 }
 
 
@@ -27,7 +51,19 @@
 {
     int i;
     for (i = 0; i < nlist; i++)
+    {
+#ifdef DM_USE_SIMD
+    asm("movups      %1,     %%xmm1\n"
+        "movups      %2,     %%xmm2\n"
+        "subps       %%xmm2, %%xmm1\n"
+        "movups      %%xmm1, %0\n"
+        : "=m" (dst[i])
+        : "m" (dst[i]), "m" (src[i])
+        : "memory", "%xmm1", "%xmm2");
+#else
         dm_vector_sub(dst + i, src + i);
+#endif
+    }
 }
 
 
@@ -35,7 +71,19 @@
 {
     int i;
     for (i = 0; i < nlist; i++)
+    {
+#ifdef DM_USE_SIMD
+    asm("movups      %1,     %%xmm1\n"
+        "movups      %2,     %%xmm2\n"
+        "subps       %%xmm2, %%xmm1\n"
+        "movups      %%xmm1, %0\n"
+        : "=m" (dst[i])
+        : "m" (src1[i]), "m" (src2[i])
+        : "memory", "%xmm1", "%xmm2");
+#else
         dm_vector_sub_r(dst + i, src1 + i, src2 + i);
+#endif
+    }
 }
 
 
--- a/dmvecmat.h	Thu Oct 11 08:08:46 2012 +0300
+++ b/dmvecmat.h	Thu Oct 11 08:50:03 2012 +0300
@@ -57,73 +57,33 @@
 
 static inline void dm_vector_add(DMVector *vr, const DMVector *v2)
 {
-#ifdef DM_USE_SIMD
-    asm("movups      %2,     %%xmm1\n"
-        "movups      %1,     %%xmm2\n"
-        "addps       %%xmm2, %%xmm1\n"
-        "movups      %%xmm1, %0\n"
-        : "=m" (*vr)
-        : "m" (*vr), "m" (*v2)
-        : "memory", "%xmm1", "%xmm2");
-#else
     vr->x += v2->x;
     vr->y += v2->y;
     vr->z += v2->z;
-#endif
 }
 
 
 static inline void dm_vector_add_r(DMVector *vr, const DMVector *v1, const DMVector *v2)
 {
-#ifdef DM_USE_SIMD
-    asm("movups      %2,     %%xmm1\n"
-        "movups      %1,     %%xmm2\n"
-        "addps       %%xmm2, %%xmm1\n"
-        "movups      %%xmm1, %0\n"
-        : "=m" (*vr)
-        : "m" (*v1), "m" (*v2)
-        : "memory", "%xmm1", "%xmm2");
-#else
     vr->x = v1->x + v2->x;
     vr->y = v1->y + v2->y;
     vr->z = v1->z + v2->z;
-#endif
 }
 
 
 static inline void dm_vector_sub(DMVector *vr, const DMVector *v2)
 {
-#ifdef DM_USE_SIMD
-    asm("movups      %2,     %%xmm1\n"
-        "movups      %1,     %%xmm2\n"
-        "subps       %%xmm2, %%xmm1\n"
-        "movups      %%xmm1, %0\n"
-        : "=m" (*vr)
-        : "m" (*vr), "m" (*v2)
-        : "memory", "%xmm1", "%xmm2");
-#else
     vr->x -= v2->x;
     vr->y -= v2->y;
     vr->z -= v2->z;
-#endif
 }
 
 
 static inline void dm_vector_sub_r(DMVector *vr, const DMVector *v1, const DMVector *v2)
 {
-#ifdef DM_USE_SIMD
-    asm("movups      %2,     %%xmm1\n"
-        "movups      %1,     %%xmm2\n"
-        "subps       %%xmm2, %%xmm1\n"
-        "movups      %%xmm1, %0\n"
-        : "=m" (*vr)
-        : "m" (*v1), "m" (*v2)
-        : "memory", "%xmm1", "%xmm2");
-#else
     vr->x = v1->x - v2->x;
     vr->y = v1->y - v2->y;
     vr->z = v1->z - v2->z;
-#endif
 }