changeset 12:35b0c83af3ca

The MMX version of the scaled 32-bit RGBA alpha-blending blit now works.
author Matti Hamalainen <ccr@tnsp.org>
date Fri, 28 Sep 2012 09:35:25 +0300
parents eb0072860fb0
children 07bace8425d9
files dmblitfunc.h
diffstat 1 files changed, 21 insertions(+), 12 deletions(-) [+]
line wrap: on
line diff
--- a/dmblitfunc.h	Fri Sep 28 09:32:45 2012 +0300
+++ b/dmblitfunc.h	Fri Sep 28 09:35:25 2012 +0300
@@ -97,35 +97,44 @@
 
 #ifdef DM_USE_SIMD
 #define DM_SCALED_INNER_INIT \
-    const Uint32 qpdmask = 0xff000000;
+    const Uint32 qpdmask = 0xff000000; \
+    const Uint64 qpdrm   = 0xff00ff00ff00ffULL;
     
 #define DM_SCALED_SRC_TYPE Uint32
 #define DM_SCALED_DST_TYPE Uint32
 #define DM_SCALED_INNER				\
     asm(					\
         "movd        %2,     %%mm1\n"		\
+        \
+        "movd        %3,     %%mm2\n"		\
         "movq        %%mm1,  %%mm3\n"		\
-        "movd        %3,     %%mm5\n"		\
-        "pand        %%mm5,  %%mm3\n"		\
+        "pand        %%mm2,  %%mm3\n"		\
         "movq        %%mm3,  %%mm4\n"		\
-        "psrlq       $8,     %%mm4\n"		\
-        "por         %%mm4,  %%mm3\n"		\
-        "psrlq       $16,    %%mm3\n"		\
+        "psrlq       $8,     %%mm3\n"		\
+        "por         %%mm3,  %%mm4\n"		\
+        "movq        %%mm4,  %%mm3\n"		\
+        "psrlq       $16,    %%mm4\n"		\
         "por         %%mm3,  %%mm4\n"		\
+        "pxor        %%mm5,  %%mm5\n"		\
+        "movq        %%mm5,  %%mm2\n"		\
+        "punpcklbw   %%mm5,  %%mm4\n"		\
+        "movq        %%mm4,  %%mm5\n"		\
         \
-        "pxor        %%mm2,  %%mm2\n"		\
-        "punpcklbw   %%mm2,  %%mm4\n"		\
+        "movq        %%mm2,  %%mm4\n"		\
         \
         "movd        %1,     %%mm3\n"		\
         "punpcklbw   %%mm2,  %%mm1\n"		\
-        "punpcklbw   %%mm2,  %%mm3\n"		\
+        "punpcklbw   %%mm4,  %%mm3\n"		\
+        \
         "psubw       %%mm3,  %%mm1\n"		\
-        "pmullw      %%mm4,  %%mm1\n"		\
-        "psrlw       $8,     %%mm1\n"		\
+        "pmullw      %%mm5,  %%mm1\n"		\
+        "psraw       $8,     %%mm1\n"		\
+        "paddw       %%mm3,  %%mm1\n"		\
+        "pand        %4,  %%mm1\n"		\
         "packuswb    %%mm2,  %%mm1\n"		\
         "movd        %%mm1,  %0\n"		\
         : "=m" (*dp)				\
-        : "m" (*dp), "m" (sp[FP_GETH(xv)]), "m" (qpdmask)	\
+        : "m" (*dp), "m" (sp[FP_GETH(xv)]), "m" (qpdmask), "m" (qpdrm)	\
         : "memory" ); dp++;
 
 #define DM_SCALED_FINISH asm("emms\n");