changeset 128:afa28947e400

Improve transparent GA alpha scaled blitting function.
author Matti Hamalainen <ccr@tnsp.org>
date Thu, 04 Oct 2012 09:32:07 +0300
parents 5102ec9e9bce
children 975725e3126d
files dmblitfunc.h
diffstat 1 files changed, 23 insertions(+), 10 deletions(-) [+]
line wrap: on
line diff
--- a/dmblitfunc.h	Thu Oct 04 05:38:08 2012 +0300
+++ b/dmblitfunc.h	Thu Oct 04 09:32:07 2012 +0300
@@ -100,7 +100,7 @@
     
 #define DM_BLITFUNC_SRC_TYPE Uint32
 #define DM_BLITFUNC_DST_TYPE Uint32
-#define DM_BLITFUNC_INNER				\
+#define DM_BLITFUNC_INNER			\
     asm(					\
         "movd        %2,     %%mm1\n"		\
         \
@@ -173,6 +173,7 @@
 
 #ifdef DM_USE_SIMD
 #define DM_BLITFUNC_VARS \
+    const Uint32 qpdmask =       0xff000000; \
     const Uint64 qpdrm   = 0xff00ff00ff00ffULL;
 
 #define DM_BLITFUNC_SRC_TYPE Uint32
@@ -186,24 +187,35 @@
         : "m" (alpha)				\
         : "%mm4" );
 
-#define DM_BLITFUNC_INNER				\
+#define DM_BLITFUNC_INNER			\
     asm(					\
         "movd        %2,     %%mm1\n"		\
+        \
+        "movd        %3,     %%mm2\n"		\
+        "movq        %%mm1,  %%mm5\n"		\
+        "pand        %%mm2,  %%mm5\n"		\
+        "psrlw       $8,     %%mm5\n"		\
+        "punpcklwd   %%mm5,  %%mm5\n"		\
+        "punpckhwd   %%mm5,  %%mm5\n"		\
+        \
+        "pmullw      %%mm4,  %%mm5\n"		\
+        "psrlw       $8,     %%mm5\n"		\
+        \
         "pxor        %%mm2,  %%mm2\n"		\
         "movd        %1,     %%mm3\n"		\
         "punpcklbw   %%mm2,  %%mm1\n"		\
         "punpcklbw   %%mm2,  %%mm3\n"		\
         \
         "psubw       %%mm3,  %%mm1\n"		\
-        "pmullw      %%mm4,  %%mm1\n"		\
+        "pmullw      %%mm5,  %%mm1\n"		\
         "psraw       $8,     %%mm1\n"		\
         "paddw       %%mm3,  %%mm1\n"		\
-        "pand        %3,     %%mm1\n"		\
+        "pand        %4,     %%mm1\n"		\
         "packuswb    %%mm2,  %%mm1\n"		\
         "movd        %%mm1,  %0\n"		\
         : "=m" (*dp)				\
-        : "m" (*dp), "m" (sp[FP_GETH(xv)]), "m" (qpdrm)	\
-        : "memory", "%mm1", "%mm2", "%mm3", "%mm4" ); dp++;
+        : "m" (*dp), "m" (sp[FP_GETH(xv)]), "m" (qpdmask), "m" (qpdrm)	\
+        : "memory", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5" ); dp++;
 
 #define DM_BLITFUNC_FINISH asm("emms\n");
 
@@ -213,10 +225,11 @@
 #define DM_BLITFUNC_DST_TYPE DMRGBA32
 #define DM_BLITFUNC_INNER \
     const DMRGBA32 q = sp[FP_GETH(xv)]; \
-    dp->r += ((q.r - dp->r) * alpha) >> 8; \
-    dp->g += ((q.g - dp->g) * alpha) >> 8; \
-    dp->b += ((q.b - dp->b) * alpha) >> 8; \
-    dp->a  = alpha; \
+    const int a = (alpha * q.a) >> 8; \
+    dp->r += ((q.r - dp->r) * a) >> 8; \
+    dp->g += ((q.g - dp->g) * a) >> 8; \
+    dp->b += ((q.b - dp->b) * a) >> 8; \
+    dp->a  = a; \
     dp++;
 #endif