Mercurial > hg > dmlib
changeset 12:35b0c83af3ca
MMX-version of scaled 32-bit RGBA alpha blending blit works now.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Fri, 28 Sep 2012 09:35:25 +0300 |
parents | eb0072860fb0 |
children | 07bace8425d9 |
files | dmblitfunc.h |
diffstat | 1 files changed, 21 insertions(+), 12 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/dmblitfunc.h	Fri Sep 28 09:32:45 2012 +0300
+++ b/dmblitfunc.h	Fri Sep 28 09:35:25 2012 +0300
@@ -97,35 +97,44 @@
 
 #ifdef DM_USE_SIMD
 #define DM_SCALED_INNER_INIT \
-    const Uint32 qpdmask = 0xff000000;
+    const Uint32 qpdmask = 0xff000000; \
+    const Uint64 qpdrm = 0xff00ff00ff00ffULL;
 
 #define DM_SCALED_SRC_TYPE Uint32
 #define DM_SCALED_DST_TYPE Uint32
 #define DM_SCALED_INNER \
     asm( \
     "movd %2, %%mm1\n" \
+    \
+    "movd %3, %%mm2\n" \
     "movq %%mm1, %%mm3\n" \
-    "movd %3, %%mm5\n" \
-    "pand %%mm5, %%mm3\n" \
+    "pand %%mm2, %%mm3\n" \
     "movq %%mm3, %%mm4\n" \
-    "psrlq $8, %%mm4\n" \
-    "por %%mm4, %%mm3\n" \
-    "psrlq $16, %%mm3\n" \
+    "psrlq $8, %%mm3\n" \
+    "por %%mm3, %%mm4\n" \
+    "movq %%mm4, %%mm3\n" \
+    "psrlq $16, %%mm4\n" \
     "por %%mm3, %%mm4\n" \
+    "pxor %%mm5, %%mm5\n" \
+    "movq %%mm5, %%mm2\n" \
+    "punpcklbw %%mm5, %%mm4\n" \
+    "movq %%mm4, %%mm5\n" \
     \
-    "pxor %%mm2, %%mm2\n" \
-    "punpcklbw %%mm2, %%mm4\n" \
+    "movq %%mm2, %%mm4\n" \
     \
     "movd %1, %%mm3\n" \
     "punpcklbw %%mm2, %%mm1\n" \
-    "punpcklbw %%mm2, %%mm3\n" \
+    "punpcklbw %%mm4, %%mm3\n" \
+    \
     "psubw %%mm3, %%mm1\n" \
-    "pmullw %%mm4, %%mm1\n" \
-    "psrlw $8, %%mm1\n" \
+    "pmullw %%mm5, %%mm1\n" \
+    "psraw $8, %%mm1\n" \
+    "paddw %%mm3, %%mm1\n" \
+    "pand %4, %%mm1\n" \
     "packuswb %%mm2, %%mm1\n" \
     "movd %%mm1, %0\n" \
     : "=m" (*dp) \
-    : "m" (*dp), "m" (sp[FP_GETH(xv)]), "m" (qpdmask) \
+    : "m" (*dp), "m" (sp[FP_GETH(xv)]), "m" (qpdmask), "m" (qpdrm) \
     : "memory" ); dp++;
 
 #define DM_SCALED_FINISH asm("emms\n");
```