view dmvecmat.c @ 510:43ea59887c69

Start work on making C64 formats encoding possible by changing DMDecodeOps to DMEncDecOps and adding fields and op enums for custom encode functions, renaming, etc. Split generic op sanity checking into a separate function in preparation for its use in generic encoding function.
author Matti Hamalainen <ccr@tnsp.org>
date Mon, 19 Nov 2012 15:06:01 +0200
parents 1994cc78ce6c
children
line wrap: on
line source

/*
 * DMLib
 * -- Vector and matrix functions
 * Programmed and designed by Matti 'ccr' Hamalainen
 * (C) Copyright 2011-2012 Tecnic Software productions (TNSP)
 */
#include "dmvecmat.h"


void dm_vector_add_n(DMVector *dst, const DMVector *src, const int nlist)
{
    int i;
    for (i = 0; i < nlist; i++)
    {
#ifdef DM_USE_SIMD
    asm("movups      %2,     %%xmm1\n"
        "movups      %1,     %%xmm2\n"
        "addps       %%xmm2, %%xmm1\n"
        "movups      %%xmm1, %0\n"
        : "=m" (dst[i])
        : "m" (dst[i]), "m" (src[i])
        : "memory", "%xmm1", "%xmm2");
#else
        dm_vector_add(dst + i, src + i);
#endif
    }
}


void dm_vector_add_r_n(DMVector *dst, const DMVector *src1, const DMVector *src2, const int nlist)
{
    int i;
    for (i = 0; i < nlist; i++)
    {
#ifdef DM_USE_SIMD
    asm("movups      %2,     %%xmm1\n"
        "movups      %1,     %%xmm2\n"
        "addps       %%xmm2, %%xmm1\n"
        "movups      %%xmm1, %0\n"
        : "=m" (dst[i])
        : "m" (src1[i]), "m" (src2[i])
        : "memory", "%xmm1", "%xmm2");
#else
        dm_vector_add_r(dst + i, src1 + i, src2 + i);
#endif
    }
}


void dm_vector_sub_n(DMVector *dst, const DMVector *src, const int nlist)
{
    int i;
    for (i = 0; i < nlist; i++)
    {
#ifdef DM_USE_SIMD
    asm("movups      %2,     %%xmm1\n"
        "movups      %1,     %%xmm2\n"
        "subps       %%xmm2, %%xmm1\n"
        "movups      %%xmm1, %0\n"
        : "=m" (dst[i])
        : "m" (dst[i]), "m" (src[i])
        : "memory", "%xmm1", "%xmm2");
#else
        dm_vector_add(dst + i, src + i);
#endif
    }
}


void dm_vector_sub_r_n(DMVector *dst, const DMVector *src1, const DMVector *src2, const int nlist)
{
    int i;
    for (i = 0; i < nlist; i++)
    {
#ifdef DM_USE_SIMD
    asm("movups      %2,     %%xmm1\n"
        "movups      %1,     %%xmm2\n"
        "subps       %%xmm2, %%xmm1\n"
        "movups      %%xmm1, %0\n"
        : "=m" (dst[i])
        : "m" (src1[i]), "m" (src2[i])
        : "memory", "%xmm1", "%xmm2");
#else
        dm_vector_sub_r(dst + i, src1 + i, src2 + i);
#endif
    }
}


/* Multiply given vector with a matrix
 */
void dm_vector_mul_by_mat(DMVector *vd, const DMVector *vs, const DMMatrix *mat)
{
#ifdef DM_USE_SIMD
    asm volatile(
        "mov           %1,        %%edx\n"
        "movups        (%%edx),   %%xmm4\n"
        "movups      16(%%edx),   %%xmm5\n"
        "movups      32(%%edx),   %%xmm6\n"
        "movups      48(%%edx),   %%xmm7\n"

        // vector -> xmm0
        "movups      %2,     %%xmm0\n"
        
        // zero final result in xmm2
        "xorps       %%xmm2, %%xmm2\n"

        // perform shuffle and multiply and add whole "column" "X"
        "movups      %%xmm0, %%xmm1\n"
        "shufps      $0x00,  %%xmm1, %%xmm1\n"
        "mulps       %%xmm4, %%xmm1\n"
        "addps       %%xmm1, %%xmm2\n"

        // Y
        "movups      %%xmm0, %%xmm1\n"
        "shufps      $0x55,  %%xmm1, %%xmm1\n"
        "mulps       %%xmm5, %%xmm1\n"
        "addps       %%xmm1, %%xmm2\n"

        // Z
        "movups      %%xmm0, %%xmm1\n"
        "shufps      $0xAA,  %%xmm1, %%xmm1\n"
        "mulps       %%xmm6, %%xmm1\n"
        "addps       %%xmm1, %%xmm2\n"

        // W
        "movups      %%xmm0, %%xmm1\n"
        "shufps      $0xFF,  %%xmm1, %%xmm1\n"
        "mulps       %%xmm7, %%xmm1\n"
        "addps       %%xmm1, %%xmm2\n"

        // Result ->
        "movups      %%xmm2, %0\n"
        : "=m" (vd)
        : "m" (mat), "m" (vs)
        : "memory", "%edx", "%xmm0", "%xmm1", "%xmm2", "%xmm4", "%xmm5", "%xmm6", "%xmm7"
        );
#else
    vd->x = (vs->x * mat->m[0][0]) + (vs->y * mat->m[1][0]) + (vs->z * mat->m[2][0]);
    vd->y = (vs->x * mat->m[0][1]) + (vs->y * mat->m[1][1]) + (vs->z * mat->m[2][1]);
    vd->z = (vs->x * mat->m[0][2]) + (vs->y * mat->m[1][2]) + (vs->z * mat->m[2][2]);
#endif
}


/* Multiply list of given vectors with given matrix.
 */
void dm_vector_mul_by_mat_n(DMVector *list, const int nlist, const DMMatrix *mat)
{
    int i;

#ifdef DM_USE_SIMD
    asm volatile(
        "mov           %0,        %%edx\n"
        "movups        (%%edx),   %%xmm4\n"
        "movups      16(%%edx),   %%xmm5\n"
        "movups      32(%%edx),   %%xmm6\n"
        "movups      48(%%edx),   %%xmm7\n"
        :
        : "m" (mat)
        : "%edx", "%xmm4", "%xmm5", "%xmm6", "%xmm7"
        );
#endif

    for (i = 0; i < nlist; i++)
    {
#ifdef DM_USE_SIMD
        asm volatile
            (
            // list[i] -> xmm0
            "movups      %1,     %%xmm0\n"
            
            // zero final result in xmm2
            "xorps       %%xmm2, %%xmm2\n"

            // perform shuffle and multiply and add whole "column" "X"
            "movups      %%xmm0, %%xmm1\n"
            "shufps      $0x00,  %%xmm1, %%xmm1\n"
            "mulps       %%xmm4, %%xmm1\n"
            "addps       %%xmm1, %%xmm2\n"

            // Y
            "movups      %%xmm0, %%xmm1\n"
            "shufps      $0x55,  %%xmm1, %%xmm1\n"
            "mulps       %%xmm5, %%xmm1\n"
            "addps       %%xmm1, %%xmm2\n"

            // Z
            "movups      %%xmm0, %%xmm1\n"
            "shufps      $0xAA,  %%xmm1, %%xmm1\n"
            "mulps       %%xmm6, %%xmm1\n"
            "addps       %%xmm1, %%xmm2\n"

            // W
            "movups      %%xmm0, %%xmm1\n"
            "shufps      $0xFF,  %%xmm1, %%xmm1\n"
            "mulps       %%xmm7, %%xmm1\n"
            "addps       %%xmm1, %%xmm2\n"

            // Result ->
            "movups      %%xmm2, %0\n"
            : "=m" (list[i])
            : "m" (list[i])
            : "memory", "%xmm0", "%xmm1", "%xmm2", "%xmm4", "%xmm5", "%xmm6", "%xmm7");
#else
        DMVector q;
        memcpy(&q, &list[i], sizeof(DMVector));

        list[i].x = (q.x * mat->m[0][0]) + (q.y * mat->m[1][0]) + (q.z * mat->m[2][0]);
        list[i].y = (q.x * mat->m[0][1]) + (q.y * mat->m[1][1]) + (q.z * mat->m[2][1]);
        list[i].z = (q.x * mat->m[0][2]) + (q.y * mat->m[1][2]) + (q.z * mat->m[2][2]);
#endif
    }
}


/* Set matrix to unit-matrix
 */
void dm_matrix_unit(DMMatrix *mat)
{
    memset(mat, 0, sizeof(DMMatrix));
    mat->m[0][0] = 1.0f;
    mat->m[1][1] = 1.0f;
    mat->m[2][2] = 1.0f;
    mat->m[3][3] = 1.0f;
}


/* Transpose the matrix mat2 to mat1
 */
void dm_matrix_transpose(DMMatrix *mat1, const DMMatrix *mat2)
{
    int i, j;

    for (i = 0; i < DM_MATRIX_SIZE; i++)
        for (j = 0; j < DM_MATRIX_SIZE; j++)
            mat1->m[i][j] = mat2->m[j][i];
}


/* Multiply matrices mat1 and mat2, putting result into mat1
 */
void dm_matrix_mul_r(DMMatrix *dst, const DMMatrix *mat1, const DMMatrix *mat2)
{
#ifdef DM_USE_SIMD
    asm volatile(
        "mov           %1,        %%ebx\n"
        "mov           %2,        %%edx\n"

        // --------------------------------------------------

        // 0
        "movups        (%%ebx),   %%xmm0\n" // mat1[0]
        "movups        (%%edx),   %%xmm1\n" // mat2[0]
        "shufps        $0x00, %%xmm1, %%xmm1\n" // mat2[0][0]
        "mulps         %%xmm0,    %%xmm1\n"
        "movups        %%xmm1,    %%xmm3\n"
        
        // 1
        "movups        16(%%ebx), %%xmm0\n" // mat1[0]
        "movups        (%%edx),   %%xmm1\n" // mat2[0]
        "shufps        $0x55, %%xmm1, %%xmm1\n" // mat2[0][1]
        "mulps         %%xmm0,    %%xmm1\n"
        "addps         %%xmm1,    %%xmm3\n"

        // 2
        "movups        32(%%ebx), %%xmm0\n" // mat1[0]
        "movups        (%%edx),   %%xmm1\n" // mat2[0]
        "shufps        $0xAA, %%xmm1, %%xmm1\n" // mat2[0][2]
        "mulps         %%xmm0,    %%xmm1\n"
        "addps         %%xmm1,    %%xmm3\n"

        // 3
        "movups        48(%%ebx), %%xmm0\n" // mat1[0]
        "movups        (%%edx),   %%xmm1\n" // mat2[0]
        "shufps        $0xFF, %%xmm1, %%xmm1\n" // mat2[0][3]
        "mulps         %%xmm0,    %%xmm1\n"
        "addps         %%xmm1,    %%xmm3\n"

        "mov           %0,        %%ebx\n"
        "movups        %%xmm3,    (%%ebx)\n"

        // --------------------------------------------------

        "mov           %1,        %%ebx\n"

        // 0
        "movups        (%%ebx),   %%xmm0\n" // mat1[0]
        "movups        16(%%edx), %%xmm1\n" // mat2[1]
        "shufps        $0x00, %%xmm1, %%xmm1\n" // mat2[0][0]
        "mulps         %%xmm0,    %%xmm1\n"
        "movups        %%xmm1,    %%xmm3\n"
        
        // 1
        "movups        16(%%ebx), %%xmm0\n" // mat1[0]
        "movups        16(%%edx), %%xmm1\n" // mat2[1]
        "shufps        $0x55, %%xmm1, %%xmm1\n" // mat2[0][1]
        "mulps         %%xmm0,    %%xmm1\n"
        "addps         %%xmm1,    %%xmm3\n"

        // 2
        "movups        32(%%ebx), %%xmm0\n" // mat1[0]
        "movups        16(%%edx), %%xmm1\n" // mat2[1]
        "shufps        $0xAA, %%xmm1, %%xmm1\n" // mat2[0][2]
        "mulps         %%xmm0,    %%xmm1\n"
        "addps         %%xmm1,    %%xmm3\n"

        // 3
        "movups        48(%%ebx), %%xmm0\n" // mat1[0]
        "movups        16(%%edx), %%xmm1\n" // mat2[1]
        "shufps        $0xFF, %%xmm1, %%xmm1\n" // mat2[0][3]
        "mulps         %%xmm0,    %%xmm1\n"
        "addps         %%xmm1,    %%xmm3\n"

        "mov           %0,        %%ebx\n"
        "movups        %%xmm3,    16(%%ebx)\n"

        // --------------------------------------------------

        "mov           %1,        %%ebx\n"

        // 0
        "movups        (%%ebx),   %%xmm0\n" // mat1[0]
        "movups        32(%%edx), %%xmm1\n" // mat2[1]
        "shufps        $0x00, %%xmm1, %%xmm1\n" // mat2[0][0]
        "mulps         %%xmm0,    %%xmm1\n"
        "movups        %%xmm1,    %%xmm3\n"
        
        // 1
        "movups        16(%%ebx), %%xmm0\n" // mat1[0]
        "movups        32(%%edx), %%xmm1\n" // mat2[1]
        "shufps        $0x55, %%xmm1, %%xmm1\n" // mat2[0][1]
        "mulps         %%xmm0,    %%xmm1\n"
        "addps         %%xmm1,    %%xmm3\n"

        // 2
        "movups        32(%%ebx), %%xmm0\n" // mat1[0]
        "movups        32(%%edx), %%xmm1\n" // mat2[1]
        "shufps        $0xAA, %%xmm1, %%xmm1\n" // mat2[0][2]
        "mulps         %%xmm0,    %%xmm1\n"
        "addps         %%xmm1,    %%xmm3\n"

        // 3
        "movups        48(%%ebx), %%xmm0\n" // mat1[0]
        "movups        32(%%edx), %%xmm1\n" // mat2[1]
        "shufps        $0xFF, %%xmm1, %%xmm1\n" // mat2[0][3]
        "mulps         %%xmm0,    %%xmm1\n"
        "addps         %%xmm1,    %%xmm3\n"

        "mov           %0,        %%ebx\n"
        "movups        %%xmm3,    32(%%ebx)\n"

        // --------------------------------------------------

        "mov           %1,        %%ebx\n"

        // 0
        "movups        (%%ebx),   %%xmm0\n" // mat1[0]
        "movups        48(%%edx), %%xmm1\n" // mat2[1]
        "shufps        $0x00, %%xmm1, %%xmm1\n" // mat2[0][0]
        "mulps         %%xmm0,    %%xmm1\n"
        "movups        %%xmm1,    %%xmm3\n"
        
        // 1
        "movups        16(%%ebx), %%xmm0\n" // mat1[0]
        "movups        48(%%edx), %%xmm1\n" // mat2[1]
        "shufps        $0x55, %%xmm1, %%xmm1\n" // mat2[0][1]
        "mulps         %%xmm0,    %%xmm1\n"
        "addps         %%xmm1,    %%xmm3\n"

        // 2
        "movups        32(%%ebx), %%xmm0\n" // mat1[0]
        "movups        48(%%edx), %%xmm1\n" // mat2[1]
        "shufps        $0xAA, %%xmm1, %%xmm1\n" // mat2[0][2]
        "mulps         %%xmm0,    %%xmm1\n"
        "addps         %%xmm1,    %%xmm3\n"

        // 3
        "movups        48(%%ebx), %%xmm0\n" // mat1[0]
        "movups        48(%%edx), %%xmm1\n" // mat2[1]
        "shufps        $0xFF, %%xmm1, %%xmm1\n" // mat2[0][3]
        "mulps         %%xmm0,    %%xmm1\n"
        "addps         %%xmm1,    %%xmm3\n"

        "mov           %0,        %%ebx\n"
        "movups        %%xmm3,    48(%%ebx)\n"

        : "=m" (dst)
        : "m" (mat1), "m" (mat2)
        : "memory", "%edx", "%ebx", "%xmm0", "%xmm2", "%xmm3"
        );
#else
    int i, j;
    for (i = 0; i < DM_MATRIX_SIZE; i++)
        for (j = 0; j < DM_MATRIX_SIZE; j++)
            dst->m[i][j] =
                (mat1->m[i][0] * mat2->m[0][j]) +
                (mat1->m[i][1] * mat2->m[1][j]) +
                (mat1->m[i][2] * mat2->m[2][j]);
#endif
}


void dm_matrix_mul(DMMatrix *mat1, const DMMatrix *mat2)
{
    DMMatrix tmpM;
    dm_matrix_mul_r(&tmpM, mat1, mat2);
    memcpy(mat1, &tmpM, sizeof(DMMatrix));
}


/* Multiply given list of matrices (size of nMatrices units) with given matrix.
 */
void dm_matrix_mul_n(DMMatrix * list, const int nlist, const DMMatrix *mat)
{
    int i;
    for (i = 0; i < nlist; i++)
        dm_matrix_mul(&list[i], mat);
}


/* Optimized rotation matrix creation
 */
void dm_matrix_rot(DMMatrix *mat,
    const DMFloat sx, const DMFloat sy, const DMFloat sz,
    const DMFloat cx, const DMFloat cy, const DMFloat cz)
{
    const DMFloat
        q = cx * sz,
        l = cx * cz,
        i = sx * sz,
        j = sx * cz;

    memset(mat, 0, sizeof(DMMatrix));

    mat->m[0][0] = cy * cz;
    mat->m[0][1] = cy * sz;
    mat->m[0][2] = -sy;


    mat->m[1][0] = (sy * j) - q;
    mat->m[1][1] = (sy * i) + l;
    mat->m[1][2] = sx * cy;


    mat->m[2][0] = (sy * l) + i;
    mat->m[2][1] = (sy * q) - j;
    mat->m[2][2] = cx * cy;

    mat->m[3][3] = 1.0f;
}