/*
 * Page header left over from the web source listing:
 *   Logo Search packages:
 *   Sourcecode: fenix version File versions  Download package
 *
 * scaler_scale2x.c
 */

/*
 *  Fenix - Videogame compiler/interpreter
 *  Current release       : FENIX - PROJECT 1.0 - R 0.84
 *  Last stable release   :
 *  Project documentation : http://fenix.divsite.net
 *
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 *
 *  Copyright © 1999 José Luis Cebrián Pagüe
 *  Copyright © 2002 Fenix Team
 *
 */

/*
 * FILE        : mmx_scale2x.cpp
 * DESCRIPTION : Scale2x effect
 *
 * HISTORY: 0.83 - First version from project 2xScale
 *
 *
 *
 * You can find a high-level description of the effect at:
 *
 * http://scale2x.sourceforge.net/scale2x.html
 *
 * Alternatively at the previous license terms, you are allowed to use this
 * code in your program with these conditions:
 * - the program is not used in commercial activities.
 * - the whole source code of the program is released with the binary.
 * - derivative works of the program are allowed.
 */

#ifdef MMX_FUNCTIONS
extern int MMX_available;
#endif

#include <assert.h>
#include <stdlib.h>
#include <string.h>

#include "fxi.h"

/* Suggested in "Intel Optimization" for Pentium II */
#define ASM_JUMP_ALIGN ".p2align 4\n"

/*
 * Scale2x one row of 16-bit pixels into two output rows (portable C version).
 *
 * dst0, dst1 : upper and lower output rows; each receives 2*count pixels
 * src0       : source row used as the "upper" neighbour
 * src1       : source row being expanded
 * src2       : source row used as the "lower" neighbour
 * count      : number of pixels in the source row
 *
 * Every source pixel produces a 2x2 block.  An output pixel is a copy of the
 * current pixel (src1) unless the vertical neighbour on its side (src0 for
 * dst0, src2 for dst1) equals the horizontal neighbour on its side and
 * differs from both opposite neighbours; in that case the vertical
 * neighbour's value is written instead.
 *
 * The first pixel has no left neighbour and the last pixel has no right
 * neighbour, so the outermost output columns are always plain copies.
 */
static void internal_scale2x_16_def(Uint16 *dst0, Uint16* dst1, const Uint16* src0, const Uint16* src1, const Uint16* src2, unsigned count) {
  /* Rows shorter than two pixels have no horizontal neighbours.  Without
   * this guard "count -= 2" below wraps around (count is unsigned) and
   * src1[1] is read past the end of a one-pixel row. */
  if (count < 2) {
    if (count == 1) {
      dst0[0] = src1[0];
      dst0[1] = src1[0];
      dst1[0] = src1[0];
      dst1[1] = src1[0];
    }
    return;
  }

  /* first pixel: only the right-hand output column can take a neighbour */
  dst0[0] = src1[0];
  dst1[0] = src1[0];
  dst0[1] = (src1[1] == src0[0] && src2[0] != src0[0]) ? src0[0] : src1[0];
  dst1[1] = (src1[1] == src2[0] && src0[0] != src2[0]) ? src2[0] : src1[0];
  ++src0;
  ++src1;
  ++src2;
  dst0 += 2;
  dst1 += 2;

  /* central pixels: both horizontal neighbours exist */
  for (count -= 2; count != 0; --count) {
    dst0[0] = (src1[-1] == src0[0] && src2[0] != src0[0] && src1[1] != src0[0]) ? src0[0] : src1[0];
    dst0[1] = (src1[1] == src0[0] && src2[0] != src0[0] && src1[-1] != src0[0]) ? src0[0] : src1[0];

    dst1[0] = (src1[-1] == src2[0] && src0[0] != src2[0] && src1[1] != src2[0]) ? src2[0] : src1[0];
    dst1[1] = (src1[1] == src2[0] && src0[0] != src2[0] && src1[-1] != src2[0]) ? src2[0] : src1[0];

    ++src0;
    ++src1;
    ++src2;
    dst0 += 2;
    dst1 += 2;
  }

  /* last pixel: only the left-hand output column can take a neighbour */
  dst0[0] = (src1[-1] == src0[0] && src2[0] != src0[0]) ? src0[0] : src1[0];
  dst1[0] = (src1[-1] == src2[0] && src0[0] != src2[0]) ? src2[0] : src1[0];
  dst0[1] = src1[0];
  dst1[1] = src1[0];
}

#if 0
/*
 * Scale2x one row of 32-bit pixels into two output rows (portable C version).
 *
 * dst0, dst1 : upper and lower output rows; each receives 2*count pixels
 * src0       : source row used as the "upper" neighbour
 * src1       : source row being expanded
 * src2       : source row used as the "lower" neighbour
 * count      : number of pixels in the source row
 *
 * Every source pixel produces a 2x2 block.  An output pixel is a copy of the
 * current pixel (src1) unless the vertical neighbour on its side (src0 for
 * dst0, src2 for dst1) equals the horizontal neighbour on its side and
 * differs from both opposite neighbours; in that case the vertical
 * neighbour's value is written instead.
 *
 * The first pixel has no left neighbour and the last pixel has no right
 * neighbour, so the outermost output columns are always plain copies.
 */
static void internal_scale2x_32_def(Uint32* dst0,
                                    Uint32* dst1,
                                    const Uint32* src0,
                                    const Uint32* src1,
                                    const Uint32* src2,
                                    unsigned count) {
  /* Rows shorter than two pixels have no horizontal neighbours.  Without
   * this guard "count -= 2" below wraps around (count is unsigned) and
   * src1[1] is read past the end of a one-pixel row. */
  if (count < 2) {
    if (count == 1) {
      dst0[0] = src1[0];
      dst0[1] = src1[0];
      dst1[0] = src1[0];
      dst1[1] = src1[0];
    }
    return;
  }

  /* first pixel: only the right-hand output column can take a neighbour */
  dst0[0] = src1[0];
  dst1[0] = src1[0];
  dst0[1] = (src1[1] == src0[0] && src2[0] != src0[0]) ? src0[0] : src1[0];
  dst1[1] = (src1[1] == src2[0] && src0[0] != src2[0]) ? src2[0] : src1[0];
  ++src0;
  ++src1;
  ++src2;
  dst0 += 2;
  dst1 += 2;

  /* central pixels: both horizontal neighbours exist */
  for (count -= 2; count != 0; --count) {
    dst0[0] = (src1[-1] == src0[0] && src2[0] != src0[0] && src1[1] != src0[0]) ? src0[0] : src1[0];
    dst0[1] = (src1[1] == src0[0] && src2[0] != src0[0] && src1[-1] != src0[0]) ? src0[0] : src1[0];

    dst1[0] = (src1[-1] == src2[0] && src0[0] != src2[0] && src1[1] != src2[0]) ? src2[0] : src1[0];
    dst1[1] = (src1[1] == src2[0] && src0[0] != src2[0] && src1[-1] != src2[0]) ? src2[0] : src1[0];

    ++src0;
    ++src1;
    ++src2;
    dst0 += 2;
    dst1 += 2;
  }

  /* last pixel: only the left-hand output column can take a neighbour */
  dst0[0] = (src1[-1] == src0[0] && src2[0] != src0[0]) ? src0[0] : src1[0];
  dst1[0] = (src1[-1] == src2[0] && src0[0] != src2[0]) ? src2[0] : src1[0];
  dst0[1] = src1[0];
  dst1[1] = src1[0];
}
#endif

#ifdef MMX_FUNCTIONS
/*
 * MMX kernel: build ONE output row of the Scale2x effect for 16-bit pixels.
 *
 * dst   : output row; receives 2*count pixels
 * src0  : source row whose pixels may replace the current ones
 * src1  : source row being expanded (current row)
 * src2  : source row on the opposite side of src0
 * count : pixels per source row; must be at least 8 and a multiple of 4
 *
 * Pixels are processed four at a time (one 64-bit MMX register per run).
 * A first and a final run are always executed, with (count - 8) / 4 central
 * runs in between, which is why smaller or non-multiple-of-4 counts are not
 * supported here.  The missing neighbour to the left of the first pixel and
 * to the right of the last pixel is replaced by a zero ("black") pixel.
 *
 * Register use inside each run:
 *   mm7 = current pixels          mm6 = src0 pixels
 *   mm0 = left neighbours         mm1 = right neighbours
 *   mm2 = left output pixels      mm4 = right output pixels
 * where
 *   left  output = src0 if left  == src0 && left  != src2 && left  != right
 *   right output = src0 if right == src0 && right != src2 && right != left
 * and the current pixel otherwise.  The left/right results are interleaved
 * and stored as eight pixels in dst.
 *
 * Calling the kernel a second time with src0 and src2 swapped yields the
 * other output row of the 2x2 blocks.
 */
static void internal_scale2x_16_mmx_single(Uint16* dst, const Uint16* src0, const Uint16* src1, const Uint16* src2, unsigned count) {
  /* always do the first and last run */
  count -= 2*4;

  #ifdef __GNUC__
  __asm__ __volatile__(
    /* first run */
    /* set the current, current_pre, current_next registers */
    "pxor %%mm0,%%mm0\n" /* use a fake black out of screen */
    "movq 0(%1),%%mm7\n"
    "movq 8(%1),%%mm1\n"
    "psrlq $48,%%mm0\n"
    "psllq $48,%%mm1\n"
    "movq %%mm7,%%mm2\n"
    "movq %%mm7,%%mm3\n"
    "psllq $16,%%mm2\n"
    "psrlq $16,%%mm3\n"
    "por %%mm2,%%mm0\n"
    "por %%mm3,%%mm1\n"

    /* current_upper */
    "movq (%0),%%mm6\n"

    /* compute the upper-left pixel for dst0 on %%mm2 */
    /* compute the upper-right pixel for dst0 on %%mm4 */
    "movq %%mm0,%%mm2\n"
    "movq %%mm1,%%mm4\n"
    "movq %%mm0,%%mm3\n"
    "movq %%mm1,%%mm5\n"
    "pcmpeqw %%mm6,%%mm2\n"
    "pcmpeqw %%mm6,%%mm4\n"
    "pcmpeqw (%2),%%mm3\n"
    "pcmpeqw (%2),%%mm5\n"
    "pandn %%mm2,%%mm3\n"
    "pandn %%mm4,%%mm5\n"
    "movq %%mm0,%%mm2\n"
    "movq %%mm1,%%mm4\n"
    "pcmpeqw %%mm1,%%mm2\n"
    "pcmpeqw %%mm0,%%mm4\n"
    "pandn %%mm3,%%mm2\n"
    "pandn %%mm5,%%mm4\n"
    "movq %%mm2,%%mm3\n"
    "movq %%mm4,%%mm5\n"
    "pand %%mm6,%%mm2\n"
    "pand %%mm6,%%mm4\n"
    "pandn %%mm7,%%mm3\n"
    "pandn %%mm7,%%mm5\n"
    "por %%mm3,%%mm2\n"
    "por %%mm5,%%mm4\n"

    /* set *dst0 */
    "movq %%mm2,%%mm3\n"
    "punpcklwd %%mm4,%%mm2\n"
    "punpckhwd %%mm4,%%mm3\n"
    "movq %%mm2,(%3)\n"
    "movq %%mm3,8(%3)\n"

    /* next */
    /* No size suffix: the operands are pointer-sized registers, so the
     * assembler picks the width (32-bit on x86, 64-bit on x86-64). */
    "add $8,%0\n"
    "add $8,%1\n"
    "add $8,%2\n"
    "add $16,%3\n"

    /* central runs */
    "shrl $2,%4\n"
    "jz 1f\n"
    ASM_JUMP_ALIGN
    "0:\n"

    /* set the current, current_pre, current_next registers */
    "movq -8(%1),%%mm0\n"
    "movq (%1),%%mm7\n"
    "movq 8(%1),%%mm1\n"
    "psrlq $48,%%mm0\n"
    "psllq $48,%%mm1\n"
    "movq %%mm7,%%mm2\n"
    "movq %%mm7,%%mm3\n"
    "psllq $16,%%mm2\n"
    "psrlq $16,%%mm3\n"
    "por %%mm2,%%mm0\n"
    "por %%mm3,%%mm1\n"

    /* current_upper */
    "movq (%0),%%mm6\n"

    /* compute the upper-left pixel for dst0 on %%mm2 */
    /* compute the upper-right pixel for dst0 on %%mm4 */
    "movq %%mm0,%%mm2\n"
    "movq %%mm1,%%mm4\n"
    "movq %%mm0,%%mm3\n"
    "movq %%mm1,%%mm5\n"
    "pcmpeqw %%mm6,%%mm2\n"
    "pcmpeqw %%mm6,%%mm4\n"
    "pcmpeqw (%2),%%mm3\n"
    "pcmpeqw (%2),%%mm5\n"
    "pandn %%mm2,%%mm3\n"
    "pandn %%mm4,%%mm5\n"
    "movq %%mm0,%%mm2\n"
    "movq %%mm1,%%mm4\n"
    "pcmpeqw %%mm1,%%mm2\n"
    "pcmpeqw %%mm0,%%mm4\n"
    "pandn %%mm3,%%mm2\n"
    "pandn %%mm5,%%mm4\n"
    "movq %%mm2,%%mm3\n"
    "movq %%mm4,%%mm5\n"
    "pand %%mm6,%%mm2\n"
    "pand %%mm6,%%mm4\n"
    "pandn %%mm7,%%mm3\n"
    "pandn %%mm7,%%mm5\n"
    "por %%mm3,%%mm2\n"
    "por %%mm5,%%mm4\n"

    /* set *dst0 */
    "movq %%mm2,%%mm3\n"
    "punpcklwd %%mm4,%%mm2\n"
    "punpckhwd %%mm4,%%mm3\n"
    "movq %%mm2,(%3)\n"
    "movq %%mm3,8(%3)\n"

    /* next */
    "add $8,%0\n"
    "add $8,%1\n"
    "add $8,%2\n"
    "add $16,%3\n"

    "decl %4\n"
    "jnz 0b\n"
    "1:\n"

    /* final run */
    /* set the current, current_pre, current_next registers */
    "movq -8(%1),%%mm0\n"
    "movq (%1),%%mm7\n"
    "pxor %%mm1,%%mm1\n" /* use a fake black out of screen */
    "psrlq $48,%%mm0\n"
    "psllq $48,%%mm1\n"
    "movq %%mm7,%%mm2\n"
    "movq %%mm7,%%mm3\n"
    "psllq $16,%%mm2\n"
    "psrlq $16,%%mm3\n"
    "por %%mm2,%%mm0\n"
    "por %%mm3,%%mm1\n"

    /* current_upper */
    "movq (%0),%%mm6\n"

    /* compute the upper-left pixel for dst0 on %%mm2 */
    /* compute the upper-right pixel for dst0 on %%mm4 */
    "movq %%mm0,%%mm2\n"
    "movq %%mm1,%%mm4\n"
    "movq %%mm0,%%mm3\n"
    "movq %%mm1,%%mm5\n"
    "pcmpeqw %%mm6,%%mm2\n"
    "pcmpeqw %%mm6,%%mm4\n"
    "pcmpeqw (%2),%%mm3\n"
    "pcmpeqw (%2),%%mm5\n"
    "pandn %%mm2,%%mm3\n"
    "pandn %%mm4,%%mm5\n"
    "movq %%mm0,%%mm2\n"
    "movq %%mm1,%%mm4\n"
    "pcmpeqw %%mm1,%%mm2\n"
    "pcmpeqw %%mm0,%%mm4\n"
    "pandn %%mm3,%%mm2\n"
    "pandn %%mm5,%%mm4\n"
    "movq %%mm2,%%mm3\n"
    "movq %%mm4,%%mm5\n"
    "pand %%mm6,%%mm2\n"
    "pand %%mm6,%%mm4\n"
    "pandn %%mm7,%%mm3\n"
    "pandn %%mm7,%%mm5\n"
    "por %%mm3,%%mm2\n"
    "por %%mm5,%%mm4\n"

    /* set *dst0 */
    "movq %%mm2,%%mm3\n"
    "punpcklwd %%mm4,%%mm2\n"
    "punpckhwd %%mm4,%%mm3\n"
    "movq %%mm2,(%3)\n"
    "movq %%mm3,8(%3)\n"
    "emms\n"

    : "+r" (src0), "+r" (src1), "+r" (src2), "+r" (dst), "+r" (count)
    :
    /* "memory": the source rows are read and dst is written through the
     * pointer registers, which the operand list alone does not tell the
     * compiler. */
    : "cc", "memory"
    );
  #else
  __asm {
    mov eax, src0;
    mov ebx, src1;
    mov ecx, src2;
    mov edx, dst;
    mov esi, count;

    /* first run */
    /* set the current, current_pre, current_next registers */
    pxor mm0,mm0; /* use a fake black out of screen */
    movq mm7, qword ptr [ebx];
    movq mm1, qword ptr [ebx + 8];
    psrlq mm0, 48;
    psllq mm1, 48;
    movq mm2, mm7;
    movq mm3, mm7;
    psllq mm2, 16;
    psrlq mm3, 16;
    por mm0, mm2;
    por mm1, mm3;

    /* current_upper */
    movq mm6, qword ptr [eax];

    /* compute the upper-left pixel for dst0 on mm2 */
    /* compute the upper-right pixel for dst0 on mm4 */
    movq mm2, mm0;
    movq mm4, mm1;
    movq mm3, mm0;
    movq mm5, mm1;
    pcmpeqw mm2, mm6;
    pcmpeqw mm4, mm6;
    pcmpeqw mm3, qword ptr [ecx];
    pcmpeqw mm5, qword ptr [ecx];
    pandn mm3,mm2;
    pandn mm5,mm4;
    movq mm2,mm0;
    movq mm4,mm1;
    pcmpeqw mm2,mm1;
    pcmpeqw mm4,mm0;
    pandn mm2,mm3;
    pandn mm4,mm5;
    movq mm3,mm2;
    movq mm5,mm4;
    pand mm2,mm6;
    pand mm4,mm6;
    pandn mm3,mm7;
    pandn mm5,mm7;
    por mm2,mm3;
    por mm4,mm5;

    /* set *dst0 */
    movq mm3,mm2;
    punpcklwd mm2,mm4;
    punpckhwd mm3,mm4;
    movq qword ptr [edx], mm2;
    movq qword ptr [edx + 8], mm3;

    /* next */
    add eax, 8;
    add ebx, 8;
    add ecx, 8;
    add edx, 16;

    /* central runs */
    shr esi, 2;
    jz label1;
    align 4;
  label0:

    /* set the current, current_pre, current_next registers */
    movq mm0, qword ptr [ebx-8];
    movq mm7, qword ptr [ebx];
    movq mm1, qword ptr [ebx+8];
    psrlq mm0,48;
    psllq mm1,48;
    movq mm2,mm7;
    movq mm3,mm7;
    psllq mm2,16;
    psrlq mm3,16;
    por mm0,mm2;
    por mm1,mm3;

    /* current_upper */
    movq mm6, qword ptr [eax];

    /* compute the upper-left pixel for dst0 on mm2 */
    /* compute the upper-right pixel for dst0 on mm4 */
    movq mm2,mm0;
    movq mm4,mm1;
    movq mm3,mm0;
    movq mm5,mm1;
    pcmpeqw mm2,mm6;
    pcmpeqw mm4,mm6;
    pcmpeqw mm3, qword ptr [ecx];
    pcmpeqw mm5, qword ptr [ecx];
    pandn mm3,mm2;
    pandn mm5,mm4;
    movq mm2,mm0;
    movq mm4,mm1;
    pcmpeqw mm2,mm1;
    pcmpeqw mm4,mm0;
    pandn mm2,mm3;
    pandn mm4,mm5;
    movq mm3,mm2;
    movq mm5,mm4;
    pand mm2,mm6;
    pand mm4,mm6;
    pandn mm3,mm7;
    pandn mm5,mm7;
    por mm2,mm3;
    por mm4,mm5;

    /* set *dst0 */
    movq mm3,mm2;
    punpcklwd mm2,mm4;
    punpckhwd mm3,mm4;
    movq qword ptr [edx], mm2;
    movq qword ptr [edx+8], mm3;

    /* next */
    add eax,8;
    add ebx,8;
    add ecx,8;
    add edx,16;

    dec esi;
    jnz label0;
  label1:

    /* final run */
    /* set the current, current_pre, current_next registers */
    movq mm0, qword ptr [ebx-8];
    movq mm7, qword ptr [ebx];
    pxor mm1,mm1; /* use a fake black out of screen */
    psrlq mm0,48;
    psllq mm1,48;
    movq mm2,mm7;
    movq mm3,mm7;
    psllq mm2,16;
    psrlq mm3,16;
    por mm0,mm2;
    por mm1,mm3;

    /* current_upper */
    movq mm6, qword ptr [eax];

    /* compute the upper-left pixel for dst0 on mm2 */
    /* compute the upper-right pixel for dst0 on mm4 */
    movq mm2,mm0;
    movq mm4,mm1;
    movq mm3,mm0;
    movq mm5,mm1;
    pcmpeqw mm2,mm6;
    pcmpeqw mm4,mm6;
    pcmpeqw mm3, qword ptr [ecx];
    pcmpeqw mm5, qword ptr [ecx];
    pandn mm3,mm2;
    pandn mm5,mm4;
    movq mm2,mm0;
    movq mm4,mm1;
    pcmpeqw mm2,mm1;
    pcmpeqw mm4,mm0;
    pandn mm2,mm3;
    pandn mm4,mm5;
    movq mm3,mm2;
    movq mm5,mm4;
    pand mm2,mm6;
    pand mm4,mm6;
    pandn mm3,mm7;
    pandn mm5,mm7;
    por mm2,mm3;
    por mm4,mm5;

    /* set *dst0 */
    movq mm3,mm2;
    punpcklwd mm2,mm4;
    punpckhwd mm3,mm4;
    movq qword ptr [edx], mm2;
    movq qword ptr [edx+8], mm3;

    mov src0, eax;
    mov src1, ebx;
    mov src2, ecx;
    mov dst, edx;
    mov count, esi;

    emms;
  }
  #endif
}
#if 0
/*
 * MMX kernel: build ONE output row of the Scale2x effect for 32-bit pixels.
 *
 * dst   : output row; receives 2*count pixels
 * src0  : source row whose pixels may replace the current ones
 * src1  : source row being expanded (current row)
 * src2  : source row on the opposite side of src0
 * count : pixels per source row; must be at least 4 and a multiple of 2
 *
 * Pixels are processed two at a time (one 64-bit MMX register per run).
 * A first and a final run are always executed, with (count - 4) / 2 central
 * runs in between, which is why smaller or odd counts are not supported
 * here.  The missing neighbour to the left of the first pixel and to the
 * right of the last pixel is replaced by a zero ("black") pixel.
 *
 * Register use inside each run:
 *   mm7 = current pixels          mm6 = src0 pixels
 *   mm0 = left neighbours         mm1 = right neighbours
 *   mm2 = left output pixels      mm4 = right output pixels
 * where
 *   left  output = src0 if left  == src0 && left  != src2 && left  != right
 *   right output = src0 if right == src0 && right != src2 && right != left
 * and the current pixel otherwise.  The left/right results are interleaved
 * and stored as four pixels in dst.
 *
 * Calling the kernel a second time with src0 and src2 swapped yields the
 * other output row of the 2x2 blocks.
 */
static void internal_scale2x_32_mmx_single(Uint32* dst, const Uint32* src0, const Uint32* src1, const Uint32* src2, unsigned count) {
  /* always do the first and last run */
  count -= 2*2;

  #ifdef __GNUC__
  __asm__ __volatile__(
    /* first run */
    /* set the current, current_pre, current_next registers */
    "pxor %%mm0,%%mm0\n" /* use a fake black out of screen */
    "movq 0(%1),%%mm7\n"
    "movq 8(%1),%%mm1\n"
    "psrlq $32,%%mm0\n"
    "psllq $32,%%mm1\n"
    "movq %%mm7,%%mm2\n"
    "movq %%mm7,%%mm3\n"
    "psllq $32,%%mm2\n"
    "psrlq $32,%%mm3\n"
    "por %%mm2,%%mm0\n"
    "por %%mm3,%%mm1\n"

    /* current_upper */
    "movq (%0),%%mm6\n"

    /* compute the upper-left pixel for dst0 on %%mm2 */
    /* compute the upper-right pixel for dst0 on %%mm4 */
    "movq %%mm0,%%mm2\n"
    "movq %%mm1,%%mm4\n"
    "movq %%mm0,%%mm3\n"
    "movq %%mm1,%%mm5\n"
    "pcmpeqd %%mm6,%%mm2\n"
    "pcmpeqd %%mm6,%%mm4\n"
    "pcmpeqd (%2),%%mm3\n"
    "pcmpeqd (%2),%%mm5\n"
    "pandn %%mm2,%%mm3\n"
    "pandn %%mm4,%%mm5\n"
    "movq %%mm0,%%mm2\n"
    "movq %%mm1,%%mm4\n"
    "pcmpeqd %%mm1,%%mm2\n"
    "pcmpeqd %%mm0,%%mm4\n"
    "pandn %%mm3,%%mm2\n"
    "pandn %%mm5,%%mm4\n"
    "movq %%mm2,%%mm3\n"
    "movq %%mm4,%%mm5\n"
    "pand %%mm6,%%mm2\n"
    "pand %%mm6,%%mm4\n"
    "pandn %%mm7,%%mm3\n"
    "pandn %%mm7,%%mm5\n"
    "por %%mm3,%%mm2\n"
    "por %%mm5,%%mm4\n"

    /* set *dst0 */
    "movq %%mm2,%%mm3\n"
    "punpckldq %%mm4,%%mm2\n"
    "punpckhdq %%mm4,%%mm3\n"
    "movq %%mm2,(%3)\n"
    "movq %%mm3,8(%3)\n"

    /* next */
    /* No size suffix: the operands are pointer-sized registers, so the
     * assembler picks the width (32-bit on x86, 64-bit on x86-64). */
    "add $8,%0\n"
    "add $8,%1\n"
    "add $8,%2\n"
    "add $16,%3\n"

    /* central runs */
    "shrl $1,%4\n"
    "jz 1f\n"
    ASM_JUMP_ALIGN
    "0:\n"

    /* set the current, current_pre, current_next registers */
    "movq -8(%1),%%mm0\n"
    "movq (%1),%%mm7\n"
    "movq 8(%1),%%mm1\n"
    "psrlq $32,%%mm0\n"
    "psllq $32,%%mm1\n"
    "movq %%mm7,%%mm2\n"
    "movq %%mm7,%%mm3\n"
    "psllq $32,%%mm2\n"
    "psrlq $32,%%mm3\n"
    "por %%mm2,%%mm0\n"
    "por %%mm3,%%mm1\n"

    /* current_upper */
    "movq (%0),%%mm6\n"

    /* compute the upper-left pixel for dst0 on %%mm2 */
    /* compute the upper-right pixel for dst0 on %%mm4 */
    "movq %%mm0,%%mm2\n"
    "movq %%mm1,%%mm4\n"
    "movq %%mm0,%%mm3\n"
    "movq %%mm1,%%mm5\n"
    "pcmpeqd %%mm6,%%mm2\n"
    "pcmpeqd %%mm6,%%mm4\n"
    "pcmpeqd (%2),%%mm3\n"
    "pcmpeqd (%2),%%mm5\n"
    "pandn %%mm2,%%mm3\n"
    "pandn %%mm4,%%mm5\n"
    "movq %%mm0,%%mm2\n"
    "movq %%mm1,%%mm4\n"
    "pcmpeqd %%mm1,%%mm2\n"
    "pcmpeqd %%mm0,%%mm4\n"
    "pandn %%mm3,%%mm2\n"
    "pandn %%mm5,%%mm4\n"
    "movq %%mm2,%%mm3\n"
    "movq %%mm4,%%mm5\n"
    "pand %%mm6,%%mm2\n"
    "pand %%mm6,%%mm4\n"
    "pandn %%mm7,%%mm3\n"
    "pandn %%mm7,%%mm5\n"
    "por %%mm3,%%mm2\n"
    "por %%mm5,%%mm4\n"

    /* set *dst0 */
    "movq %%mm2,%%mm3\n"
    "punpckldq %%mm4,%%mm2\n"
    "punpckhdq %%mm4,%%mm3\n"
    "movq %%mm2,(%3)\n"
    "movq %%mm3,8(%3)\n"

    /* next */
    "add $8,%0\n"
    "add $8,%1\n"
    "add $8,%2\n"
    "add $16,%3\n"

    "decl %4\n"
    "jnz 0b\n"
    "1:\n"

    /* final run */
    /* set the current, current_pre, current_next registers */
    "movq -8(%1),%%mm0\n"
    "movq (%1),%%mm7\n"
    "pxor %%mm1,%%mm1\n" /* use a fake black out of screen */
    "psrlq $32,%%mm0\n"
    "psllq $32,%%mm1\n"
    "movq %%mm7,%%mm2\n"
    "movq %%mm7,%%mm3\n"
    "psllq $32,%%mm2\n"
    "psrlq $32,%%mm3\n"
    "por %%mm2,%%mm0\n"
    "por %%mm3,%%mm1\n"

    /* current_upper */
    "movq (%0),%%mm6\n"

    /* compute the upper-left pixel for dst0 on %%mm2 */
    /* compute the upper-right pixel for dst0 on %%mm4 */
    "movq %%mm0,%%mm2\n"
    "movq %%mm1,%%mm4\n"
    "movq %%mm0,%%mm3\n"
    "movq %%mm1,%%mm5\n"
    "pcmpeqd %%mm6,%%mm2\n"
    "pcmpeqd %%mm6,%%mm4\n"
    "pcmpeqd (%2),%%mm3\n"
    "pcmpeqd (%2),%%mm5\n"
    "pandn %%mm2,%%mm3\n"
    "pandn %%mm4,%%mm5\n"
    "movq %%mm0,%%mm2\n"
    "movq %%mm1,%%mm4\n"
    "pcmpeqd %%mm1,%%mm2\n"
    "pcmpeqd %%mm0,%%mm4\n"
    "pandn %%mm3,%%mm2\n"
    "pandn %%mm5,%%mm4\n"
    "movq %%mm2,%%mm3\n"
    "movq %%mm4,%%mm5\n"
    "pand %%mm6,%%mm2\n"
    "pand %%mm6,%%mm4\n"
    "pandn %%mm7,%%mm3\n"
    "pandn %%mm7,%%mm5\n"
    "por %%mm3,%%mm2\n"
    "por %%mm5,%%mm4\n"

    /* set *dst0 */
    "movq %%mm2,%%mm3\n"
    "punpckldq %%mm4,%%mm2\n"
    "punpckhdq %%mm4,%%mm3\n"
    "movq %%mm2,(%3)\n"
    "movq %%mm3,8(%3)\n"
    "emms\n"

    : "+r" (src0), "+r" (src1), "+r" (src2), "+r" (dst), "+r" (count)
    :
    /* "memory": the source rows are read and dst is written through the
     * pointer registers, which the operand list alone does not tell the
     * compiler. */
    : "cc", "memory"
    );
  #else
  __asm {
    mov eax, src0;
    mov ebx, src1;
    mov ecx, src2;
    mov edx, dst;
    mov esi, count;

    /* first run */
    /* set the current, current_pre, current_next registers */
    pxor mm0,mm0; /* use a fake black out of screen */
    movq mm7,qword ptr [ebx];
    movq mm1,qword ptr [ebx + 8];
    psrlq mm0,32;
    psllq mm1,32;
    movq mm2,mm7;
    movq mm3,mm7;
    psllq mm2,32;
    psrlq mm3,32;
    por mm0,mm2;
    por mm1,mm3;

    /* current_upper */
    movq mm6,qword ptr [eax];

    /* compute the upper-left pixel for dst0 on mm2 */
    /* compute the upper-right pixel for dst0 on mm4 */
    movq mm2,mm0;
    movq mm4,mm1;
    movq mm3,mm0;
    movq mm5,mm1;
    pcmpeqd mm2,mm6;
    pcmpeqd mm4,mm6;
    pcmpeqd mm3,qword ptr [ecx];
    pcmpeqd mm5,qword ptr [ecx];
    pandn mm3,mm2;
    pandn mm5,mm4;
    movq mm2,mm0;
    movq mm4,mm1;
    pcmpeqd mm2,mm1;
    pcmpeqd mm4,mm0;
    pandn mm2,mm3;
    pandn mm4,mm5;
    movq mm3,mm2;
    movq mm5,mm4;
    pand mm2,mm6;
    pand mm4,mm6;
    pandn mm3,mm7;
    pandn mm5,mm7;
    por mm2,mm3;
    por mm4,mm5;

    /* set *dst0 */
    movq mm3,mm2;
    punpckldq mm2,mm4;
    punpckhdq mm3,mm4;
    movq qword ptr [edx],mm2;
    movq qword ptr [edx+8],mm3;

    /* next */
    add eax,8;
    add ebx,8;
    add ecx,8;
    add edx,16;

    /* central runs */
    shr esi,1;
    jz label1;
label0:

    /* set the current, current_pre, current_next registers */
    movq mm0,qword ptr [ebx-8];
    movq mm7,qword ptr [ebx];
    movq mm1,qword ptr [ebx+8];
    psrlq mm0,32;
    psllq mm1,32;
    movq mm2,mm7;
    movq mm3,mm7;
    psllq mm2,32;
    psrlq mm3,32;
    por mm0,mm2;
    por mm1,mm3;

    /* current_upper */
    movq mm6,qword ptr[eax];

    /* compute the upper-left pixel for dst0 on mm2 */
    /* compute the upper-right pixel for dst0 on mm4 */
    movq mm2,mm0;
    movq mm4,mm1;
    movq mm3,mm0;
    movq mm5,mm1;
    pcmpeqd mm2,mm6;
    pcmpeqd mm4,mm6;
    pcmpeqd mm3,qword ptr[ecx];
    pcmpeqd mm5,qword ptr[ecx];
    pandn mm3,mm2;
    pandn mm5,mm4;
    movq mm2,mm0;
    movq mm4,mm1;
    pcmpeqd mm2,mm1;
    pcmpeqd mm4,mm0;
    pandn mm2,mm3;
    pandn mm4,mm5;
    movq mm3,mm2;
    movq mm5,mm4;
    pand mm2,mm6;
    pand mm4,mm6;
    pandn mm3,mm7;
    pandn mm5,mm7;
    por mm2,mm3;
    por mm4,mm5;

    /* set *dst0 */
    movq mm3,mm2;
    punpckldq mm2,mm4;
    punpckhdq mm3,mm4;
    movq qword ptr [edx],mm2;
    movq qword ptr [edx+8],mm3;

    /* next */
    add eax,8;
    add ebx,8;
    add ecx,8;
    add edx,16;

    dec esi;
    jnz label0;
label1:

    /* final run */
    /* set the current, current_pre, current_next registers */
    movq mm0,qword ptr [ebx-8];
    movq mm7,qword ptr [ebx];
    pxor mm1,mm1; /* use a fake black out of screen */
    psrlq mm0,32;
    psllq mm1,32;
    movq mm2,mm7;
    movq mm3,mm7;
    psllq mm2,32;
    psrlq mm3,32;
    por mm0,mm2;
    por mm1,mm3;

    /* current_upper */
    movq mm6,qword ptr [eax];

    /* compute the upper-left pixel for dst0 on mm2 */
    /* compute the upper-right pixel for dst0 on mm4 */
    movq mm2,mm0;
    movq mm4,mm1;
    movq mm3,mm0;
    movq mm5,mm1;
    pcmpeqd mm2,mm6;
    pcmpeqd mm4,mm6;
    pcmpeqd mm3,qword ptr [ecx];
    pcmpeqd mm5,qword ptr [ecx];
    pandn mm3,mm2;
    pandn mm5,mm4;
    movq mm2,mm0;
    movq mm4,mm1;
    pcmpeqd mm2,mm1;
    pcmpeqd mm4,mm0;
    pandn mm2,mm3;
    pandn mm4,mm5;
    movq mm3,mm2;
    movq mm5,mm4;
    pand mm2,mm6;
    pand mm4,mm6;
    pandn mm3,mm7;
    pandn mm5,mm7;
    por mm2,mm3;
    por mm4,mm5;

    /* set *dst0 */
    movq mm3,mm2;
    punpckldq mm2,mm4;
    punpckhdq mm3,mm4;
    movq qword ptr [edx],mm2;
    movq qword ptr [edx+8],mm3;

    mov src0, eax;
    mov src1, ebx;
    mov src2, ecx;
    mov dst, edx;
    mov count, esi;

    emms;
  }
  #endif
}
#endif

/*
 * Scale2x one row of 16-bit pixels into two output rows using the MMX kernel.
 *
 * dst0, dst1 : upper and lower output rows; each receives 2*count pixels
 * src0       : source row used as the "upper" neighbour
 * src1       : source row being expanded
 * src2       : source row used as the "lower" neighbour
 * count      : number of pixels in the source row
 *
 * The kernel is run once per output row; the second run swaps src0 and src2
 * so that the lower neighbour takes the role of the upper one.
 */
static void internal_scale2x_16_mmx(Uint16* dst0, Uint16* dst1, const Uint16* src0, const Uint16* src1, const Uint16* src2, unsigned count) {
  /* The MMX kernel handles 4 pixels per run and always executes a first and
   * a last run: it subtracts 8 from the unsigned count and only covers whole
   * groups of 4.  Rows that are shorter than 8 pixels or not a multiple of 4
   * are therefore handed to the portable routine instead. */
  if (count < 2*4 || (count % 4) != 0) {
    internal_scale2x_16_def(dst0, dst1, src0, src1, src2, count);
    return;
  }

  internal_scale2x_16_mmx_single(dst0, src0, src1, src2, count);
  internal_scale2x_16_mmx_single(dst1, src2, src1, src0, count);
}

#if 0
/*
 * Scale2x one row of 32-bit pixels into two output rows using the MMX kernel.
 *
 * dst0, dst1 : upper and lower output rows; each receives 2*count pixels
 * src0       : source row used as the "upper" neighbour
 * src1       : source row being expanded
 * src2       : source row used as the "lower" neighbour
 * count      : number of pixels in the source row
 *
 * The kernel is run once per output row; the second run swaps src0 and src2
 * so that the lower neighbour takes the role of the upper one.
 */
static void internal_scale2x_32_mmx(Uint32* dst0, Uint32* dst1, const Uint32* src0, const Uint32* src1, const Uint32* src2, unsigned count) {
  /* The MMX kernel handles 2 pixels per run and always executes a first and
   * a last run: it subtracts 4 from the unsigned count and only covers whole
   * pairs.  Rows that are shorter than 4 pixels or have an odd length are
   * therefore handed to the portable routine instead. */
  if (count < 2*2 || (count % 2) != 0) {
    internal_scale2x_32_def(dst0, dst1, src0, src1, src2, count);
    return;
  }

  internal_scale2x_32_mmx_single(dst0, src0, src1, src2, count);
  internal_scale2x_32_mmx_single(dst1, src2, src1, src0, count);
}
#endif
#endif

/*
 * Apply the Scale2x effect to a 16 bits-per-pixel image.
 *
 * srcPtr   : first byte of the source image
 * srcPitch : distance in bytes between two consecutive source rows
 * dstPtr   : first byte of the destination image; 2*height rows of 2*width
 *            pixels are written
 * dstPitch : distance in bytes between two consecutive destination rows
 * width    : source width in pixels
 * height   : source height in pixels
 *
 * Nothing is written when width or height is not positive.  The top and
 * bottom source rows have no row above/below them, so the row itself is
 * passed as the missing neighbour.
 */
void scale2x(Uint8 *srcPtr, Uint32 srcPitch, Uint8 *dstPtr, Uint32 dstPitch, int width, int height)
{
  /* routine that expands one source row into two destination rows */
  void (*scale_row)(Uint16*, Uint16*, const Uint16*, const Uint16*, const Uint16*, unsigned) = internal_scale2x_16_def;

  Uint16 *dst0 = (Uint16 *)dstPtr;
  Uint16 *dst1 = dst0 + (dstPitch/2);

  Uint16 *src0 = (Uint16 *)srcPtr;
  Uint16 *src1;
  Uint16 *src2;

  int count;

  /* A non-positive size would turn into a huge unsigned pixel count and a
   * row loop that never reaches zero. */
  if (width <= 0 || height <= 0)
    return;

#ifdef MMX_FUNCTIONS
  /* The MMX kernel works on groups of 4 pixels and needs at least 8 of
   * them; every other width uses the portable routine. */
  if (MMX_available && width >= 2*4 && (width % 4) == 0)
    scale_row = internal_scale2x_16_mmx;
#endif

  /* A single row is its own upper and lower neighbour. */
  if (height == 1) {
    scale_row(dst0, dst1, src0, src0, src0, width);
    return;
  }

  src1 = src0 + (srcPitch/2);
  src2 = src1 + (srcPitch/2);

  /* top row: no row above, reuse the row itself */
  scale_row(dst0, dst1, src0, src0, src1, width);

  /* middle rows */
  for (count = height - 2; count > 0; --count) {
    /* each source row fills two destination rows: 2 * (dstPitch/2) pixels */
    dst0 += dstPitch;
    dst1 += dstPitch;
    scale_row(dst0, dst1, src0, src1, src2, width);
    src0 = src1;
    src1 = src2;
    src2 += srcPitch/2;
  }

  /* bottom row: no row below, reuse the row itself */
  dst0 += dstPitch;
  dst1 += dstPitch;
  scale_row(dst0, dst1, src0, src1, src1, width);
}

#if 0
/*
 * Apply the Scale2x effect to a 32 bits-per-pixel image.
 *
 * srcPtr   : first byte of the source image
 * srcPitch : distance in bytes between two consecutive source rows
 * dstPtr   : first byte of the destination image; 2*height rows of 2*width
 *            pixels are written
 * dstPitch : distance in bytes between two consecutive destination rows
 * width    : source width in pixels
 * height   : source height in pixels
 *
 * Nothing is written when width or height is not positive.  The top and
 * bottom source rows have no row above/below them, so the row itself is
 * passed as the missing neighbour.
 */
void scale2x32(Uint8 *srcPtr, Uint32 srcPitch,
                Uint8 *dstPtr, Uint32 dstPitch, int width, int height)
{
  /* routine that expands one source row into two destination rows */
  void (*scale_row)(Uint32*, Uint32*, const Uint32*, const Uint32*, const Uint32*, unsigned) = internal_scale2x_32_def;

  Uint32 *dst0 = (Uint32 *)dstPtr;
  Uint32 *dst1 = dst0 + (dstPitch/4);

  Uint32 *src0 = (Uint32 *)srcPtr;
  Uint32 *src1;
  Uint32 *src2;

  int count;

  /* A non-positive size would turn into a huge unsigned pixel count and a
   * row loop that never reaches zero. */
  if (width <= 0 || height <= 0)
    return;

#ifdef MMX_FUNCTIONS
  /* The MMX kernel works on pairs of pixels and needs at least 4 of them;
   * every other width uses the portable routine. */
  if (MMX_available && width >= 2*2 && (width % 2) == 0)
    scale_row = internal_scale2x_32_mmx;
#endif

  /* A single row is its own upper and lower neighbour. */
  if (height == 1) {
    scale_row(dst0, dst1, src0, src0, src0, width);
    return;
  }

  src1 = src0 + (srcPitch/4);
  src2 = src1 + (srcPitch/4);

  /* top row: no row above, reuse the row itself */
  scale_row(dst0, dst1, src0, src0, src1, width);

  /* middle rows */
  for (count = height - 2; count > 0; --count) {
    /* each source row fills two destination rows: 2 * (dstPitch/4) pixels */
    dst0 += dstPitch/2;
    dst1 += dstPitch/2;
    scale_row(dst0, dst1, src0, src1, src2, width);
    src0 = src1;
    src1 = src2;
    src2 += srcPitch/4;
  }

  /* bottom row: no row below, reuse the row itself */
  dst0 += dstPitch/2;
  dst1 += dstPitch/2;
  scale_row(dst0, dst1, src0, src1, src1, width);
}
#endif

/* Generated by  Doxygen 1.6.0   Back to index */