[XviD-devel] asm-ed sad32v : attached

Jim Hauxwell xvid-devel@xvid.org
Mon, 16 Dec 2002 19:27:55 -0000


This is a multi-part message in MIME format.

------=_NextPart_000_0003_01C2A539.3BEEF470
Content-Type: text/plain;
	charset="iso-8859-1"
Content-Transfer-Encoding: 7bit

Hi,

I've attached the xmm version of sad32v.  I wrote it with intrinsics, on
which the compiler has done a very good job, and pasted the result into the
.asm.  The original C is included for reference.  If this is OK, then I
will clean it up and repost.

Jim

------=_NextPart_000_0003_01C2A539.3BEEF470
Content-Type: text/plain;
	name="sad_xmm.asm"
Content-Transfer-Encoding: quoted-printable
Content-Disposition: attachment;
	filename="sad_xmm.asm"

;/***********************************************************************=
***
; *
; *	XVID MPEG-4 VIDEO CODEC
; *	xmm sum of absolute difference
; *
; *	This program is an implementation of a part of one or more MPEG-4
; *	Video tools as specified in ISO/IEC 14496-2 standard.  Those =
intending
; *	to use this software module in hardware or software products are
; *	advised that its use may infringe existing patents or copyrights, =
and
; *	any such use would be at such party's own risk.  The original
; *	developer of this software module and his/her company, and =
subsequent
; *	editors and their companies, will have no liability for use of this
; *	software or modifications or derivatives thereof.
; *
; *	This program is free software; you can redistribute it and/or modify
; *	it under the terms of the GNU General Public License as published by
; *	the Free Software Foundation; either version 2 of the License, or
; *	(at your option) any later version.
; *
; *	This program is distributed in the hope that it will be useful,
; *	but WITHOUT ANY WARRANTY; without even the implied warranty of
; *	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
; *	GNU General Public License for more details.
; *
; *	You should have received a copy of the GNU General Public License
; *	along with this program; if not, write to the Free Software
; *	Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
; *
; =
*************************************************************************=
/

;/***********************************************************************=
***
; *
; *	History:
; *
; * 23.07.2002	sad8bi_xmm; <pross@xvid.org>
; * 04.06.2002  rewrote some funcs (XMM mainly)     -Skal-
; * 17.11.2001  bugfix and small improvement for dev16_xmm,
; *             removed terminate early in sad16_xmm (Isibaar)
; *	12.11.2001	initial version; (c)2001 peter ross <pross@cs.rmit.edu.au>
; *
; =
*************************************************************************=
/

bits 32

;-----------------------------------------------------------------------
; cglobal <symbol>
;
; Declares <symbol> as a global of this object file.  When PREFIX is
; defined the exported name gets a leading underscore, and the bare name
; is redefined so that later uses in this file resolve to the
; underscored symbol.
;-----------------------------------------------------------------------
%macro cglobal 1
	%ifndef PREFIX
		global %1
	%else
		global _%1
		%define %1 _%1
	%endif
%endmacro

section .data

align 16
; Four packed 16-bit words, each holding the value 1.
; NOTE(review): no routine visible in this part of the file references
; mmx_one - confirm whether it is still needed.
mmx_one	times 4	dw 1

section .text

; Entry points exported through the cglobal macro (which adds a leading
; underscore to the exported name when PREFIX is defined).
cglobal  sad16_xmm
cglobal  sad8_xmm
cglobal  sad16bi_xmm
cglobal  sad8bi_xmm
cglobal  dev16_xmm

;=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D
;
; uint32_t sad16_xmm(const uint8_t * const cur,
;					const uint8_t * const ref,
;					const uint32_t stride,
;					const uint32_t best_sad);
;
;=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D

; SAD_16x16_SSE - adds the SAD of one 16-byte row to the accumulators.
;   eax, edx : row pointers of the two blocks; both advanced by ecx
;   ecx      : distance in bytes to the next row (not modified)
;   mm5      : accumulator for bytes 0..7 of the row
;   mm6      : accumulator for bytes 8..15 of the row
;   clobbers : mm0, mm1, flags
; NOTE(review): the pointer updates are interleaved with the MMX work,
; presumably for scheduling - measure before reordering.
%macro SAD_16x16_SSE 0
    movq mm0, [eax]
    psadbw mm0, [edx]       ; mm0 <- SAD of bytes 0..7
    movq mm1, [eax+8]
    add eax, ecx            ; first pointer -> next row
    psadbw mm1, [edx+8]     ; mm1 <- SAD of bytes 8..15
    paddusw mm5,mm0
    add edx, ecx            ; second pointer -> next row
    paddusw mm6,mm1
%endmacro

align 16
sad16_xmm:
    ; Two independent accumulators, one per 8-byte half of a row.
    pxor mm5, mm5
    pxor mm6, mm6

    ; Stack arguments.  The fourth one (best_sad) is never read here.
    mov ecx, [esp+12] ; stride
    mov edx, [esp+ 8] ; ref
    mov eax, [esp+ 4] ; cur

    ; One macro expansion per row, 16 rows, fully unrolled.
%rep 16
    SAD_16x16_SSE
%endrep

    ; Merge the two halves; the result is returned in eax.
    ; No emms is issued in this routine.
    paddusw mm6, mm5
    movd eax, mm6
    ret


;=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D
;
; uint32_t sad8_xmm(const uint8_t * const cur,
;					const uint8_t * const ref,
;					const uint32_t stride);
;
;=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D

; SAD_8x8_SSE - adds the SAD of two consecutive 8-byte rows.
;   eax, edx : row pointers of the two blocks; both advanced by ebx
;   ecx      : distance in bytes to the next row (not modified)
;   ebx      : amount added to both pointers afterwards (sad8_xmm sets
;              it to twice the stride)
;   mm5      : accumulator for the first row of the pair
;   mm6      : accumulator for the second row of the pair
;   clobbers : mm0, mm1, flags
%macro SAD_8x8_SSE 0
    movq mm0, [eax]
    movq mm1, [eax+ecx]

    psadbw mm0, [edx]       ; first row of the pair
    psadbw mm1, [edx+ecx]   ; second row of the pair
    add eax, ebx
    add edx, ebx

    paddusw mm5,mm0
    paddusw mm6,mm1
%endmacro

align 16
sad8_xmm:
    push ebx                ; saved here, restored before returning

    mov eax, [esp+4+ 4]     ; cur
    mov edx, [esp+4+ 8]     ; ref
    mov ecx, [esp+4+12]     ; stride
    lea ebx, [ecx+ecx]      ; step used by the macro: two rows

    pxor mm5, mm5           ; SAD of rows 0, 2, 4, 6
    pxor mm6, mm6           ; SAD of rows 1, 3, 5, 7

    ; Rows 0..5, two rows per expansion.
%rep 3
    SAD_8x8_SSE
%endrep

    ; Rows 6 and 7: same work, but the pointers need no further update.
    movq mm0, [eax]
    psadbw mm0, [edx]
    movq mm1, [eax+ecx]
    psadbw mm1, [edx+ecx]
    paddusw mm5, mm0
    paddusw mm6, mm1

    ; Merge both accumulators; the result is returned in eax.
    paddusw mm6, mm5
    movd eax, mm6

    pop ebx
    ret


;=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D
;
; uint32_t sad16bi_xmm(const uint8_t * const cur,
;					const uint8_t * const ref1,
;					const uint8_t * const ref2,
;					const uint32_t stride);
;
;=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D

; SADBI_16x16_SSE - one 16-byte row of a bidirectional SAD: the row at
; eax is compared with the byte-wise average (pavgb) of the rows at edx
; and ebx.
;   eax, edx, ebx : row pointers; all three advanced by ecx
;   ecx           : distance in bytes to the next row (not modified)
;   mm5           : accumulator for bytes 0..7 of the row
;   mm6           : accumulator for bytes 8..15 of the row
;   clobbers      : mm0, mm1, mm2, mm3, flags
%macro SADBI_16x16_SSE 0
    movq mm0, [eax]
    movq mm1, [eax+8]

    movq mm2, [edx]
    movq mm3, [edx+8]

    pavgb mm2, [ebx]        ; mm2 <- average of both references, low half
    add edx, ecx

    pavgb mm3, [ebx+8]      ; mm3 <- average of both references, high half
    add ebx, ecx

    psadbw mm0, mm2
    add eax, ecx

    psadbw mm1, mm3
    paddusw mm5,mm0

    paddusw mm6,mm1   =20
%endmacro

align 16
sad16bi_xmm:
    push ebx                ; ebx carries the second reference pointer

    ; Accumulators for the low and high 8 bytes of every row.
    pxor mm5, mm5
    pxor mm6, mm6

    ; Stack arguments, shifted by 4 because of the push above.
    mov ecx, [esp+4+16]     ; stride
    mov ebx, [esp+4+12]     ; ref2
    mov edx, [esp+4+ 8]     ; ref1
    mov eax, [esp+4+ 4]     ; cur

    ; One macro expansion per row, 16 rows, fully unrolled.
%rep 16
    SADBI_16x16_SSE
%endrep

    ; Merge both halves; the result is returned in eax.
    paddusw mm6, mm5
    movd eax, mm6

    pop ebx
    ret

;=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=20
;=20
; uint32_t sad8bi_xmm(const uint8_t * const cur,=20
; const uint8_t * const ref1,=20
; const uint8_t * const ref2,=20
; const uint32_t stride);=20
;=20
;=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=20

; SADBI_8x8_XMM - two consecutive 8-byte rows of a bidirectional SAD:
; each row at eax is compared with the byte-wise average (pavgb) of the
; matching rows at edx and ebx.
;   eax, edx, ebx : row pointers; all three advanced by 2*ecx
;   ecx           : distance in bytes to the next row (not modified)
;   mm5           : accumulator for the first row of the pair
;   mm6           : accumulator for the second row of the pair
;   clobbers      : mm0, mm1, mm2, mm3
; The pointers are advanced with lea, so this macro contains no
; flag-modifying integer instruction.
%macro SADBI_8x8_XMM 0=20
   movq mm0, [eax]=20
   movq mm1, [eax+ecx]=20

   movq mm2, [edx]=20
   movq mm3, [edx+ecx]=20

   ; average of both references, first row of the pair
   pavgb mm2, [ebx]=20
   lea edx, [edx+2*ecx]=20

   ; average of both references, second row of the pair
   pavgb mm3, [ebx+ecx]=20
   lea ebx, [ebx+2*ecx]=20

   psadbw mm0, mm2=20
   lea eax, [eax+2*ecx]=20

   psadbw mm1, mm3=20
   paddusw mm5,mm0=20

   paddusw mm6,mm1=20
%endmacro=20

align 16
sad8bi_xmm:
   push ebx                 ; ebx carries the second reference pointer

   ; Stack arguments, shifted by 4 because of the push above.
   mov eax, [esp+4+ 4]      ; cur
   mov edx, [esp+4+ 8]      ; ref1
   mov ebx, [esp+4+12]      ; ref2
   mov ecx, [esp+4+16]      ; stride

   pxor mm5, mm5            ; SAD of rows 0, 2, 4, 6
   pxor mm6, mm6            ; SAD of rows 1, 3, 5, 7

   ; Straight-line code: 8 rows, two per expansion.  There is no loop,
   ; so no label is needed here (the former unreferenced, colon-less
   ; label only produced an assembler warning).
%rep 4
   SADBI_8x8_XMM
%endrep

   ; Merge both accumulators; the result is returned in eax.
   ; No emms is issued in this routine.
   paddusw mm6, mm5
   movd eax, mm6

   pop ebx
   ret


;=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D
;
; uint32_t dev16_xmm(const uint8_t * const cur,
;					const uint32_t stride);
;
;=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D

; MEAN_16x16_SSE - adds the byte sums of one 16-byte row.
;   eax      : row pointer; advanced by ecx
;   ecx      : distance in bytes to the next row (not modified)
;   mm7      : second psadbw operand; dev16_xmm clears it, which turns
;              each psadbw into a plain sum of the eight bytes
;   mm5      : accumulator for bytes 0..7 of the row
;   mm6      : accumulator for bytes 8..15 of the row
;   clobbers : mm0, mm1, flags
%macro MEAN_16x16_SSE 0
    movq mm0, [eax]
    movq mm1, [eax+8]
    psadbw mm0, mm7
    psadbw mm1, mm7
    add eax, ecx
    paddw mm5, mm0=20
    paddw mm6, mm1
%endmacro
                           =20
; ABS_16x16_SSE - adds the SAD between one 16-byte row and mm4.
;   eax      : row pointer; advanced by ecx (with lea, flags untouched)
;   ecx      : distance in bytes to the next row (not modified)
;   mm4      : eight bytes compared against both halves of the row;
;              dev16_xmm fills every byte with the block mean
;   mm5      : accumulator for bytes 0..7 of the row
;   mm6      : accumulator for bytes 8..15 of the row
;   clobbers : mm0, mm1
%macro ABS_16x16_SSE 0
    movq mm0, [eax]
    movq mm1, [eax+8]
    psadbw mm0, mm4
    psadbw mm1, mm4
    lea eax,[eax+ecx]
    paddw mm5, mm0
    paddw mm6, mm1
%endmacro

align 16
dev16_xmm:
    ; Two passes over the same 16x16 block:
    ;   pass 1 - sum all 256 bytes and derive their mean (sum >> 8)
    ;   pass 2 - sum the absolute differences between bytes and mean
    ; The pass 2 total is returned in eax.  No emms is issued here.

    mov eax, [esp+ 4] ; cur
    mov ecx, [esp+ 8] ; stride

    pxor mm7, mm7 ; zero operand: psadbw against it sums the bytes
    pxor mm5, mm5 ; byte sum, columns 0..7
    pxor mm6, mm6 ; byte sum, columns 8..15

    ; Pass 1, one expansion per row.
%rep 16
    MEAN_16x16_SSE
%endrep

    paddusw mm6, mm5 ; low word of mm6 <- sum of the whole block

    ; Put the sum in both dwords so that the two packs below spread the
    ; mean over all eight bytes of mm4.
    movq mm4, mm6
    psllq mm4, 32
    paddd mm4, mm6
    psrld mm4, 8 ; divide by 256 (16*16 pixels)

    packssdw mm4, mm4
    packuswb mm4, mm4 ; every byte of mm4 now holds the mean

    mov eax, [esp+ 4] ; back to the first row

    pxor mm5, mm5 ; deviation sum, columns 0..7
    pxor mm6, mm6 ; deviation sum, columns 8..15

    ; Pass 2, one expansion per row.
%rep 16
    ABS_16x16_SSE
%endrep

    ; Only the low dword is returned.  The previous code also added a
    ; copy of the sum into the high dword of mm6, which movd never
    ; reads; those three instructions were dead and have been dropped.
    paddusw mm6, mm5
    movd eax, mm6
    ret

cglobal sad16v_xmm

;=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D
;int sad16v_xmm(const uint8_t * const cur,
;               const uint8_t * const ref,
;               const uint32_t stride,
;               int* sad8);
;=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D
align 16
sad16v_xmm:
    push ebx                ; ebx carries the output pointer

    pxor mm7, mm7           ; running total of all four partial SADs
    pxor mm5, mm5           ; columns 0..7 of the current 8 rows
    pxor mm6, mm6           ; columns 8..15 of the current 8 rows

    ; Stack arguments, shifted by 4 because of the push above.
    mov ebx, [esp+4+16]     ; sad8, four 32-bit results are stored
    mov ecx, [esp+4+12]     ; stride
    mov edx, [esp+4+ 8]     ; ref
    mov eax, [esp+4+ 4]     ; cur

    ; Rows 0..7: results 0 (left half) and 1 (right half).
%rep 8
    SAD_16x16_SSE
%endrep
    movd [ebx], mm5
    movd [ebx+4], mm6
    paddusw mm7, mm5
    paddusw mm7, mm6

    ; Rows 8..15: results 2 (left half) and 3 (right half).
    pxor mm5, mm5
    pxor mm6, mm6
%rep 8
    SAD_16x16_SSE
%endrep
    movd [ebx+8], mm5
    movd [ebx+12], mm6
    paddusw mm7, mm5
    paddusw mm7, mm6

    ; The sum of the four partial results is returned in eax.
    movd eax, mm7

    pop ebx
    ret
;--------

;=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D
;uint32_t sad32v_xmm(const uint8_t * const cur,=20
;			   const uint8_t * const ref,=20
;			   const uint32_t stride,=20
;			   int32_t *sad)
;{
;	int i;
;	unsigned int sad0 =3D 0;
;	unsigned int sad1 =3D 0;
;	unsigned int sad2 =3D 0;
;	unsigned int sad3 =3D 0;
;	uint8_t const *ptr_cur =3D cur;
;	uint8_t const *ptr_ref =3D ref;
;
;#pragma unroll(8)
;	for (i =3D 0; i < 8; i++, ptr_cur +=3D stride, ptr_ref +=3D stride)
;	{
;		/* first 8 pixels */
;		__m64 src_high, ref_high, src_rhigh, ref_rhigh;
;		__m64 src_low =3D ((__m64 *)ptr_cur)[0];
;		__m64 ref_low =3D ((__m64 *)ptr_ref)[0];
;		__m64 sad_mmx =3D _mm_sad_pu8(src_low, ref_low);
;		/* this is added to sad0 */
;		sad0 +=3D _mm_cvtsi64_si32(sad_mmx);
;		/* second 8 pixels */
;		src_high =3D ((__m64 *)ptr_cur)[1];
;		ref_high =3D ((__m64 *)ptr_ref)[1];
;		sad_mmx =3D _mm_sad_pu8(src_high, ref_high);
;		/* this is added to both sad0 & sad1 */
;		sad0 +=3D _mm_cvtsi64_si32(sad_mmx);
;		sad1 +=3D _mm_cvtsi64_si32(sad_mmx);
;		/* last 8 pixels */
;		src_rhigh =3D ((__m64 *)ptr_cur)[2];
;		ref_rhigh =3D ((__m64 *)ptr_ref)[2];
;		sad_mmx =3D _mm_sad_pu8(src_rhigh, ref_rhigh);
;		/* this is added to sad1 */
;		sad1 +=3D _mm_cvtsi64_si32(sad_mmx);
;	}
;
;#pragma unroll(8)
;	for (i =3D 0; i < 8; i++, ptr_cur +=3D stride, ptr_ref +=3D stride)
;	{
;		/* first 8 pixels */
;		__m64 src_high, ref_high, src_rhigh, ref_rhigh;
;		__m64 src_low =3D ((__m64 *)ptr_cur)[0];
;		__m64 ref_low =3D ((__m64 *)ptr_ref)[0];
;		__m64 sad_mmx =3D _mm_sad_pu8(src_low, ref_low);
;		/* this is added to sad0 and sad2 */
;		sad0 +=3D _mm_cvtsi64_si32(sad_mmx);
;		sad2 +=3D _mm_cvtsi64_si32(sad_mmx);
;		/* second 8 pixels */
;		src_high =3D ((__m64 *)ptr_cur)[1];
;		ref_high =3D ((__m64 *)ptr_ref)[1];
;		sad_mmx =3D _mm_sad_pu8(src_high, ref_high);
;		/* this is added to both sad0, sad1, sad2 & sad3 */
;		sad0 +=3D _mm_cvtsi64_si32(sad_mmx);
;		sad1 +=3D _mm_cvtsi64_si32(sad_mmx);
;		sad2 +=3D _mm_cvtsi64_si32(sad_mmx);
;		sad3 +=3D _mm_cvtsi64_si32(sad_mmx);
;		/* last 8 pixels */
;		src_rhigh =3D ((__m64 *)ptr_cur)[2];
;		ref_rhigh =3D ((__m64 *)ptr_ref)[2];
;		sad_mmx =3D _mm_sad_pu8(src_rhigh, ref_rhigh);
;		/* this is added to sad3 */
;		sad3 +=3D _mm_cvtsi64_si32(sad_mmx);
;	}
;
;#pragma unroll(8)
;	for (i =3D 0; i < 8; i++, ptr_cur +=3D stride, ptr_ref +=3D stride)
;	{
;		/* first 8 pixels */
;		__m64 src_high, ref_high, src_rhigh, ref_rhigh;
;		__m64 src_low =3D ((__m64 *)ptr_cur)[0];
;		__m64 ref_low =3D ((__m64 *)ptr_ref)[0];
;		__m64 sad_mmx =3D _mm_sad_pu8(src_low, ref_low);
;		/* this is added to sad2 */
;		sad2 +=3D _mm_cvtsi64_si32(sad_mmx);
;		/* second 8 pixels */
;		src_high =3D ((__m64 *)ptr_cur)[1];
;		ref_high =3D ((__m64 *)ptr_ref)[1];
;		sad_mmx =3D _mm_sad_pu8(src_high, ref_high);
;		/* this is added to both sad2 & sad3 */
;		sad2 +=3D _mm_cvtsi64_si32(sad_mmx);
;		sad3 +=3D _mm_cvtsi64_si32(sad_mmx);
;		/* last 8 pixels */
;		src_rhigh =3D ((__m64 *)ptr_cur)[2];
;		ref_rhigh =3D ((__m64 *)ptr_ref)[2];
;		sad_mmx =3D _mm_sad_pu8(src_rhigh, ref_rhigh);
;		/* this is added to sad3 */
;		sad3 +=3D _mm_cvtsi64_si32(sad_mmx);
;	}
;
;	/* set the result value */
;	sad[0] =3D sad0;
;	sad[1] =3D sad1;
;	sad[2] =3D sad2;
;	sad[3] =3D sad3;
;
;	/* return the result */
;	return sad0 + sad1 + sad2 + sad3;
;};
;=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D
cglobal sad32v_xmm
align 16
sad32v_xmm:

$LN373:
        push      edi                                           ;315.1
        push      esi                                           ;315.1
        push      ebp                                           ;315.1
        push      ebx                                           ;315.1
        sub       esp, 20                                       ;315.1
$LN374:
        mov       ecx,  [esp+40]                       ;311.10
        mov       ebp,  [esp+44]                       ;311.10
$LN375:
        movq      mm0,  [ecx]                          ;329.29
$LN376:
        movq      mm7,  [ecx+8]                        ;335.24
$LN377:
        movq      mm1,  [ecx+16]                       ;342.25
$LN378:
        psadbw    mm0,  [ebp]                          ;331.40
$LN379:
        psadbw    mm7,  [ebp+8]                        ;337.35
$LN380:
        psadbw    mm1,  [ebp+16]                       ;344.36
$LN381:
        mov       edx,  [esp+48]                       ;325.26
$LN382:
        lea       edi,  [ebp+edx]                      ;325.45
$LN383:
        movd      eax, mm0                                      ;333.28
$LN384:
        movd      esi, mm7                                      ;339.28
$LN385:
        add       eax, esi                                      ;339.3
$LN386:
        movd      ebx, mm1                                      ;346.28
$LN387:
        add       ebx, esi                                      ;346.3
$LN388:
        lea       esi,  [ecx+edx]                      ;325.26
$LN389:
        movq      mm5,  [esi]                          ;329.29
$LN390:
        movq      mm6,  [esi+8]                        ;335.24
$LN391:
        movq      mm2,  [esi+16]                       ;342.25
$LN392:
        psadbw    mm5,  [edi]                          ;331.40
$LN393:
        psadbw    mm6,  [edi+8]                        ;337.35
$LN394:
        psadbw    mm2,  [edi+16]                       ;344.36
$LN395:
        movd      ecx, mm5                                      ;333.28
$LN396:
        add       eax, ecx                                      ;333.3
$LN397:
        add       esi, edx                                      ;325.26
$LN398:
        movq      mm3,  [esi]                          ;329.29
$LN399:
        movq      mm4,  [esi+8]                        ;335.24
$LN400:
        movq      mm0,  [esi+16]                       ;342.25
$LN401:
        add       edi, edx                                      ;325.45
$LN402:
        psadbw    mm3,  [edi]                          ;331.40
$LN403:
        psadbw    mm4,  [edi+8]                        ;337.35
$LN404:
        psadbw    mm0,  [edi+16]                       ;344.36
$LN405:
        add       esi, edx                                      ;325.26
$LN406:
        add       edi, edx                                      ;325.45
$LN407:
        movd      ecx, mm6                                      ;339.28
$LN408:
        add       eax, ecx                                      ;339.3
$LN409:
        add       ebx, ecx                                      ;340.3
$LN410:
        movd      ecx, mm2                                      ;346.28
$LN411:
        add       ecx, ebx                                      ;346.3
$LN412:
        movd      ebx, mm3                                      ;333.28
$LN413:
        add       eax, ebx                                      ;333.3
$LN414:
        movd      ebx, mm4                                      ;339.28
$LN415:
        add       eax, ebx                                      ;339.3
$LN416:
        add       ecx, ebx                                      ;340.3
$LN417:
        movd      ebx, mm0                                      ;346.28
$LN418:
        movq      mm0,  [esi]                          ;329.29
$LN419:
        psadbw    mm0,  [edi]                          ;331.40
$LN420:
        add       ebx, ecx                                      ;346.3
$LN421:
        movd      ecx, mm0                                      ;333.28
$LN422:
        movq      mm0,  [esi+8]                        ;335.24
$LN423:
        psadbw    mm0,  [edi+8]                        ;337.35
$LN424:
        add       eax, ecx                                      ;333.3
$LN425:
        movd      ecx, mm0                                      ;339.28
$LN426:
        movq      mm0,  [esi+16]                       ;342.25
$LN427:
        psadbw    mm0,  [edi+16]                       ;344.36
$LN428:
        add       eax, ecx                                      ;339.3
$LN429:
        add       ebx, ecx                                      ;340.3
$LN430:
        add       esi, edx                                      ;325.26
$LN431:
        add       edi, edx                                      ;325.45
$LN432:
        movd      ecx, mm0                                      ;346.28
$LN433:
        movq      mm0,  [esi]                          ;329.29
$LN434:
        psadbw    mm0,  [edi]                          ;331.40
$LN435:
        add       ecx, ebx                                      ;346.3
$LN436:
        movd      ebx, mm0                                      ;333.28
$LN437:
        movq      mm0,  [esi+8]                        ;335.24
$LN438:
        psadbw    mm0,  [edi+8]                        ;337.35
$LN439:
        add       eax, ebx                                      ;333.3
$LN440:
        movd      ebx, mm0                                      ;339.28
$LN441:
        movq      mm0,  [esi+16]                       ;342.25
$LN442:
        psadbw    mm0,  [edi+16]                       ;344.36
$LN443:
        add       eax, ebx                                      ;339.3
$LN444:
        add       ecx, ebx                                      ;340.3
$LN445:
        add       esi, edx                                      ;325.26
$LN446:
        add       edi, edx                                      ;325.45
$LN447:
        movd      ebp, mm0                                      ;346.28
$LN448:
        movq      mm0,  [esi]                          ;329.29
$LN449:
        psadbw    mm0,  [edi]                          ;331.40
$LN450:
        add       ebp, ecx                                      ;346.3
$LN451:
        movd      ecx, mm0                                      ;333.28
$LN452:
        movq      mm0,  [esi+8]                        ;335.24
$LN453:
        psadbw    mm0,  [edi+8]                        ;337.35
$LN454:
        add       eax, ecx                                      ;333.3
$LN455:
        movd      ecx, mm0                                      ;339.28
$LN456:
        movq      mm0,  [esi+16]                       ;342.25
$LN457:
        psadbw    mm0,  [edi+16]                       ;344.36
$LN458:
        add       eax, ecx                                      ;339.3
$LN459:
        add       ebp, ecx                                      ;340.3
$LN460:
        add       esi, edx                                      ;325.26
$LN461:
        add       edi, edx                                      ;325.45
$LN462:
        movd      ebx, mm0                                      ;346.28
$LN463:
        movq      mm0,  [esi]                          ;329.29
$LN464:
        psadbw    mm0,  [edi]                          ;331.40
$LN465:
        add       ebx, ebp                                      ;346.3
$LN466:
        movd      ecx, mm0                                      ;333.28
$LN467:
        movq      mm0,  [esi+8]                        ;335.24
$LN468:
        psadbw    mm0,  [edi+8]                        ;337.35
$LN469:
        add       eax, ecx                                      ;333.3
$LN470:
        movd      ecx, mm0                                      ;339.28
$LN471:
        movq      mm0,  [esi+16]                       ;342.25
$LN472:
        psadbw    mm0,  [edi+16]                       ;344.36
$LN473:
        add       eax, ecx                                      ;339.3
$LN474:
        add       ebx, ecx                                      ;340.3
$LN475:
        add       esi, edx                                      ;325.26
$LN476:
        add       edi, edx                                      ;325.45
$LN477:
        movd      ecx, mm0                                      ;346.28
$LN478:
        movq      mm0,  [esi]                          ;329.29
$LN479:
        psadbw    mm0,  [edi]                          ;331.40
$LN480:
        add       ecx, ebx                                      ;346.3
$LN481:
        movd      ebx, mm0                                      ;333.28
$LN482:
        movq      mm0,  [esi+8]                        ;335.24
$LN483:
        psadbw    mm0,  [edi+8]                        ;337.35
$LN484:
        add       eax, ebx                                      ;333.3
$LN485:
        movd      ebx, mm0                                      ;339.28
$LN486:
        movq      mm0,  [esi+16]                       ;342.25
$LN487:
        psadbw    mm0,  [edi+16]                       ;344.36
$LN488:
        add       eax, ebx                                      ;339.3
$LN489:
        add       ecx, ebx                                      ;340.3
$LN490:
        movd      ebx, mm0                                      ;346.28
$LN491:
        add       ebx, ecx                                      ;346.3
        mov        [esp], ebx                          ;346.3
$LN492:
        lea       ecx,  [edx+esi]                      ;325.26
$LN493:
        lea       edx,  [edx+edi]                      ;325.45
                                ; LOE eax edx ecx ebx esi edi bl bh
.B9.3:                          ; Preds .B9.2
$LN494:
        mov       ebp, ebx                                      ;366.3
        mov        [esp+8], edx                        ;
$LN495:
        mov       edx,  [esp+48]                       ;354.29
        movq      mm2,  [esi+edx]                      ;354.29
$LN496:
        movq      mm3,  [esi+edx+8]                    ;361.24
$LN497:
        psadbw    mm2,  [edi+edx]                      ;356.40
$LN498:
        psadbw    mm3,  [edi+edx+8]                    ;363.35
$LN499:
        movd      ebx, mm2                                      ;358.28
$LN500:
        add       eax, ebx                                      ;358.3
$LN501:
        movd      edx, mm3                                      ;365.28
        mov        [esp+4], edx                        ;365.28
$LN502:
        add       eax, edx                                      ;365.3
$LN503:
        add       ebp, edx                                      ;366.3
$LN504:
        mov        [esp+12], eax                       ;365.3
$LN505:
        mov       eax,  [esp+48]                       ;370.25
        movq      mm1,  [esi+eax+16]                   ;370.25
$LN506:
        psadbw    mm1,  [edi+eax+16]                   ;372.36
$LN507:
        add       ebx, edx                                      ;367.3
$LN508:
        movd      edi, mm1                                      ;374.28
$LN509:
        add       edi,  [esp+4]                        ;374.3
$LN510:
        mov       eax,  [esp+48]                       ;350.26
        add       ecx, eax                                      ;350.26
$LN511:
        movq      mm0,  [ecx]                          ;354.29
$LN512:
        movq      mm7,  [ecx+8]                        ;361.24
$LN513:
        movq      mm4,  [ecx+16]                       ;370.25
$LN514:
        mov       edx,  [esp+8]                        ;350.45
        add       edx, eax                                      ;350.45
$LN515:
        psadbw    mm0,  [edx]                          ;356.40
$LN516:
        psadbw    mm7,  [edx+8]                        ;363.35
$LN517:
        psadbw    mm4,  [edx+16]                       ;372.36
$LN518:
        mov       eax,  [esp+12]                       ;358.3
$LN519:
        movd      esi, mm0                                      ;358.28
$LN520:
        add       eax, esi                                      ;358.3
$LN521:
        add       ebx, esi                                      ;359.3
$LN522:
        movd      esi, mm7                                      ;365.28
$LN523:
        add       eax, esi                                      ;365.3
$LN524:
        add       ebp, esi                                      ;366.3
$LN525:
        add       ebx, esi                                      ;367.3
$LN526:
        add       edi, esi                                      ;368.3
$LN527:
        movd      esi, mm4                                      ;374.28
$LN528:
        add       esi, edi                                      ;374.3
$LN529:
        mov       edi,  [esp+48]                       ;350.26
        add       ecx, edi                                      ;350.26
$LN530:
        movq      mm6,  [ecx]                          ;354.29
$LN531:
        movq      mm5,  [ecx+8]                        ;361.24
$LN532:
        movq      mm0,  [ecx+16]                       ;370.25
$LN533:
        add       edx, edi                                      ;350.45
$LN534:
        psadbw    mm6,  [edx]                          ;356.40
$LN535:
        psadbw    mm5,  [edx+8]                        ;363.35
$LN536:
        psadbw    mm0,  [edx+16]                       ;372.36
$LN537:
        movd      edi, mm6                                      ;358.28
$LN538:
        add       eax, edi                                      ;358.3
$LN539:
        add       ebx, edi                                      ;359.3
$LN540:
        movd      edi, mm5                                      ;365.28
$LN541:
        add       eax, edi                                      ;365.3
$LN542:
        add       ebp, edi                                      ;366.3
$LN543:
        add       ebx, edi                                      ;367.3
$LN544:
        add       esi, edi                                      ;368.3
$LN545:
        movd      edi, mm0                                      ;374.28
$LN546:
        add       edi, esi                                      ;374.3
$LN547:
        mov       esi,  [esp+48]                       ;350.26
        add       ecx, esi                                      ;350.26
$LN548:
        movq      mm0,  [ecx]                          ;354.29
$LN549:
        add       edx, esi                                      ;350.45
$LN550:
        psadbw    mm0,  [edx]                          ;356.40
$LN551:
        movd      esi, mm0                                      ;358.28
$LN552:
        movq      mm0,  [ecx+8]                        ;361.24
$LN553:
        psadbw    mm0,  [edx+8]                        ;363.35
$LN554:
        add       eax, esi                                      ;358.3
$LN555:
        add       ebx, esi                                      ;359.3
$LN556:
        movd      esi, mm0                                      ;365.28
$LN557:
        movq      mm0,  [ecx+16]                       ;370.25
$LN558:
        psadbw    mm0,  [edx+16]                       ;372.36
$LN559:
        add       eax, esi                                      ;365.3
$LN560:
        add       ebp, esi                                      ;366.3
$LN561:
        add       ebx, esi                                      ;367.3
$LN562:
        add       edi, esi                                      ;368.3
$LN563:
        movd      esi, mm0                                      ;374.28
$LN564:
        add       esi, edi                                      ;374.3
$LN565:
        mov       edi,  [esp+48]                       ;350.26
        add       ecx, edi                                      ;350.26
$LN566:
        movq      mm0,  [ecx]                          ;354.29
$LN567:
        add       edx, edi                                      ;350.45
$LN568:
        psadbw    mm0,  [edx]                          ;356.40
$LN569:
        movd      edi, mm0                                      ;358.28
$LN570:
        movq      mm0,  [ecx+8]                        ;361.24
$LN571:
        psadbw    mm0,  [edx+8]                        ;363.35
$LN572:
        add       eax, edi                                      ;358.3
$LN573:
        add       ebx, edi                                      ;359.3
$LN574:
        movd      edi, mm0                                      ;365.28
$LN575:
        movq      mm0,  [ecx+16]                       ;370.25
$LN576:
        psadbw    mm0,  [edx+16]                       ;372.36
$LN577:
        add       eax, edi                                      ;365.3
$LN578:
        add       ebp, edi                                      ;366.3
$LN579:
        add       ebx, edi                                      ;367.3
$LN580:
        add       esi, edi                                      ;368.3
$LN581:
        movd      edi, mm0                                      ;374.28
$LN582:
        add       edi, esi                                      ;374.3
$LN583:
        mov       esi,  [esp+48]                       ;350.26
        add       ecx, esi                                      ;350.26
$LN584:
        movq      mm0,  [ecx]                          ;354.29
$LN585:
        add       edx, esi                                      ;350.45
$LN586:
        psadbw    mm0,  [edx]                          ;356.40
$LN587:
        movd      esi, mm0                                      ;358.28
$LN588:
        movq      mm0,  [ecx+8]                        ;361.24
$LN589:
        psadbw    mm0,  [edx+8]                        ;363.35
$LN590:
        add       eax, esi                                      ;358.3
$LN591:
        add       ebx, esi                                      ;359.3
$LN592:
        movd      esi, mm0                                      ;365.28
$LN593:
        movq      mm0,  [ecx+16]                       ;370.25
$LN594:
        psadbw    mm0,  [edx+16]                       ;372.36
$LN595:
        add       eax, esi                                      ;365.3
$LN596:
        add       ebp, esi                                      ;366.3
$LN597:
        add       ebx, esi                                      ;367.3
$LN598:
        add       edi, esi                                      ;368.3
$LN599:
        movd      esi, mm0                                      ;374.28
$LN600:
        add       esi, edi                                      ;374.3
$LN601:
        mov       edi,  [esp+48]                       ;350.26
        add       ecx, edi                                      ;350.26
$LN602:
        movq      mm0,  [ecx]                          ;354.29
$LN603:
        add       edx, edi                                      ;350.45
$LN604:
        psadbw    mm0,  [edx]                          ;356.40
$LN605:
        movd      edi, mm0                                      ;358.28
$LN606:
        movq      mm0,  [ecx+8]                        ;361.24
$LN607:
        psadbw    mm0,  [edx+8]                        ;363.35
$LN608:
        add       eax, edi                                      ;358.3
$LN609:
        add       ebx, edi                                      ;359.3
$LN610:
        movd      edi, mm0                                      ;365.28
$LN611:
        movq      mm0,  [ecx+16]                       ;370.25
$LN612:
        psadbw    mm0,  [edx+16]                       ;372.36
$LN613:
        add       eax, edi                                      ;365.3
$LN614:
        add       ebp, edi                                      ;366.3
$LN615:
        add       ebx, edi                                      ;367.3
$LN616:
        add       esi, edi                                      ;368.3
$LN617:
        movd      edi, mm0                                      ;374.28
$LN618:
        add       edi, esi                                      ;374.3
$LN619:
        mov       esi,  [esp+48]                       ;350.26
        add       ecx, esi                                      ;350.26
$LN620:
        movq      mm0,  [ecx]                          ;354.29
$LN621:
        add       edx, esi                                      ;350.45
$LN622:
        psadbw    mm0,  [edx]                          ;356.40
$LN623:
        movd      esi, mm0                                      ;358.28
$LN624:
        movq      mm0,  [ecx+8]                        ;361.24
$LN625:
        psadbw    mm0,  [edx+8]                        ;363.35
$LN626:
        add       eax, esi                                      ;358.3
$LN627:
        add       ebx, esi                                      ;359.3
$LN628:
        movd      esi, mm0                                      ;365.28
$LN629:
        movq      mm0,  [ecx+16]                       ;370.25
$LN630:
        psadbw    mm0,  [edx+16]                       ;372.36
$LN631:
        add       eax, esi                                      ;365.3
$LN632:
        add       ebp, esi                                      ;366.3
        mov        [esp], ebp                          ;366.3
$LN633:
        add       ebx, esi                                      ;367.3
$LN634:
        add       edi, esi                                      ;368.3
$LN635:
        movd      ebp, mm0                                      ;374.28
$LN636:
        add       ebp, edi                                      ;374.3
$LN637:
        mov       esi,  [esp+48]                       ;350.26
$LN638:
        mov        [esp+4], ebp                        ;374.3
$LN639:
        lea       ebp,  [esi+ecx]                      ;350.26
$LN640:
        lea       esi,  [esi+edx]                      ;350.45
                                ; LOE eax edx ecx ebx ebp esi
.B9.4:                          ; Preds .B9.3
        ; Final straight-line block: handles eight more rows, 24 bytes per
        ; row as three 8-byte groups at offsets 0, 8 and 16, then writes the
        ; four totals through the pointer at [esp+52] and returns their sum.
        ;
        ; On entry (see the LOE note just above): ecx/edx address the last
        ; row already handled, ebp/esi address the row after it, eax and ebx
        ; hold running totals, and two further running totals were spilled
        ; to [esp] and [esp+4] by the preceding block.
        ;
        ; psadbw leaves the sum of absolute byte differences of its two
        ; 8-byte operands in the destination; movd then copies it to a GPR.
        ; In this block group 0 feeds one total (ebx, later kept in edx),
        ; group 1 feeds both totals, and group 2 feeds the other total
        ; (reloaded from [esp+4] into ebx).
        ; NOTE(review): [esp+48] looks like the third stack argument (row
        ; stride) and [esp+52] the fourth (result array) given the 20-byte
        ; frame and four pops below - confirm against the prologue.
$LN641:
        ; edi = per-row step; it is added to both pointers between rows.
        ; Row 1: load all three groups, addressed as (ecx|edx) + step.
        mov       edi,  [esp+48]                       ;382.29
        movq      mm7,  [ecx+edi]                      ;382.29
$LN642:
        movq      mm1,  [ecx+edi+8]                    ;388.24
$LN643:
        movq      mm2,  [ecx+edi+16]                   ;395.25
$LN644:
        psadbw    mm7,  [edx+edi]                      ;384.40
$LN645:
        psadbw    mm1,  [edx+edi+8]                    ;390.35
$LN646:
        psadbw    mm2,  [edx+edi+16]                   ;397.36
        ; Spill eax: it is needed as scratch from here on. The saved value
        ; is reloaded into ecx further down and becomes the first output.
        mov        [esp+12], eax                       ;
$LN647:
        ; Row 2: advance ebp/esi by one step and compute all three groups.
        add       ebp, edi                                      ;378.26
$LN648:
        movq      mm0,  [ebp]                          ;382.29
$LN649:
        movq      mm3,  [ebp+8]                        ;388.24
$LN650:
        movq      mm4,  [ebp+16]                       ;395.25
$LN651:
        add       esi, edi                                      ;378.45
$LN652:
        psadbw    mm0,  [esi]                          ;384.40
$LN653:
        psadbw    mm3,  [esi+8]                        ;390.35
$LN654:
        psadbw    mm4,  [esi+16]                       ;397.36
$LN655:
        ; Row 3: groups 0 and 1 now; group 2 is loaded later once mm0 is free.
        add       ebp, edi                                      ;378.26
$LN656:
        movq      mm5,  [ebp]                          ;382.29
$LN657:
        movq      mm6,  [ebp+8]                        ;388.24
$LN658:
        add       esi, edi                                      ;378.45
$LN659:
        psadbw    mm5,  [esi]                          ;384.40
$LN660:
        psadbw    mm6,  [esi+8]                        ;390.35
$LN661:
        ; Fold row 1 into the totals. ebx receives groups 0 and 1, is parked
        ; in [esp+16] and continued in edx; ebx is then reloaded with the
        ; total spilled at [esp+4] and receives groups 1 and 2.
        ; ecx and edx are free for reuse here: the remaining rows are
        ; addressed through ebp/esi only.
        movd      eax, mm7                                      ;386.28
$LN662:
        add       ebx, eax                                      ;386.3
$LN663:
        movd      eax, mm1                                      ;392.28
$LN664:
        add       ebx, eax                                      ;392.3
        mov        [esp+16], ebx                       ;392.3
$LN665:
        mov       ebx,  [esp+4]                        ;393.3
$LN666:
        mov       edx,  [esp+16]                       ;386.3
$LN667:
        add       ebx, eax                                      ;393.3
$LN668:
        movd      ecx, mm2                                      ;399.28
$LN669:
        add       ebx, ecx                                      ;399.3
$LN670:
        ; Take row 2 group 0 out of mm0, then reuse mm0 for row 3 group 2.
        movd      eax, mm0                                      ;386.28
$LN671:
        movq      mm0,  [ebp+16]                       ;395.25
$LN672:
        psadbw    mm0,  [esi+16]                       ;397.36
$LN673:
        ; ecx = the value of eax saved at block entry (first output).
        mov       ecx,  [esp+12]                       ;403.2
$LN674:
        add       edx, eax                                      ;386.3
$LN675:
        ; Advance to row 4 while the row 2 / row 3 results are folded in.
        add       ebp, edi                                      ;378.26
$LN676:
        add       esi, edi                                      ;378.45
$LN677:
        movd      eax, mm3                                      ;392.28
$LN678:
        add       edx, eax                                      ;392.3
$LN679:
        add       ebx, eax                                      ;393.3
$LN680:
        movd      eax, mm4                                      ;399.28
$LN681:
        add       ebx, eax                                      ;399.3
$LN682:
        movd      eax, mm5                                      ;386.28
$LN683:
        add       edx, eax                                      ;386.3
$LN684:
        movd      eax, mm6                                      ;392.28
$LN685:
        add       edx, eax                                      ;392.3
$LN686:
        add       ebx, eax                                      ;393.3
$LN687:
        ; From here on only mm0 is used: each result is copied to eax just
        ; before mm0 is reloaded, and added to the totals afterwards.
        movd      eax, mm0                                      ;399.28
$LN688:
        ; Row 4.
        movq      mm0,  [ebp]                          ;382.29
$LN689:
        psadbw    mm0,  [esi]                          ;384.40
$LN690:
        add       ebx, eax                                      ;399.3
$LN691:
        movd      eax, mm0                                      ;386.28
$LN692:
        movq      mm0,  [ebp+8]                        ;388.24
$LN693:
        psadbw    mm0,  [esi+8]                        ;390.35
$LN694:
        add       edx, eax                                      ;386.3
$LN695:
        movd      eax, mm0                                      ;392.28
$LN696:
        movq      mm0,  [ebp+16]                       ;395.25
$LN697:
        psadbw    mm0,  [esi+16]                       ;397.36
$LN698:
        add       edx, eax                                      ;392.3
$LN699:
        add       ebx, eax                                      ;393.3
$LN700:
        add       ebp, edi                                      ;378.26
$LN701:
        add       esi, edi                                      ;378.45
$LN702:
        movd      eax, mm0                                      ;399.28
$LN703:
        ; Row 5.
        movq      mm0,  [ebp]                          ;382.29
$LN704:
        psadbw    mm0,  [esi]                          ;384.40
$LN705:
        add       ebx, eax                                      ;399.3
$LN706:
        movd      eax, mm0                                      ;386.28
$LN707:
        movq      mm0,  [ebp+8]                        ;388.24
$LN708:
        psadbw    mm0,  [esi+8]                        ;390.35
$LN709:
        add       edx, eax                                      ;386.3
$LN710:
        movd      eax, mm0                                      ;392.28
$LN711:
        movq      mm0,  [ebp+16]                       ;395.25
$LN712:
        psadbw    mm0,  [esi+16]                       ;397.36
$LN713:
        add       edx, eax                                      ;392.3
$LN714:
        add       ebx, eax                                      ;393.3
$LN715:
        add       ebp, edi                                      ;378.26
$LN716:
        add       esi, edi                                      ;378.45
$LN717:
        movd      eax, mm0                                      ;399.28
$LN718:
        ; Row 6.
        movq      mm0,  [ebp]                          ;382.29
$LN719:
        psadbw    mm0,  [esi]                          ;384.40
$LN720:
        add       ebx, eax                                      ;399.3
$LN721:
        movd      eax, mm0                                      ;386.28
$LN722:
        movq      mm0,  [ebp+8]                        ;388.24
$LN723:
        psadbw    mm0,  [esi+8]                        ;390.35
$LN724:
        add       edx, eax                                      ;386.3
$LN725:
        movd      eax, mm0                                      ;392.28
$LN726:
        movq      mm0,  [ebp+16]                       ;395.25
$LN727:
        psadbw    mm0,  [esi+16]                       ;397.36
$LN728:
        add       edx, eax                                      ;392.3
$LN729:
        add       ebx, eax                                      ;393.3
$LN730:
        add       ebp, edi                                      ;378.26
$LN731:
        add       esi, edi                                      ;378.45
$LN732:
        movd      eax, mm0                                      ;399.28
$LN733:
        ; Row 7.
        movq      mm0,  [ebp]                          ;382.29
$LN734:
        psadbw    mm0,  [esi]                          ;384.40
$LN735:
        add       ebx, eax                                      ;399.3
$LN736:
        movd      eax, mm0                                      ;386.28
$LN737:
        movq      mm0,  [ebp+8]                        ;388.24
$LN738:
        psadbw    mm0,  [esi+8]                        ;390.35
$LN739:
        add       edx, eax                                      ;386.3
$LN740:
        movd      eax, mm0                                      ;392.28
$LN741:
        movq      mm0,  [ebp+16]                       ;395.25
$LN742:
        psadbw    mm0,  [esi+16]                       ;397.36
$LN743:
        add       edx, eax                                      ;392.3
$LN744:
        add       ebx, eax                                      ;393.3
$LN745:
        movd      eax, mm0                                      ;399.28
$LN746:
        ; Row 8 (last): addressed as (ebp|esi) + step, so the pointers are
        ; not advanced any further.
        movq      mm0,  [ebp+edi]                      ;382.29
$LN747:
        psadbw    mm0,  [esi+edi]                      ;384.40
$LN748:
        add       ebx, eax                                      ;399.3
$LN749:
        movd      eax, mm0                                      ;386.28
$LN750:
        movq      mm0,  [ebp+edi+8]                    ;388.24
$LN751:
        psadbw    mm0,  [esi+edi+8]                    ;390.35
$LN752:
        add       edx, eax                                      ;386.3
$LN753:
        movd      eax, mm0                                      ;392.28
$LN754:
        movq      mm0,  [ebp+edi+16]                   ;395.25
$LN755:
        psadbw    mm0,  [esi+edi+16]                   ;397.36
$LN756:
        add       edx, eax                                      ;392.3
$LN757:
        ; esi is no longer needed as a row pointer: reuse it for the output
        ; pointer, to which four consecutive dwords are written below.
        mov       esi,  [esp+52]                       ;403.2
$LN758:
        add       ebx, eax                                      ;393.3
$LN759:
        ; eax = total spilled to [esp] by the preceding block (second output).
        mov       eax,  [esp]                          ;404.2
$LN760:
        mov        [esi], ecx                          ;403.2
$LN761:
        mov        [esi+4], eax                        ;404.2
$LN762:
        ; Fold in the very last group-2 result before storing ebx.
        movd      ebp, mm0                                      ;399.28
$LN763:
        add       ebx, ebp                                      ;399.3
$LN764:
        mov        [esi+8], edx                        ;405.2
$LN765:
        mov        [esi+12], ebx                       ;406.2
$LN766:
        ; Return value: eax = sum of the four values just stored.
        add       ecx, eax                                      ;409.16
$LN767:
        add       ecx, edx                                      ;409.23
$LN768:
        add       ecx, ebx                                      ;409.30
        mov       eax, ecx                                      ;409.30
        ; Epilogue: release the 20 bytes of stack locals and restore the
        ; saved registers (the matching pushes are above this block).
        ; NOTE(review): no emms is executed before ret, so the MMX state is
        ; left active - confirm that the caller is expected to issue emms.
        add       esp, 20                                       ;409.30
        pop       ebx                                           ;409.30
        pop       ebp                                           ;409.30
        pop       esi                                           ;409.30
        pop       edi                                           ;409.30
        ret                                                     ;409.30

;--------
------=_NextPart_000_0003_01C2A539.3BEEF470--