[XviD-devel] need sad8bi for direct mode
peter ross
xvid-devel@xvid.org
Mon, 22 Jul 2002 14:51:34 +1000
>From: Christoph Lampert <chl@math.uni-bonn.de>
>Reply-To: xvid-devel@xvid.org
>To: xvid-devel@xvid.org
>Subject: [XviD-devel] need sad8bi for direct mode
>Date: Sun, 21 Jul 2002 11:56:23 +0200 (CEST)
>
>Hi,
>
>I notice that direct mode for bframes needs interpolated sad for
>8x8 blocks, if it should work together with inter4v mode.
>I'll add a C-version, but MMX/XMM would of course be better.
>
>Also, it definitely needs halfpel interpolation, so currently the
>only way to really test direct mode b-frames is "with halfpel, but
>without inter4v".
>
Done! They work in my rand() test environment, but please run some tests of your own.
cglobal sad16bi_mmx
;===========================================================================
;
; uint32_t sad16bi_mmx(const uint8_t * const cur,
;                      const uint8_t * const ref1,
;                      const uint8_t * const ref2,
;                      const uint32_t stride);
;
; Sum of absolute differences between the 16x16 byte block at `cur` and
; the rounded average ((ref1 + ref2 + 1) >> 1) of the blocks at `ref1`
; and `ref2`.  All three blocks are walked with the same byte `stride`.
;
; Syntax: NASM, 32-bit x86, arguments read from the stack.
; Out:    eax = SAD
; Clobb:  ecx, edx, mm0-mm7, flags (ebx is saved and restored).
;         No EMMS is executed here.
;
; Only baseline MMX instructions are used.  PAVGB is deliberately avoided:
; it belongs to the SSE integer / extended-MMX set and would fault on a
; CPU that only implements MMX, which is exactly the CPU class a routine
; named *_mmx is expected to serve.  The average is instead computed on
; bytes widened to 16-bit words.
;
; Expects: mm7 = 0 (unpack source), mm6 = four 16-bit partial sums,
;          [mmx_one] = four words of 1 (the final PMADDWD collapse below
;          already relies on that layout).
;
;===========================================================================

; Accumulate the SAD of 8 pixels located at byte offset %1 of the current
; row.  Pointers are not advanced.  Uses mm0-mm5 as scratch.
%macro SADBI_16x16_MMX_HALF 1
        movq        mm0, [edx+%1]       ; ref1, 8 bytes
        movq        mm2, [ebx+%1]       ; ref2, 8 bytes
        movq        mm1, mm0
        movq        mm3, mm2
        punpcklbw   mm0, mm7            ; mm0/mm1 = ref1 as words
        punpckhbw   mm1, mm7
        punpcklbw   mm2, mm7            ; mm2/mm3 = ref2 as words
        punpckhbw   mm3, mm7
        paddw       mm0, mm2            ; ref1 + ref2 (max 510, no overflow)
        paddw       mm1, mm3
        paddw       mm0, [mmx_one]      ; + 1 for round-to-nearest
        paddw       mm1, [mmx_one]
        psrlw       mm0, 1              ; mm0/mm1 = (ref1 + ref2 + 1) >> 1
        psrlw       mm1, 1

        movq        mm2, [eax+%1]       ; cur, 8 bytes
        movq        mm3, mm2
        punpcklbw   mm2, mm7            ; mm2/mm3 = cur as words
        punpckhbw   mm3, mm7

        movq        mm4, mm0            ; keep a copy of the average
        movq        mm5, mm1
        psubusw     mm0, mm2            ; max(avg - cur, 0)
        psubusw     mm1, mm3
        psubusw     mm2, mm4            ; max(cur - avg, 0)
        psubusw     mm3, mm5
        por         mm0, mm2            ; one side is 0 => |avg - cur|
        por         mm1, mm3

        paddusw     mm6, mm0            ; each lane grows by at most 4*255
        paddusw     mm6, mm1            ; per row, so 16 rows fit in a word
%endmacro

; Process one full 16-pixel row and step all three pointers to the next row.
%macro SADBI_16x16_MMX 0
        SADBI_16x16_MMX_HALF 0
        SADBI_16x16_MMX_HALF 8
        lea         eax, [eax+ecx]
        lea         edx, [edx+ecx]
        lea         ebx, [ebx+ecx]
%endmacro

align 16
sad16bi_mmx:
        push        ebx                 ; callee-saved, used as ref2 pointer

        mov         eax, [esp+4+ 4]     ; Src
        mov         edx, [esp+4+ 8]     ; Ref1
        mov         ebx, [esp+4+12]     ; Ref2
        mov         ecx, [esp+4+16]     ; Stride

        pxor        mm6, mm6            ; accumulator (4 x 16-bit)
        pxor        mm7, mm7            ; constant zero for unpacking

%rep 16                                 ; fully unrolled: 16 rows
        SADBI_16x16_MMX
%endrep

        pmaddwd     mm6, [mmx_one]      ; collapse: 4 words -> 2 dwords
        movq        mm7, mm6
        psrlq       mm7, 32
        paddd       mm6, mm7            ; low dword = total
        movd        eax, mm6

        pop         ebx
        ret
cglobal sad8bi_mmx
;===========================================================================
;
; uint32_t sad8bi_mmx(const uint8_t * const cur,
;                     const uint8_t * const ref1,
;                     const uint8_t * const ref2,
;                     const uint32_t stride);
;
; Sum of absolute differences between the 8x8 byte block at `cur` and the
; rounded average ((ref1 + ref2 + 1) >> 1) of the blocks at `ref1` and
; `ref2`.  All three blocks are walked with the same byte `stride`.
;
; Syntax: NASM, 32-bit x86, arguments read from the stack.
; Out:    eax = SAD
; Clobb:  ecx, edx, mm0-mm7, flags (ebx is saved and restored).
;         No EMMS is executed here.
;
; Only baseline MMX instructions are used.  PAVGB is deliberately avoided:
; it belongs to the SSE integer / extended-MMX set and would fault on a
; CPU that only implements MMX, which is exactly the CPU class a routine
; named *_mmx is expected to serve.  The average is instead computed on
; bytes widened to 16-bit words.
;
; Expects: mm7 = 0 (unpack source), mm6 = four 16-bit partial sums,
;          [mmx_one] = four words of 1 (the final PMADDWD collapse below
;          already relies on that layout).
;
;===========================================================================

; Accumulate the SAD of one 8-pixel row located %1 bytes past the current
; pointers (%1 is 0 or ecx).  Pointers are not advanced.  Uses mm0-mm5.
%macro SADBI_8x8_MMX_ROW 1
        movq        mm0, [edx+%1]       ; ref1, 8 bytes
        movq        mm2, [ebx+%1]       ; ref2, 8 bytes
        movq        mm1, mm0
        movq        mm3, mm2
        punpcklbw   mm0, mm7            ; mm0/mm1 = ref1 as words
        punpckhbw   mm1, mm7
        punpcklbw   mm2, mm7            ; mm2/mm3 = ref2 as words
        punpckhbw   mm3, mm7
        paddw       mm0, mm2            ; ref1 + ref2 (max 510, no overflow)
        paddw       mm1, mm3
        paddw       mm0, [mmx_one]      ; + 1 for round-to-nearest
        paddw       mm1, [mmx_one]
        psrlw       mm0, 1              ; mm0/mm1 = (ref1 + ref2 + 1) >> 1
        psrlw       mm1, 1

        movq        mm2, [eax+%1]       ; cur, 8 bytes
        movq        mm3, mm2
        punpcklbw   mm2, mm7            ; mm2/mm3 = cur as words
        punpckhbw   mm3, mm7

        movq        mm4, mm0            ; keep a copy of the average
        movq        mm5, mm1
        psubusw     mm0, mm2            ; max(avg - cur, 0)
        psubusw     mm1, mm3
        psubusw     mm2, mm4            ; max(cur - avg, 0)
        psubusw     mm3, mm5
        por         mm0, mm2            ; one side is 0 => |avg - cur|
        por         mm1, mm3

        paddusw     mm6, mm0            ; each lane grows by at most 2*255
        paddusw     mm6, mm1            ; per row, so 8 rows fit in a word
%endmacro

; Process two consecutive rows and step all three pointers two rows down.
%macro SADBI_8x8_MMX 0
        SADBI_8x8_MMX_ROW 0
        SADBI_8x8_MMX_ROW ecx
        lea         eax, [eax+2*ecx]
        lea         edx, [edx+2*ecx]
        lea         ebx, [ebx+2*ecx]
%endmacro

align 16
sad8bi_mmx:
        push        ebx                 ; callee-saved, used as ref2 pointer

        mov         eax, [esp+4+ 4]     ; Src
        mov         edx, [esp+4+ 8]     ; Ref1
        mov         ebx, [esp+4+12]     ; Ref2
        mov         ecx, [esp+4+16]     ; Stride

        pxor        mm6, mm6            ; accumulator (4 x 16-bit)
        pxor        mm7, mm7            ; constant zero for unpacking

%rep 4                                  ; fully unrolled: 4 x 2 rows
        SADBI_8x8_MMX
%endrep

        pmaddwd     mm6, [mmx_one]      ; collapse: 4 words -> 2 dwords
        movq        mm7, mm6
        psrlq       mm7, 32
        paddd       mm6, mm7            ; low dword = total
        movd        eax, mm6

        pop         ebx
        ret
cglobal sad8bi_xmm
;===========================================================================
;
; uint32_t sad8bi_xmm(const uint8_t * const cur,
;                     const uint8_t * const ref1,
;                     const uint8_t * const ref2,
;                     const uint32_t stride);
;
; Sum of absolute differences between the 8x8 byte block at `cur` and the
; PAVGB average of the blocks at `ref1` and `ref2`.  All three blocks are
; walked with the same byte `stride`.
;
; Syntax: NASM, 32-bit x86, arguments read from the stack.
; Out:    eax = SAD
; Clobb:  ecx, edx, mm0-mm3, mm5, mm6 (ebx is saved and restored).
;         No EMMS is executed here.
; Needs:  PAVGB and PSADBW (SSE integer / extended MMX).
;
;===========================================================================

; Handle two rows: row 0 feeds accumulator mm5, row 1 feeds mm6, then all
; three pointers end up two rows further down.  The LEAs are interleaved
; with the SIMD work exactly as in the straight-line instruction stream.
%macro SADBI_8x8_XMM 0
        movq        mm0, [eax]          ; cur,  row 0
        movq        mm1, [eax+ecx]      ; cur,  row 1
        movq        mm2, [edx]          ; ref1, row 0
        movq        mm3, [edx+ecx]      ; ref1, row 1
        pavgb       mm2, [ebx]          ; mm2 = avg(ref1, ref2), row 0
        lea         edx, [edx+2*ecx]
        pavgb       mm3, [ebx+ecx]      ; mm3 = avg(ref1, ref2), row 1
        lea         ebx, [ebx+2*ecx]
        psadbw      mm0, mm2            ; mm0 = SAD of row 0
        lea         eax, [eax+2*ecx]
        psadbw      mm1, mm3            ; mm1 = SAD of row 1
        paddusw     mm5, mm0
        paddusw     mm6, mm1
%endmacro

align 16
sad8bi_xmm:
        push        ebx                 ; callee-saved, used as ref2 pointer

        mov         eax, [esp + 8]      ; cur    (1st argument)
        mov         edx, [esp + 12]     ; ref1   (2nd argument)
        mov         ebx, [esp + 16]     ; ref2   (3rd argument)
        mov         ecx, [esp + 20]     ; stride (4th argument)

        pxor        mm5, mm5            ; running SAD, even rows
        pxor        mm6, mm6            ; running SAD, odd rows

%rep 4                                  ; fully unrolled: 4 x 2 rows
        SADBI_8x8_XMM
%endrep

        paddusw     mm6, mm5            ; merge both running sums
        movd        eax, mm6            ; return value

        pop         ebx
        ret
_________________________________________________________________
Chat with friends online, try MSN Messenger: http://messenger.msn.com