[XviD-devel] Quality optimization
skal
skal at planet-d.net
Tue Feb 25 18:20:44 CET 2003
Hi,
almost forgot this one too:
On Wed, 2003-01-22 at 20:01, Marco Al wrote:
> Christoph Lampert wrote:
>
> >> Do we have some timings for a 8 bit Hadamard transform yet?
> >
> > I did some a while ago of skal's MMXEXT(?) version and posted them to
> > the list. I don't remember, but might have been twice the speed of DCT,
> > but half the speed of SAD.
>
> The non-attributed asm code only managed 173 cycles with 8 bits, according to
> the source; that is not twice as fast as DCT, AFAIK.
>
Here's a C/MMX/SSE version of the Hadamard transform (16 bits).
Without the 'pshufw' re-ordering, output columns are re-ordered
according to: [03127465]. The C version produces the correct order...
Note: the output is also scaled by 8.
bye!
Skal
-------------- next part --------------
; void xvid_Hadamard_SSE(int16_t Matrix[8*8]);
; C-side prototype of the routine defined below: a single pointer argument
; to 64 16-bit words, transformed in place (the routine stores its results
; back through the same pointer).
; NOTE(review): `cglobal` is not defined in this listing -- presumably the
; project's symbol-export macro; confirm against the project's include file.
cglobal xvid_Hadamard_SSE
;//////////////////////////////////////////////////////////////////////
; BUTF2 a, b, c, d
; Two independent in-place butterflies on packed signed 16-bit words:
;     (a, b) <- (a - b, a + b)        (c, d) <- (c - d, c + d)
; %1..%4 are the four registers; all four are overwritten.
; No scratch register is needed: each difference is formed as 2x - (x + y).
; Every add/subtract is the saturating form (paddsw/psubsw), so the results
; are exact only while a+b, c+d, 2a and 2c all stay inside the int16 range;
; once the doubling step clamps, the difference is no longer a-b / c-d.
; The (a,b) and (c,d) chains are interleaved instruction by instruction.
%macro BUTF2 4 ; a, b, c, d
paddsw %2, %1 ; b = a+b
paddsw %4, %3 ; d = c+d
paddsw %1, %1 ; a = 2a
paddsw %3, %3 ; c = 2c
psubsw %1, %2 ; a = 2a-(a+b) = a-b
psubsw %3, %4 ; c = 2c-(c+d) = c-d
%endmacro
;//////////////////////////////////////////////////////////////////////
; HADAMARD_HPASS offset, reorder
; In-place 8-point transform of the eight 16-bit words at
; [eax+offset .. eax+offset+15] (one row), done as three add/subtract stages.
;   %1  byte offset of the row from eax
;   %2  non-zero: shuffle the results with pshufw so they are stored in
;       order 0..7; zero: store them as produced, i.e. [0312] [7465]
; Uses mm0, mm1 and mm7. All arithmetic is saturating (paddsw/psubsw).
; Lane comments list words from lowest to highest. Between stages the
; sum and difference vectors are interleaved with punpck{l,h}wd so that the
; operands of the next butterfly end up in the same lane of mm0 and mm1.
%macro HADAMARD_HPASS 2 ; %1: offset %2:REORDERING
movq mm0, [eax+%1] ; [0123]
movq mm7, mm0
movq mm1, [eax+%1+8] ; [4567]
paddsw mm0, mm1 ; [abcd] = x[i] + x[i+4]
psubsw mm7, mm1 ; [efgh] = x[i] - x[i+4]
movq mm1,mm0
punpcklwd mm0,mm7 ; [aebf]
punpckhwd mm1,mm7 ; [cgdh]
movq mm7,mm0
paddsw mm0,mm1 ; [ABCD] = [a+c e+g b+d f+h]
psubsw mm7,mm1 ; [EFGH] = [a-c e-g b-d f-h]
movq mm1,mm0
punpcklwd mm0,mm7 ; [AEBF]
punpckhwd mm1,mm7 ; [CGDH]
movq mm7,mm0
paddsw mm0,mm1 ; [0312] = [A+C E+G B+D F+H]
psubsw mm7,mm1 ; [7465] = [A-C E-G B-D F-H]
%if (%2!=0) ; SSE only
; pshufw immediate holds one 2-bit source index per destination word,
; destination word 0 in the lowest two bits.
pshufw mm0,mm0, 01111000b ; [0123]  (source words 0,2,3,1)
movq [eax+%1 ], mm0
pshufw mm7,mm7, 00101101b ; [4567]  (source words 1,3,2,0)
%else
movq [eax+%1 ], mm0
%endif
movq [eax+%1+8], mm7
%endmacro
; HADAMARD_VPASS base
; In-place 8-point transform down four adjacent 16-bit columns at once.
;   %1  address expression of the first quadword; the eight rows are read
;       and written at %1 + k*16, k = 0..7 (16 bytes between rows)
; Uses all of mm0-mm7 (one register per row), which is why BUTF2 has to work
; without a scratch register. Three butterfly stages, then the registers are
; stored in an order that sorts the outputs by the number of sign changes
; in their +/- pattern (shown per store below, r0..r7 left to right).
; NOTE(review): "27c" is presumably the author's cycle count for one pass.
%macro HADAMARD_VPASS 1 ; src/dst ; 27c
movq mm0, [%1+0*16]
movq mm1, [%1+1*16]
movq mm2, [%1+2*16]
movq mm3, [%1+3*16]
movq mm4, [%1+4*16]
movq mm5, [%1+5*16]
movq mm6, [%1+6*16]
movq mm7, [%1+7*16]
; rows 0-3: stage 1 pairs (r0,r1) (r2,r3); stage 2 pairs sums with sums
; and differences with differences
BUTF2 mm0, mm1, mm2, mm3
BUTF2 mm1, mm3, mm0, mm2
; rows 4-7: same two stages
BUTF2 mm4, mm5, mm6, mm7
BUTF2 mm4, mm6, mm5, mm7
; stage 3: combine each rows-0-3 term with its rows-4-7 counterpart
BUTF2 mm3, mm7, mm0, mm4
BUTF2 mm2, mm6, mm1, mm5
movq [%1+0*16], mm7 ; ++++++++  (sum of all eight rows)
movq [%1+1*16], mm3 ; ++++----
movq [%1+2*16], mm1 ; ++----++
movq [%1+3*16], mm5 ; ++--++--
movq [%1+4*16], mm4 ; +--++--+
movq [%1+5*16], mm0 ; +--+-++-
movq [%1+6*16], mm2 ; +-+--+-+
movq [%1+7*16], mm6 ; +-+-+-+-
%endmacro
;-----------------------------------------------------------------------
; void xvid_Hadamard_SSE(int16_t Matrix[8*8])
; In-place transform of an 8x8 block of 16-bit words: one horizontal pass
; per 16-byte row (with pshufw reordering enabled), then one vertical pass
; per 4-column half.
; In:    [esp+4] = pointer to the block
; Clobb: eax, mm0-mm7 (no emms is executed here)
; Author's timing: 135c (131c without the pshufw reordering)
;-----------------------------------------------------------------------
xvid_Hadamard_SSE:
        mov     eax, [esp+4]                    ; eax = Matrix

        ; Horizontal pass, rows 0..7 (byte offsets 0, 16, ..., 112).
%assign HAD_ROW 0
%rep 8
        HADAMARD_HPASS HAD_ROW*16, 1
%assign HAD_ROW HAD_ROW+1
%endrep

        ; Vertical pass: columns 0-3, then columns 4-7 (8 bytes further in).
        HADAMARD_VPASS eax
        HADAMARD_VPASS eax+8

        ret
;//////////////////////////////////////////////////////////////////////
-------------- next part --------------
A non-text attachment was scrubbed...
Name: hadamard.c
Type: text/x-c
Size: 2111 bytes
Desc: not available
Url : http://edu.bnhof.de/pipermail/xvid-devel/attachments/20030225/50d5caa9/hadamard.bin
More information about the XviD-devel
mailing list