[XviD-devel] Quality optimization

skal skal at planet-d.net
Tue Feb 25 18:20:44 CET 2003


	Hi,

	almost forgot this one too:

On Wed, 2003-01-22 at 20:01, Marco Al wrote:
> Christoph Lampert wrote:
> 
> >> Do we have some timings for a 8 bit Hadamard transform yet?
> >
> > I did some a while ago of skal's MMXEXT(?) version and posted them to
> > the list. I don't remember, but might have been twice the speed of DCT,
> > but half the speed of SAD.
> 
> The non attributed asm code only managed 173 cycles with 8 bits according to
> the source, that is not twice as fast as DCT AFAIK.
> 
	here's a C/MMX/SSE version of the Hadamard transform (16bits).
	Without the 'pshufw' re-ordering, output columns are re-ordered
	according to: [03127465]. C-version spits the correct order...
	Note: Output is also scaled by 8.


	bye!

		Skal
-------------- next part --------------

   ; void xvid_Hadamard_SSE(int16_t Matrix[8*8]);
   ;
   ; In-place 8x8 transform of 64 signed 16-bit words.
   ; The argument is read from [esp+4] (32-bit stack-passed argument).
   ; Clobbers eax and mm0-mm7; no emms is executed by the routine.
   ; NOTE(review): 'cglobal' is a macro defined outside this excerpt --
   ; presumably it exports the symbol; confirm against the project's
   ; NASM include file.

cglobal xvid_Hadamard_SSE

;//////////////////////////////////////////////////////////////////////

; BUTF2 a, b, c, d
; Two independent in-place butterflies on packed signed 16-bit words:
;   (a, b) <- (a - b, a + b)
;   (c, d) <- (c - d, c + d)
; The difference is formed as 2*x - (x + y), which avoids needing a
; scratch register. Every step is saturating (paddsw/psubsw), so the
; difference is only exact as long as neither 2*x nor x + y saturates.
; The sums must be formed before the first operands are doubled.
%macro BUTF2 4   ; a, b, c, d
  paddsw %2, %1   ; b = a+b
  paddsw %4, %3   ; d = c+d
  paddsw %1, %1   ; a = 2a
  paddsw %3, %3   ; c = 2c
  psubsw %1, %2   ; a = 2a-(a+b) = a-b
  psubsw %3, %4   ; c = 2c-(c+d) = c-d
%endmacro

;//////////////////////////////////////////////////////////////////////

; HADAMARD_HPASS offset, reorder
; 8-point horizontal pass over one row of eight signed 16-bit words,
; done in place at [eax+offset] .. [eax+offset+15].
;   offset  : byte offset of the row from eax
;   reorder : non-zero -> shuffle both result halves with pshufw so the
;             row is stored in 0..7 order; zero -> store the raw
;             [0312] / [7465] lane order
; Three saturating add/sub stages; the two unpack steps in between
; re-pair the lanes for the next stage.
; Bracketed comments list the four word lanes, lowest lane first.
; Clobbers mm0, mm1, mm7. eax is only read.
%macro HADAMARD_HPASS 2   ; %1: offset  %2:REORDERING
  movq   mm0, [eax+%1]    ; [0123]

  movq   mm7, mm0
  movq   mm1, [eax+%1+8]  ; [4567]

  paddsw mm0, mm1     ; [abcd] = [0123] + [4567]
  psubsw mm7, mm1     ; [efgh] = [0123] - [4567]

  movq      mm1,mm0
  punpcklwd mm0,mm7   ; [aebf]
  punpckhwd mm1,mm7   ; [cgdh]

  movq   mm7,mm0
  paddsw mm0,mm1      ; [ABCD] = [aebf] + [cgdh]
  psubsw mm7,mm1      ; [EFGH] = [aebf] - [cgdh]

  movq      mm1,mm0
  punpcklwd mm0,mm7   ; [AEBF]
  punpckhwd mm1,mm7   ; [CGDH]

  movq   mm7,mm0
  paddsw mm0,mm1      ; [0312] = [AEBF] + [CGDH]  (output indices)
  psubsw mm7,mm1      ; [7465] = [AEBF] - [CGDH]  (output indices)

%if (%2!=0)   ; SSE only

  pshufw mm0,mm0, 01111000b ; [0312] -> [0123]
  movq [eax+%1  ], mm0
  pshufw mm7,mm7, 00101101b ; [7465] -> [4567]

%else
  movq [eax+%1  ], mm0      ; low half stored in raw [0312] order
%endif

  movq [eax+%1+8], mm7      ; high half (shuffled only when reordering)

%endmacro

; HADAMARD_VPASS src/dst
; 8-point vertical pass over four adjacent 16-bit columns at once.
; The argument is the address expression of the first row; the eight
; rows are 16 bytes apart and are overwritten in place.
; Three BUTF2 stages (two invocations each). With r0..r7 the loaded rows,
; output row k is the +/- combination with k sign changes, as listed on
; the stores below.
; Clobbers mm0-mm7. Arithmetic is saturating (see BUTF2).
%macro HADAMARD_VPASS 1   ; src/dst   ; 27c
  movq  mm0, [%1+0*16]
  movq  mm1, [%1+1*16]
  movq  mm2, [%1+2*16]
  movq  mm3, [%1+3*16]
  movq  mm4, [%1+4*16]
  movq  mm5, [%1+5*16]
  movq  mm6, [%1+6*16]
  movq  mm7, [%1+7*16]

  ; rows 0-3: stage 1 pairs (r0,r1) and (r2,r3), stage 2 combines them
  BUTF2  mm0, mm1,  mm2, mm3
  BUTF2  mm1, mm3,  mm0, mm2

  ; rows 4-7: same two stages
  BUTF2  mm4, mm5,  mm6, mm7
  BUTF2  mm4, mm6,  mm5, mm7

  ; stage 3: combine the rows 0-3 results with the rows 4-7 results
  BUTF2  mm3, mm7,  mm0, mm4
  BUTF2  mm2, mm6,  mm1, mm5

  movq  [%1+0*16], mm7   ; + + + + + + + +
  movq  [%1+1*16], mm3   ; + + + + - - - -
  movq  [%1+2*16], mm1   ; + + - - - - + +
  movq  [%1+3*16], mm5   ; + + - - + + - -
  movq  [%1+4*16], mm4   ; + - - + + - - +
  movq  [%1+5*16], mm0   ; + - - + - + + -
  movq  [%1+6*16], mm2   ; + - + - - + - +
  movq  [%1+7*16], mm6   ; + - + - + - + -
%endmacro

  ; 135c  (131c without the pshufw reordering)
  ;
  ; void xvid_Hadamard_SSE(int16_t Matrix[8*8])
  ; In-place 8x8 transform: eight horizontal row passes (with pshufw
  ; reordering enabled), then two vertical passes covering columns 0-3
  ; and columns 4-7.
  ; In:    [esp+4] = pointer to the 64-word matrix, rows 16 bytes apart
  ; Clobb: eax, mm0-mm7
  ; No emms is issued here; presumably the caller does so before any
  ; x87 code runs -- confirm against the calling convention in use.
xvid_Hadamard_SSE:
  mov eax, [esp+4]                  ; eax = Matrix

  ; Horizontal pass: one macro expansion per 16-byte row.
%assign HAD_ROW_OFS 0
%rep 8
  HADAMARD_HPASS HAD_ROW_OFS, 1
%assign HAD_ROW_OFS HAD_ROW_OFS+16
%endrep
%undef HAD_ROW_OFS

  ; Vertical pass: four columns per expansion.
  HADAMARD_VPASS eax                ; columns 0-3
  HADAMARD_VPASS eax+8              ; columns 4-7

  ret

;//////////////////////////////////////////////////////////////////////
-------------- next part --------------
A non-text attachment was scrubbed...
Name: hadamard.c
Type: text/x-c
Size: 2111 bytes
Desc: not available
Url : http://edu.bnhof.de/pipermail/xvid-devel/attachments/20030225/50d5caa9/hadamard.bin


More information about the XviD-devel mailing list