[XviD-devel] MMX improvements

Tue Jul 12 14:27:17 CEST 2005

Ok, after some hours of work and study (I had to understand how CVS works) I was able to make the patch with TortoiseCVS.
I also did the benchmark as suggested in one message, and it worked fine.
The patch is attached with this message.

By the way, I also noticed something in one source file, but I'm not sure about it.
The source is "src/bitstream/x86_asm/cbp_sse2.asm".
At line 68 the macro LOOP_SSE2 has a "test ecx, ecx", but the same instruction already follows every invocation of that macro (for example, at line 105).
In my opinion line 68 should be removed, because it just duplicates the test instruction.
I'm sorry if I don't give you the patch directly, but I can't test this change.
My PC is an old 200 MHz Pentium with MMX, and it doesn't have SSE or SSE2 support.
I hope somebody could test it.

I would like to use this message for asking two questions:
1) Why can't I join XviD's forums?
I tried to make the registration, but it's disabled.
However it isn't very important because I can use this mailing list when I want to release my changes to the source code.

2) What's the most important feature that's missing right now?
Since I'm new here, I would like some guidance on which specific parts of the source code I should focus my attention on.

Sincerely,

Carlo Bramini

---------- Initial Header -----------

>From      : xvid-devel-bounces at xvid.org
To          : xvid-devel at xvid.org
Cc          : 
Date      : Mon, 11 Jul 2005 12:06:00 +0100
Subject : Re: [XviD-devel] MMX improvements







> Hello.
> 
> On 7/11/05, carlo.bramix <carlo.bramix at libero.it> wrote:
> > Hello everyone.
> > I improved this source file:
> > 
> > src/bitstream/x86_asm/cbp_mmx.asm
> > 
> > My implementation is about 3% to 4% faster than previous code.
> > I would like to make my changes public: how should I do it?
> > 
> > Sincerely,
> > 
> > Carlo Bramini
> 
> You can submit patches to this mailing list. Just cvs diff -u the
> file(s) you've altered and pipe the output into a file. Then attach
> that file to a message here, or if you prefer upload it to some space
> and provide a link.
> 
> Regards,
> Robert Swain
> _______________________________________________
> XviD-devel mailing list
> XviD-devel at xvid.org
> http://list.xvid.org/mailman/listinfo/xvid-devel
> 



____________________________________________________________
Navighi a 4 MEGA e i primi 3 mesi sono GRATIS. 
Scegli Libero Adsl Flat senza limiti su http://www.libero.it
-------------- next part --------------
Index: src/bitstream/x86_asm/cbp_mmx.asm
===================================================================
RCS file: /xvid/xvidcore/src/bitstream/x86_asm/cbp_mmx.asm,v
retrieving revision 1.12
diff -u -r1.12 cbp_mmx.asm

--- src/bitstream/x86_asm/cbp_mmx.asm	29 Aug 2004 10:02:38 -0000	1.12
+++ src/bitstream/x86_asm/cbp_mmx.asm	11 Jul 2005 22:15:40 -0000
@@ -1,136 +1,134 @@
-;/****************************************************************************
-; *
-; *  XVID MPEG-4 VIDEO CODEC
-; *  - MMX CBP computation -
-; *
-; *  Copyright (C) 2001-2003 Peter Ross <pross at xvid.org>
-; *                2002-2003 Pascal Massimino <skal at planet-d.net>
-; *
-; *  This program is free software ; you can redistribute it and/or modify
-; *  it under the terms of the GNU General Public License as published by
-; *  the Free Software Foundation ; either version 2 of the License, or
-; *  (at your option) any later version.
-; *
-; *  This program is distributed in the hope that it will be useful,
-; *  but WITHOUT ANY WARRANTY ; without even the implied warranty of
-; *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-; *  GNU General Public License for more details.
-; *
-; *  You should have received a copy of the GNU General Public License
-; *  along with this program ; if not, write to the Free Software
-; *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
-; *
-; * $Id: cbp_mmx.asm,v 1.12 2004/08/29 10:02:38 edgomez Exp $
-; *
-; ***************************************************************************/
-
-BITS 32
-
-;=============================================================================
-; Macros
-;=============================================================================
-
-%macro cglobal 1
-	%ifdef PREFIX
-		%ifdef MARK_FUNCS
-			global _%1:function %1.endfunc-%1
-			%define %1 _%1:function %1.endfunc-%1
-		%else
-			global _%1
-			%define %1 _%1
-		%endif
-	%else
-		%ifdef MARK_FUNCS
-			global %1:function %1.endfunc-%1
-		%else
-			global %1
-		%endif
-	%endif
-%endmacro
-
-;=============================================================================
-; Local data
-;=============================================================================
-
-%ifdef FORMAT_COFF
-SECTION .rodata
-%else
-SECTION .rodata align=16
-%endif
-
-ALIGN 16
-
-ignore_dc:
-	dw 0, -1, -1, -1, -1, -1, -1, -1
-
-;=============================================================================
-; Code
-;=============================================================================
-
-SECTION .text
-
-cglobal calc_cbp_mmx
-
-;-----------------------------------------------------------------------------
-; uint32_t calc_cbp_mmx(const int16_t coeff[6][64]);
-;-----------------------------------------------------------------------------
-
-ALIGN 16
-calc_cbp_mmx:
-  push ebx
-  push esi
-
-  mov esi, [esp + 8 + 4]	; coeff
-  xor eax, eax			; cbp = 0
-  mov edx, (1 << 5)
-
-  movq mm7, [ignore_dc]
-
-.loop
-  movq mm0, [esi]
-  movq mm1, [esi+8]
-  pand mm0, mm7
-
-  por mm0, [esi+16]
-  por mm1, [esi+24]
-
-  por mm0, [esi+32]
-  por mm1, [esi+40]
-
-  por mm0, [esi+48]
-  por mm1, [esi+56]
-
-  por mm0, [esi+64]
-  por mm1, [esi+72]
-
-  por mm0, [esi+80]
-  por mm1, [esi+88]
-
-  por mm0, [esi+96]
-  por mm1, [esi+104]
-
-  por mm0, [esi+112]
-  por mm1, [esi+120]
-
-  por mm0, mm1
-  movq mm1, mm0
-  psrlq mm1, 32
-  lea esi, [esi + 128]
-
-  por mm0, mm1
-  movd ebx, mm0
-
-  test ebx, ebx
-  jz .next
-  or eax, edx     ; cbp |= 1 << (5-i)
-
-.next
-  shr edx,1
-  jnc .loop
-
-  pop esi
-  pop ebx
-
-  ret
-.endfunc
-
+;/****************************************************************************
+; *
+; *  XVID MPEG-4 VIDEO CODEC
+; *  - MMX CBP computation -
+; *
+; *  Copyright (C) 2001-2003 Peter Ross <pross at xvid.org>
+; *                2002-2003 Pascal Massimino <skal at planet-d.net>
+; *
+; *  This program is free software ; you can redistribute it and/or modify
+; *  it under the terms of the GNU General Public License as published by
+; *  the Free Software Foundation ; either version 2 of the License, or
+; *  (at your option) any later version.
+; *
+; *  This program is distributed in the hope that it will be useful,
+; *  but WITHOUT ANY WARRANTY ; without even the implied warranty of
+; *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+; *  GNU General Public License for more details.
+; *
+; *  You should have received a copy of the GNU General Public License
+; *  along with this program ; if not, write to the Free Software
+; *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+; *
+; * $Id$
+; *
+; ***************************************************************************/
+
+BITS 32
+
+;=============================================================================
+; Macros
+;=============================================================================
+
+%macro cglobal 1 ; cglobal NAME: declare NAME as a global symbol
+	%ifdef PREFIX ; PREFIX: the exported symbol gets a leading underscore
+		%ifdef MARK_FUNCS ; MARK_FUNCS: add ":function" plus the size NAME.endfunc-NAME
+			global _%1:function %1.endfunc-%1
+			%define %1 _%1:function %1.endfunc-%1
+		%else
+			global _%1
+			%define %1 _%1
+		%endif
+	%else ; no PREFIX: NAME is exported unchanged and no %define is emitted
+		%ifdef MARK_FUNCS ; same ":function" + size annotation, without the underscore
+			global %1:function %1.endfunc-%1
+		%else
+			global %1
+		%endif
+	%endif
+%endmacro
+
+;=============================================================================
+; Local data
+;=============================================================================
+
+%ifdef FORMAT_COFF
+SECTION .rodata                 ; FORMAT_COFF build: section declared without align=
+%else
+SECTION .rodata align=16
+%endif
+
+ALIGN 16
+
+shift_mask:                     ; 1 << 5 in both dwords; loaded into mm4 by calc_cbp_mmx
+    dd 0x00000020, 0x00000020
+ignore_dc:                      ; 4-word AND mask that clears word 0 and keeps words 1-3
+    dw 0, -1, -1, -1
+
+;=============================================================================
+; Code
+;=============================================================================
+
+SECTION .text
+
+cglobal calc_cbp_mmx
+
+;-----------------------------------------------------------------------------
+; uint32_t calc_cbp_mmx(const int16_t coeff[6][64]);
+;
+; In:    [esp + 4] = coeff, six consecutive blocks of 64 int16 (128 bytes each)
+; Out:   eax = cbp; bit (5-i) is set when block i has a non-zero coefficient,
+;        ignoring word 0 of the block (masked out through ignore_dc)
+; Clobb: edx, flags, mm0-mm4, mm7 (no emms is executed here)
+;-----------------------------------------------------------------------------
+
+ALIGN 16
+calc_cbp_mmx:
+  mov eax, [esp + 4]            ; coeff
+  mov edx, (1 << 5)             ; loop counter: CF is set by the 6th shr
+
+  movq mm4, [shift_mask]        ; current cbp bit, replicated in both dwords
+  pxor mm2, mm2                 ; all-zero reference for pcmpeqd
+  movq mm7, [ignore_dc]
+  pxor mm3, mm3                 ; cbp = 0
+
+.loop
+  movq mm0, [eax]
+  movq mm1, [eax+8]
+  pand mm0, mm7                 ; drop word 0 of the block
+  por mm0, [eax+16]
+  por mm1, [eax+24]
+  por mm0, [eax+32]
+  por mm1, [eax+40]
+  por mm0, [eax+48]
+  por mm1, [eax+56]
+  por mm0, [eax+64]
+  por mm1, [eax+72]
+  por mm0, [eax+80]
+  por mm1, [eax+88]
+  por mm0, [eax+96]
+  por mm1, [eax+104]
+  por mm0, [eax+112]
+  por mm1, [eax+120]
+
+  por mm0, mm1                  ; mm0 = OR of all coefficients except word 0
+  ; pcmpgtd is a signed compare: a dword whose upper word is a negative
+  ; coefficient would look "not > 0" and the block would be missed.  Compare
+  ; for equality with zero and invert with pandn instead.
+  pcmpeqd mm0, mm2              ; dword = 0xFFFFFFFF where it is zero
+  pandn mm0, mm4                ; keep the cbp bit where the dword is non-zero
+  por mm3, mm0                  ; cbp |= 1 << (5-i)
+  psrld mm4,1
+
+  shr edx,1
+  lea eax, [eax + 128]          ; next block; lea leaves CF from shr intact
+  jnc .loop
+
+  movq mm0, mm3                 ; merge the bits collected in the two dwords
+  psrlq mm0, 32
+  por mm0, mm3
+  movd eax, mm0
+
+  ret
+.endfunc
+