[XviD-devel] MMX improvements
carlo.bramix
carlo.bramix at libero.it
Tue Jul 12 14:27:17 CEST 2005
Ok, after some hours of work and study (I had to understand how CVS works) I was able to make the patch with TortoiseCVS.
I also did the benchmark as suggested in one message, and it worked fine.
The patch is attached with this message.
By the way, I also noticed something in another source file, but I'm not sure about it.
The source is "src/bitstream/x86_asm/cbp_sse2.asm".
At line 68 the macro LOOP_SSE2 contains a "test ecx, ecx", but the same instruction already follows every invocation of that macro (for example, at line 105).
In my opinion, line 68 should be removed because it just duplicates the test instruction.
I'm sorry if I don't give you the patch directly, but I can't test this change.
My PC is an old 200 MHz Pentium with MMX, and it doesn't have SSE or SSE2 support.
I hope somebody could test it.
I would like to use this message for asking two questions:
1) Why can't I join XviD's forums?
I tried to make the registration, but it's disabled.
However it isn't very important because I can use this mailing list when I want to release my changes to the source code.
2) What's the most important feature that's missing right now?
Since I'm new here, I just wanted some guidelines to point my attention to specific parts of the source code.
Sincerely,
Carlo Bramini
---------- Initial Header -----------
>From : xvid-devel-bounces at xvid.org
To : xvid-devel at xvid.org
Cc :
Date : Mon, 11 Jul 2005 12:06:00 +0100
Subject : Re: [XviD-devel] MMX improvements
> Hello.
>
> On 7/11/05, carlo.bramix <carlo.bramix at libero.it> wrote:
> > Hello everyone.
> > I improved this sources:
> >
> > src/bitstream/x86_asm/cbp_mmx.asm
> >
> > My implementation is about 3% to 4% faster than previous code.
> > I would like to make my changes public: how should I do it?
> >
> > Sincerely,
> >
> > Carlo Bramini
>
> You can submit patches to this mailing list. Just cvs diff -u the
> file(s) you've altered and pipe the output into a file. Then attach
> that file to a message here, or if you prefer upload it to some space
> and provide a link.
>
> Regards,
> Robert Swain
> _______________________________________________
> XviD-devel mailing list
> XviD-devel at xvid.org
> http://list.xvid.org/mailman/listinfo/xvid-devel
>
____________________________________________________________
Navighi a 4 MEGA e i primi 3 mesi sono GRATIS.
Scegli Libero Adsl Flat senza limiti su http://www.libero.it
-------------- next part --------------
Index: src/bitstream/x86_asm/cbp_mmx.asm
===================================================================
RCS file: /xvid/xvidcore/src/bitstream/x86_asm/cbp_mmx.asm,v
retrieving revision 1.12
diff -u -r1.12 cbp_mmx.asm
--- src/bitstream/x86_asm/cbp_mmx.asm 29 Aug 2004 10:02:38 -0000 1.12
+++ src/bitstream/x86_asm/cbp_mmx.asm 11 Jul 2005 22:15:40 -0000
@@ -1,136 +1,134 @@
-;/****************************************************************************
-; *
-; * XVID MPEG-4 VIDEO CODEC
-; * - MMX CBP computation -
-; *
-; * Copyright (C) 2001-2003 Peter Ross <pross at xvid.org>
-; * 2002-2003 Pascal Massimino <skal at planet-d.net>
-; *
-; * This program is free software ; you can redistribute it and/or modify
-; * it under the terms of the GNU General Public License as published by
-; * the Free Software Foundation ; either version 2 of the License, or
-; * (at your option) any later version.
-; *
-; * This program is distributed in the hope that it will be useful,
-; * but WITHOUT ANY WARRANTY ; without even the implied warranty of
-; * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-; * GNU General Public License for more details.
-; *
-; * You should have received a copy of the GNU General Public License
-; * along with this program ; if not, write to the Free Software
-; * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-; *
-; * $Id: cbp_mmx.asm,v 1.12 2004/08/29 10:02:38 edgomez Exp $
-; *
-; ***************************************************************************/
-
-BITS 32
-
-;=============================================================================
-; Macros
-;=============================================================================
-
-%macro cglobal 1
- %ifdef PREFIX
- %ifdef MARK_FUNCS
- global _%1:function %1.endfunc-%1
- %define %1 _%1:function %1.endfunc-%1
- %else
- global _%1
- %define %1 _%1
- %endif
- %else
- %ifdef MARK_FUNCS
- global %1:function %1.endfunc-%1
- %else
- global %1
- %endif
- %endif
-%endmacro
-
-;=============================================================================
-; Local data
-;=============================================================================
-
-%ifdef FORMAT_COFF
-SECTION .rodata
-%else
-SECTION .rodata align=16
-%endif
-
-ALIGN 16
-
-ignore_dc:
- dw 0, -1, -1, -1, -1, -1, -1, -1
-
-;=============================================================================
-; Code
-;=============================================================================
-
-SECTION .text
-
-cglobal calc_cbp_mmx
-
-;-----------------------------------------------------------------------------
-; uint32_t calc_cbp_mmx(const int16_t coeff[6][64]);
-;-----------------------------------------------------------------------------
-
-ALIGN 16
-calc_cbp_mmx:
- push ebx
- push esi
-
- mov esi, [esp + 8 + 4] ; coeff
- xor eax, eax ; cbp = 0
- mov edx, (1 << 5)
-
- movq mm7, [ignore_dc]
-
-.loop
- movq mm0, [esi]
- movq mm1, [esi+8]
- pand mm0, mm7
-
- por mm0, [esi+16]
- por mm1, [esi+24]
-
- por mm0, [esi+32]
- por mm1, [esi+40]
-
- por mm0, [esi+48]
- por mm1, [esi+56]
-
- por mm0, [esi+64]
- por mm1, [esi+72]
-
- por mm0, [esi+80]
- por mm1, [esi+88]
-
- por mm0, [esi+96]
- por mm1, [esi+104]
-
- por mm0, [esi+112]
- por mm1, [esi+120]
-
- por mm0, mm1
- movq mm1, mm0
- psrlq mm1, 32
- lea esi, [esi + 128]
-
- por mm0, mm1
- movd ebx, mm0
-
- test ebx, ebx
- jz .next
- or eax, edx ; cbp |= 1 << (5-i)
-
-.next
- shr edx,1
- jnc .loop
-
- pop esi
- pop ebx
-
- ret
-.endfunc
-
+;/****************************************************************************
+; *
+; * XVID MPEG-4 VIDEO CODEC
+; * - MMX CBP computation -
+; *
+; * Copyright (C) 2001-2003 Peter Ross <pross at xvid.org>
+; * 2002-2003 Pascal Massimino <skal at planet-d.net>
+; *
+; * This program is free software ; you can redistribute it and/or modify
+; * it under the terms of the GNU General Public License as published by
+; * the Free Software Foundation ; either version 2 of the License, or
+; * (at your option) any later version.
+; *
+; * This program is distributed in the hope that it will be useful,
+; * but WITHOUT ANY WARRANTY ; without even the implied warranty of
+; * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+; * GNU General Public License for more details.
+; *
+; * You should have received a copy of the GNU General Public License
+; * along with this program ; if not, write to the Free Software
+; * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+; *
+; * $Id$
+; *
+; ***************************************************************************/
+
+BITS 32
+
+;=============================================================================
+; Macros
+;=============================================================================
+
+%macro cglobal 1 ; declare symbol %1 as global; takes one argument, the symbol name
+ %ifdef PREFIX ; PREFIX defined: export the name with a leading underscore
+ %ifdef MARK_FUNCS ; MARK_FUNCS defined: also attach ":function <size>" to the export
+ global _%1:function %1.endfunc-%1 ; size expression = distance from %1 to its .endfunc label
+ %define %1 _%1:function %1.endfunc-%1
+ %else
+ global _%1 ; plain underscored export; the %define below maps %1 to _%1
+ %define %1 _%1
+ %endif
+ %else ; no PREFIX: export the name unchanged
+ %ifdef MARK_FUNCS
+ global %1:function %1.endfunc-%1 ; same ":function <size>" marking, without the underscore
+ %else
+ global %1
+ %endif
+ %endif
+%endmacro
+
+;=============================================================================
+; Local data
+;=============================================================================
+
+%ifdef FORMAT_COFF
+SECTION .rodata
+%else
+SECTION .rodata align=16
+%endif
+
+ALIGN 16
+
+shift_mask:
+ dd 0x00000020, 0x00000020 ; 1 << 5 in each dword; calc_cbp_mmx loads this qword into mm4
+ignore_dc:
+ dw 0, -1, -1, -1 ; word mask: clears word 0, keeps words 1-3; calc_cbp_mmx loads it into mm7
+
+;=============================================================================
+; Code
+;=============================================================================
+
+SECTION .text
+
+cglobal calc_cbp_mmx
+
+;-----------------------------------------------------------------------------
+; uint32_t calc_cbp_mmx(const int16_t coeff[6][64]);
+;
+; Returns the coded block pattern: bit (5-i) is set when block i holds at
+; least one non-zero coefficient, not counting coefficient 0 (masked out
+; by ignore_dc).
+; In:    [esp + 4] = coeff (stack argument)
+; Out:   eax = cbp
+; Clobb: eax, edx, mm0-mm4, mm7, flags (no emms is issued here)
+;-----------------------------------------------------------------------------
+
+ALIGN 16
+calc_cbp_mmx:
+ mov eax, [esp + 4] ; coeff
+ mov edx, (1 << 5) ; loop counter: the set bit is shifted out after 6 passes
+
+ movq mm4, [shift_mask] ; current cbp bit, replicated in both dwords
+ pxor mm2, mm2 ; zero, used only for comparing
+ movq mm7, [ignore_dc]
+ pxor mm3, mm3 ; cbp = 0
+
+.loop:
+ movq mm0, [eax]
+ movq mm1, [eax+8]
+ pand mm0, mm7 ; drop coefficient 0
+ por mm0, [eax+16]
+ por mm1, [eax+24]
+ por mm0, [eax+32]
+ por mm1, [eax+40]
+ por mm0, [eax+48]
+ por mm1, [eax+56]
+ por mm0, [eax+64]
+ por mm1, [eax+72]
+ por mm0, [eax+80]
+ por mm1, [eax+88]
+ por mm0, [eax+96]
+ por mm1, [eax+104]
+ por mm0, [eax+112]
+ por mm1, [eax+120]
+
+ por mm0, mm1 ; mm0 = OR of the block's 16 qwords (coefficient 0 masked)
+ ; Test each dword for "!= 0". A signed "> 0" test (pcmpgtd) misses dwords
+ ; whose top bit is set, i.e. whenever a negative coefficient was OR-ed in.
+ pcmpeqd mm0, mm2 ; all ones where the dword is zero
+ pandn mm0, mm4 ; (~mm0) & mm4: current bit where the dword is non-zero
+ por mm3, mm0 ; cbp |= 1 << (5-i)
+ psrld mm4,1
+
+ shr edx,1
+ lea eax, [eax + 128] ; next block; lea leaves CF from the shr intact
+ jnc .loop
+
+ movq mm0, mm3 ; fold the two dword lanes together
+ psrlq mm0, 32
+ por mm0, mm3
+ movd eax, mm0
+ ret
+.endfunc:
+
More information about the XviD-devel
mailing list