[XviD-devel] MMX improvements

carlo.bramix carlo.bramix at libero.it
Tue Jul 12 23:52:18 CEST 2005


I have made a new patch file as requested.
I hope this one is more useful than the previous one.

Sincerely,

Carlo Bramini.

---------- Initial Header -----------

From      : xvid-devel-bounces at xvid.org
To          : xvid-devel at xvid.org
Cc          : 
Date      : Tue, 12 Jul 2005 14:58:16 +0200
Subject : Re: [XviD-devel] MMX improvements







> Hi,
> 
> On 7/12/05, carlo.bramix <carlo.bramix at libero.it> wrote:
> > Ok, after some hours of work and study (I had to understand how CVS works) I was able to make the patch with TortoiseCVS.
> 
> Unfortunately, it's not the way it should be. If you take a closer look
> at your patch, it mostly consists of deleting the whole file and
> replacing it with your implementation, which *could* be fine if you had
> found a revolutionary way to schedule the SIMD code, but I doubt it.
> All the more since your patch also changes the header, which has
> nothing to do with your MMX optimizations.
> 
> This makes the reviewing task of the maintainers a little more difficult.
> 
> I don't know how TortoiseCVS works, but you should try to find an
> option that discards white space changes to make this patch more
> readable.
> 
> Could you maybe resend a clean patch?
> 
> Regards,
> Guillaume
> -- 
> A lie gets halfway around the world before the truth has a chance to
> get its pants on.
>  -- Winston Churchill
> _______________________________________________
> XviD-devel mailing list
> XviD-devel at xvid.org
> http://list.xvid.org/mailman/listinfo/xvid-devel
> 



____________________________________________________________
Navighi a 4 MEGA e i primi 3 mesi sono GRATIS. 
Scegli Libero Adsl Flat senza limiti su http://www.libero.it
-------------- next part --------------
Index: src/bitstream/x86_asm/cbp_mmx.asm
===================================================================
RCS file: /xvid/xvidcore/src/bitstream/x86_asm/cbp_mmx.asm,v
retrieving revision 1.12
diff -u -r1.12 cbp_mmx.asm
--- src/bitstream/x86_asm/cbp_mmx.asm	29 Aug 2004 10:02:38 -0000	1.12
+++ src/bitstream/x86_asm/cbp_mmx.asm	11 Jul 2005 22:15:40 -0000
@@ -20,7 +20,7 @@
 ; *  along with this program ; if not, write to the Free Software
 ; *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 ; *
-; * $Id: cbp_mmx.asm,v 1.12 2004/08/29 10:02:38 edgomez Exp $
+; * $Id$
 ; *
 ; ***************************************************************************/
 
@@ -60,8 +60,10 @@
 
 ALIGN 16
 
+shift_mask:
+    dd 0x00000020, 0x00000020   ; 1 << 5 in both dwords; loaded into mm4 before the loop
 ignore_dc:
-	dw 0, -1, -1, -1, -1, -1, -1, -1
+    dw 0, -1, -1, -1            ; AND-ed with the first four words of every 128-byte group; clears word 0
 
 ;=============================================================================
 ; Code
@@ -77,59 +79,55 @@
 
 ALIGN 16
 calc_cbp_mmx:
-  push ebx
-  push esi
-
-  mov esi, [esp + 8 + 4]	; coeff
-  xor eax, eax			; cbp = 0
+  mov eax, [esp + 4]            ; eax = coeff (nothing is pushed, so the argument sits at esp+4)
   mov edx, (1 << 5)
 
+  movq mm4, [shift_mask]        ; mm4 = cbp bit for the current group, starts at 1 << 5
+  pxor mm2, mm2                 ; mm2 = 0, reference value for the zero test
   movq mm7, [ignore_dc]
+  pxor mm3, mm3                 ; cbp = 0
 
 .loop
-  movq mm0, [esi]
-  movq mm1, [esi+8]
+  movq mm0, [eax]
+  movq mm1, [eax+8]
   pand mm0, mm7
 
-  por mm0, [esi+16]
-  por mm1, [esi+24]
-
-  por mm0, [esi+32]
-  por mm1, [esi+40]
+  por mm0, [eax+16]
+  por mm1, [eax+24]
 
-  por mm0, [esi+48]
-  por mm1, [esi+56]
+  por mm0, [eax+32]
+  por mm1, [eax+40]
 
-  por mm0, [esi+64]
-  por mm1, [esi+72]
+  por mm0, [eax+48]
+  por mm1, [eax+56]
 
-  por mm0, [esi+80]
-  por mm1, [esi+88]
+  por mm0, [eax+64]
+  por mm1, [eax+72]
 
-  por mm0, [esi+96]
-  por mm1, [esi+104]
+  por mm0, [eax+80]
+  por mm1, [eax+88]
 
-  por mm0, [esi+112]
-  por mm1, [esi+120]
+  por mm0, [eax+96]
+  por mm1, [eax+104]
 
-  por mm0, mm1
-  movq mm1, mm0
-  psrlq mm1, 32
-  lea esi, [esi + 128]
+  por mm0, [eax+112]
+  por mm1, [eax+120]
 
   por mm0, mm1
-  movd ebx, mm0
+  pcmpeqd mm0, mm2              ; all-ones in every dword that is exactly zero
+  pandn mm0, mm4                ; (~mm0) & bit: set for any non-zero dword, including negative ones
+  por mm3, mm0                  ; cbp |= 1 << (5-i)
+  psrld mm4,1                   ; move on to the next lower cbp bit
 
-  test ebx, ebx
-  jz .next
-  or eax, edx     ; cbp |= 1 << (5-i)
-
-.next
   shr edx,1
+  lea eax, [eax + 128]          ; advance 128 bytes; lea keeps the carry produced by shr
   jnc .loop
 
-  pop esi
-  pop ebx
+  movq mm0, mm3
+  psrlq mm0, 32
+
+  por mm0, mm3                  ; merge the bits collected in the high and low dwords
+  movd eax, mm0                 ; return cbp in eax
 
   ret
 .endfunc


More information about the XviD-devel mailing list