[XviD-devel] Proposed patch

Radek Czyz radoslaw at syskin.cjb.net
Mon Nov 5 17:01:57 CET 2007


Hi Carlo

Is the quantizer LUT really any good? Considering that the branches are 
  100% predictable, I'd guess extra memory access is actually slowing it 
down, as well as wasting memory. Unless you can show otherwise.

The rest looks very good though.

Radek

carlo.bramix wrote:
> Hello,
> I would like to suggest the following patch.
> It doesn't apply "dangerous" changes, it should just simplify/improve some bits of code.
> 
> Sincerely,
> 
> Carlo Bramini
> 
> 
> 
> Index: src/decoder.c
> ===================================================================
> RCS file: /xvid/xvidcore/src/decoder.c,v
> retrieving revision 1.80
> diff -u -r1.80 decoder.c
> --- src/decoder.c	16 Apr 2007 19:01:28 -0000	1.80
> +++ src/decoder.c	5 Nov 2007 12:44:48 -0000
> @@ -234,6 +234,14 @@
>    -1, -2, 1, 2
>  };
>  
> +#define DQUANT_SAT(_quant)  dquant_saturate[(_quant) + 1]
> +
> +static const unsigned char dquant_saturate[] = {
> +  1, 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
> +  13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
> +  23, 24, 25, 26, 27, 28, 29, 30, 31, 31, 31,
> +};
> +
>  /* decode an intra macroblock */
>  static void
>  decoder_mbintra(DECODER * dec,
> @@ -765,11 +773,7 @@
>  
>        if (mb->mode == MODE_INTRA_Q) {
>          quant += dquant_table[BitstreamGetBits(bs, 2)];
> -        if (quant > 31) {
> -          quant = 31;
> -        } else if (quant < 1) {
> -          quant = 1;
> -        }
> +        quant = DQUANT_SAT(quant);
>        }
>        mb->quant = quant;
>        mb->mvs[0].x = mb->mvs[0].y =
> @@ -1009,12 +1013,7 @@
>          if (mb->mode == MODE_INTER_Q || mb->mode == MODE_INTRA_Q) {
>            int dquant = dquant_table[BitstreamGetBits(bs, 2)];
>            DPRINTF(XVID_DEBUG_MB, "dquant %i\n", dquant);
> -          quant += dquant;
> -          if (quant > 31) {
> -            quant = 31;
> -          } else if (quant < 1) {
> -            quant = 1;
> -          }
> +          quant = DQUANT_SAT(quant+dquant);
>            DPRINTF(XVID_DEBUG_MB, "quant %i\n", quant);
>          }
>          mb->quant = quant;
> @@ -1420,11 +1419,7 @@
>            mb->cbp = 0;
>  
>          if (mb->mode && mb->cbp) {
> -          quant += get_dbquant(bs);
> -          if (quant > 31)
> -            quant = 31;
> -          else if (quant < 1)
> -            quant = 1;
> +          quant = DQUANT_SAT(quant + get_dbquant(bs));
>          }
>          mb->quant = quant;
>  
> Index: src/image/x86_asm/colorspace_mmx.inc
> ===================================================================
> RCS file: /xvid/xvidcore/src/image/x86_asm/colorspace_mmx.inc,v
> retrieving revision 1.4
> diff -u -r1.4 colorspace_mmx.inc
> --- src/image/x86_asm/colorspace_mmx.inc	29 Aug 2004 10:02:38 -0000	1.4
> +++ src/image/x86_asm/colorspace_mmx.inc	5 Nov 2007 12:44:49 -0000
> @@ -106,8 +106,7 @@
>    sub ebx, edx
>    mov [x_dif], ebx          ; x_dif = -BYTES*fixed_width - x_stride
>  
> -  mov eax, ebp
> -  sub eax, 1
> +  lea eax, [ebp-1]
>    push edx                
>    mul edx
>    pop edx
> Index: src/image/x86_asm/colorspace_yuv_mmx.asm
> ===================================================================
> RCS file: /xvid/xvidcore/src/image/x86_asm/colorspace_yuv_mmx.asm,v
> retrieving revision 1.6
> diff -u -r1.6 colorspace_yuv_mmx.asm
> --- src/image/x86_asm/colorspace_yuv_mmx.asm	30 Oct 2006 10:52:00 -0000	1.6
> +++ src/image/x86_asm/colorspace_yuv_mmx.asm	5 Nov 2007 12:44:51 -0000
> @@ -47,6 +47,14 @@
>  ; Helper macros
>  ;=============================================================================
>  
> +%macro _MOVQ        3
> +%if %1 == 1
> +    movntq  %2, %3      ; xmm
> +%else
> +    movq    %2, %3      ; plain mmx
> +%endif
> +%endmacro
> +
>  ;------------------------------------------------------------------------------
>  ; PLANE_COPY ( DST, DST_STRIDE, SRC, SRC_STRIDE, WIDTH, HEIGHT, OPT )
>  ; DST		dst buffer
> @@ -100,25 +108,14 @@
>    movq mm7, [esi + 48]
>    movq mm0, [esi + 56]
>  
> -%if OPT == 0                ; plain mmx
> -  movq [edi     ], mm1           ; write to y_out
> -  movq [edi +  8], mm2
> -  movq [edi + 16], mm3
> -  movq [edi + 24], mm4
> -  movq [edi + 32], mm5
> -  movq [edi + 40], mm6
> -  movq [edi + 48], mm7
> -  movq [edi + 56], mm0
> -%else
> -  movntq [edi     ], mm1         ; write to y_out
> -  movntq [edi +  8], mm2
> -  movntq [edi + 16], mm3
> -  movntq [edi + 24], mm4
> -  movntq [edi + 32], mm5
> -  movntq [edi + 40], mm6
> -  movntq [edi + 48], mm7
> -  movntq [edi + 56], mm0
> -%endif
> +  _MOVQ OPT, [edi     ], mm1           ; write to y_out
> +  _MOVQ OPT, [edi +  8], mm2
> +  _MOVQ OPT, [edi + 16], mm3
> +  _MOVQ OPT, [edi + 24], mm4
> +  _MOVQ OPT, [edi + 32], mm5
> +  _MOVQ OPT, [edi + 40], mm6
> +  _MOVQ OPT, [edi + 48], mm7
> +  _MOVQ OPT, [edi + 56], mm0
>  
>    add esi, 64
>    add edi, 64
> @@ -133,13 +130,9 @@
>  %%loop16_pc:
>    movq mm1, [esi]
>    movq mm2, [esi + 8]
> -%if OPT == 0                ; plain mmx
> -  movq [edi], mm1
> -  movq [edi + 8], mm2
> -%else
> -  movntq [edi], mm1
> -  movntq [edi + 8], mm2
> -%endif
> +  _MOVQ OPT, [edi], mm1
> +  _MOVQ OPT, [edi + 8], mm2
>  
>    add esi, 16
>    add edi, 16
> @@ -195,26 +188,14 @@
>    jz %%loop16_start_pf
>  
>  %%loop64_pf:
> -
> -%if OPT == 0                ; plain mmx
> -  movq [edi     ], mm0          ; write to y_out
> -  movq [edi +  8], mm0
> -  movq [edi + 16], mm0
> -  movq [edi + 24], mm0
> -  movq [edi + 32], mm0
> -  movq [edi + 40], mm0
> -  movq [edi + 48], mm0
> -  movq [edi + 56], mm0
> -%else
> -  movntq [edi     ], mm0        ; write to y_out
> -  movntq [edi +  8], mm0
> -  movntq [edi + 16], mm0
> -  movntq [edi + 24], mm0
> -  movntq [edi + 32], mm0
> -  movntq [edi + 40], mm0
> -  movntq [edi + 48], mm0
> -  movntq [edi + 56], mm0
> -%endif
> +  _MOVQ OPT, [edi     ], mm0          ; write to y_out
> +  _MOVQ OPT, [edi +  8], mm0
> +  _MOVQ OPT, [edi + 16], mm0
> +  _MOVQ OPT, [edi + 24], mm0
> +  _MOVQ OPT, [edi + 32], mm0
> +  _MOVQ OPT, [edi + 40], mm0
> +  _MOVQ OPT, [edi + 48], mm0
> +  _MOVQ OPT, [edi + 56], mm0
>  
>    add edi, 64
>    loop %%loop64_pf
> @@ -225,13 +206,8 @@
>    jz %%loop1_start_pf
>  
>  %%loop16_pf:
> -%if OPT == 0                ; plain mmx
> -  movq [edi    ], mm0
> -  movq [edi + 8], mm0
> -%else
> -  movntq [edi    ], mm0
> -  movntq [edi + 8], mm0
> -%endif
> +  _MOVQ OPT, [edi    ], mm0
> +  _MOVQ OPT, [edi + 8], mm0
>  
>    add edi, 16
>    loop %%loop16_pf
> Index: src/image/x86_asm/interpolate8x8_mmx.asm
> ===================================================================
> RCS file: /xvid/xvidcore/src/image/x86_asm/interpolate8x8_mmx.asm,v
> retrieving revision 1.18
> diff -u -r1.18 interpolate8x8_mmx.asm
> --- src/image/x86_asm/interpolate8x8_mmx.asm	13 Sep 2005 12:12:15 -0000	1.18
> +++ src/image/x86_asm/interpolate8x8_mmx.asm	5 Nov 2007 12:44:53 -0000
> @@ -574,7 +574,6 @@
>  
>    mov eax, [esp + 4 + 24]   ; height -> eax
>    sub eax, 8
> -  test eax, eax
>  
>    mov ecx, [esp + 4 + 4]    ; dst -> edi
>    mov eax, [esp + 4 + 8]    ; src1 -> esi
> @@ -604,7 +603,6 @@
>  .rounding1
>    mov eax, [esp + 4 + 24]       ; height -> eax
>    sub eax, 8
> -  test eax, eax
>  
>    mov ecx, [esp + 4 + 4]        ; dst -> edi
>    mov eax, [esp + 4 + 8]        ; src1 -> esi
> @@ -1024,9 +1022,7 @@
>    mov eax, [esp + 4 + 8]            ; src -> esi
>    mov edx, [esp + 4 + 12]           ; stride -> edx
>  
> -  mov ebx, edx
> -  shl ebx, 1
> -  add ebx, edx
> +  lea ebx, [edx+edx*2]
>  
>    pxor mm7, mm7
>  
> Index: src/image/x86_asm/postprocessing_mmx.asm
> ===================================================================
> RCS file: /xvid/xvidcore/src/image/x86_asm/postprocessing_mmx.asm,v
> retrieving revision 1.4
> diff -u -r1.4 postprocessing_mmx.asm
> --- src/image/x86_asm/postprocessing_mmx.asm	29 Aug 2004 10:02:38 -0000	1.4
> +++ src/image/x86_asm/postprocessing_mmx.asm	5 Nov 2007 12:44:53 -0000
> @@ -115,7 +115,7 @@
>  	jl .xloop
>  
>  	add edx, ecx				; dst += stride
> -	sub edi, 1
> +	dec edi
>  	jg .yloop
>  
>  	pop edi
> Index: src/image/x86_asm/postprocessing_sse2.asm
> ===================================================================
> RCS file: /xvid/xvidcore/src/image/x86_asm/postprocessing_sse2.asm,v
> retrieving revision 1.5
> diff -u -r1.5 postprocessing_sse2.asm
> --- src/image/x86_asm/postprocessing_sse2.asm	29 Aug 2004 10:02:38 -0000	1.5
> +++ src/image/x86_asm/postprocessing_sse2.asm	5 Nov 2007 12:44:55 -0000
> @@ -130,7 +130,7 @@
>    jl .xloop
>  
>    add edx, ecx                  ; dst += stride
> -  sub edi, 1
> +  dec edi
>    jg .yloop
>  
>    add esp, 32
> Index: src/image/x86_asm/reduced_mmx.asm
> ===================================================================
> RCS file: /xvid/xvidcore/src/image/x86_asm/reduced_mmx.asm,v
> retrieving revision 1.6
> diff -u -r1.6 reduced_mmx.asm
> --- src/image/x86_asm/reduced_mmx.asm	29 Aug 2004 10:02:38 -0000	1.6
> +++ src/image/x86_asm/reduced_mmx.asm	5 Nov 2007 12:44:57 -0000
> @@ -758,7 +758,7 @@
>    packuswb mm1, mm7
>    movd [esi+eax*4], mm0
>    movd [edi+eax*4], mm1
> -  add eax,1
> +  inc eax
>    jl .Loop
>  
>    pop edi
> 
> _______________________________________________
> XviD-devel mailing list
> XviD-devel at xvid.org
> http://list.xvid.org/mailman/listinfo/xvid-devel
> 
> 


More information about the XviD-devel mailing list