No subject
Mon Sep 3 00:56:13 CEST 2007
> Hi Carlo
>
> Is the quantizer LUT really any good? Considering that the branches are=
> 100% predictable, I'd guess extra memory access is actually slowing i=
t
> down, as well as wasting memory. Unless you can show otherwise.
>
> The rest looks very good though.
>
> Radek
>
> carlo.bramix wrote:
> > Hello,
> > I would like to suggest the following patch.
> > It doesn't apply "dangerous" changes, it should just simplify/improve=
some bits of code.
> >
> > Sincerely,
> >
> > Carlo Bramini
> >
> >
> >
> > Index: src/decoder.c
> > =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
> > RCS file: /xvid/xvidcore/src/decoder.c,v
> > retrieving revision 1.80
> > diff -u -r1.80 decoder.c
> > --- src/decoder.c 16 Apr 2007 19:01:28 -0000 1.80
> > +++ src/decoder.c 5 Nov 2007 12:44:48 -0000
> > @@ -234,6 +234,14 @@
> > -1, -2, 1, 2
> > };
> >
> > +#define DQUANT_SAT(_quant) dquant_saturate[(_quant) + 1]
> > +
> > +static const unsigned char dquant_saturate[] =3D {
> > + 1, 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
> > + 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
> > + 23, 24, 25, 26, 27, 28, 29, 30, 31, 31, 31,
> > +};
> > +
> > /* decode an intra macroblock */
> > static void
> > decoder_mbintra(DECODER * dec,
> > @@ -765,11 +773,7 @@
> >
> > if (mb->mode =3D=3D MODE_INTRA_Q) {
> > quant +=3D dquant_table[BitstreamGetBits(bs, 2)];
> > - if (quant > 31) {
> > - quant =3D 31;
> > - } else if (quant < 1) {
> > - quant =3D 1;
> > - }
> > + quant =3D DQUANT_SAT(quant);
> > }
> > mb->quant =3D quant;
> > mb->mvs[0].x =3D mb->mvs[0].y =3D
> > @@ -1009,12 +1013,7 @@
> > if (mb->mode =3D=3D MODE_INTER_Q || mb->mode =3D=3D MODE_INT=
RA_Q) {
> > int dquant =3D dquant_table[BitstreamGetBits(bs, 2)];
> > DPRINTF(XVID_DEBUG_MB, "dquant %i\n", dquant);
> > - quant +=3D dquant;
> > - if (quant > 31) {
> > - quant =3D 31;
> > - } else if (quant < 1) {
> > - quant =3D 1;
> > - }
> > + quant =3D DQUANT_SAT(quant+dquant);
> > DPRINTF(XVID_DEBUG_MB, "quant %i\n", quant);
> > }
> > mb->quant =3D quant;
> > @@ -1420,11 +1419,7 @@
> > mb->cbp =3D 0;
> >
> > if (mb->mode && mb->cbp) {
> > - quant +=3D get_dbquant(bs);
> > - if (quant > 31)
> > - quant =3D 31;
> > - else if (quant < 1)
> > - quant =3D 1;
> > + quant =3D DQUANT_SAT(quant + get_dbquant(bs));
> > }
> > mb->quant =3D quant;
> >
> > Index: src/image/x86_asm/colorspace_mmx.inc
> > =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
> > RCS file: /xvid/xvidcore/src/image/x86_asm/colorspace_mmx.inc,v
> > retrieving revision 1.4
> > diff -u -r1.4 colorspace_mmx.inc
> > --- src/image/x86_asm/colorspace_mmx.inc 29 Aug 2004 10:02:38 -0000 1=
.4
> > +++ src/image/x86_asm/colorspace_mmx.inc 5 Nov 2007 12:44:49 -0000
> > @@ -106,8 +106,7 @@
> > sub ebx, edx
> > mov [x_dif], ebx ; x_dif =3D -BYTES*fixed_width - x_strid=
e
> >
> > - mov eax, ebp
> > - sub eax, 1
> > + lea eax, [ebp-1]
> > push edx
> > mul edx
> > pop edx
> > Index: src/image/x86_asm/colorspace_yuv_mmx.asm
> > =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
> > RCS file: /xvid/xvidcore/src/image/x86_asm/colorspace_yuv_mmx.asm,v
> > retrieving revision 1.6
> > diff -u -r1.6 colorspace_yuv_mmx.asm
> > --- src/image/x86_asm/colorspace_yuv_mmx.asm 30 Oct 2006 10:52:00 -00=
00 1.6
> > +++ src/image/x86_asm/colorspace_yuv_mmx.asm 5 Nov 2007 12:44:51 -000=
0
> > @@ -47,6 +47,14 @@
> > ; Helper macros
> > ;=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D
> >
> > +%macro _MOVQ 3
> > +%if %1 =3D=3D 1
> > + movntq %2, %3 ; xmm
> > +%else
> > + movq %2, %3 ; plain mmx
> > +%endif
> > +%endmacro
> > +
> > ;-------------------------------------------------------------------=
-----------
> > ; PLANE_COPY ( DST, DST_STRIDE, SRC, SRC_STRIDE, WIDTH, HEIGHT, OPT =
)
> > ; DST dst buffer
> > @@ -100,25 +108,14 @@
> > movq mm7, [esi + 48]
> > movq mm0, [esi + 56]
> >
> > -%if OPT =3D=3D 0 ; plain mmx
> > - movq [edi ], mm1 ; write to y_out
> > - movq [edi + 8], mm2
> > - movq [edi + 16], mm3
> > - movq [edi + 24], mm4
> > - movq [edi + 32], mm5
> > - movq [edi + 40], mm6
> > - movq [edi + 48], mm7
> > - movq [edi + 56], mm0
> > -%else
> > - movntq [edi ], mm1 ; write to y_out
> > - movntq [edi + 8], mm2
> > - movntq [edi + 16], mm3
> > - movntq [edi + 24], mm4
> > - movntq [edi + 32], mm5
> > - movntq [edi + 40], mm6
> > - movntq [edi + 48], mm7
> > - movntq [edi + 56], mm0
> > -%endif
> > + _MOVQ OPT, [edi ], mm1 ; write to y_out
> > + _MOVQ OPT, [edi + 8], mm2
> > + _MOVQ OPT, [edi + 16], mm3
> > + _MOVQ OPT, [edi + 24], mm4
> > + _MOVQ OPT, [edi + 32], mm5
> > + _MOVQ OPT, [edi + 40], mm6
> > + _MOVQ OPT, [edi + 48], mm7
> > + _MOVQ OPT, [edi + 56], mm0
> >
> > add esi, 64
> > add edi, 64
> > @@ -133,13 +130,9 @@
> > %%loop16_pc:
> > movq mm1, [esi]
> > movq mm2, [esi + 8]
> > -%if OPT =3D=3D 0 ; plain mmx
> > - movq [edi], mm1
> > - movq [edi + 8], mm2
> > -%else
> > - movntq [edi], mm1
> > - movntq [edi + 8], mm2
> > -%endif
> > + _MOVQ OPT, [edi], mm1
> > + _MOVQ OPT, [edi + 8], mm2
> >
> > add esi, 16
> > add edi, 16
> > @@ -195,26 +188,14 @@
> > jz %%loop16_start_pf
> >
> > %%loop64_pf:
> > -
> > -%if OPT =3D=3D 0 ; plain mmx
> > - movq [edi ], mm0 ; write to y_out
> > - movq [edi + 8], mm0
> > - movq [edi + 16], mm0
> > - movq [edi + 24], mm0
> > - movq [edi + 32], mm0
> > - movq [edi + 40], mm0
> > - movq [edi + 48], mm0
> > - movq [edi + 56], mm0
> > -%else
> > - movntq [edi ], mm0 ; write to y_out
> > - movntq [edi + 8], mm0
> > - movntq [edi + 16], mm0
> > - movntq [edi + 24], mm0
> > - movntq [edi + 32], mm0
> > - movntq [edi + 40], mm0
> > - movntq [edi + 48], mm0
> > - movntq [edi + 56], mm0
> > -%endif
> > + _MOVQ OPT, [edi ], mm0 ; write to y_out
> > + _MOVQ OPT, [edi + 8], mm0
> > + _MOVQ OPT, [edi + 16], mm0
> > + _MOVQ OPT, [edi + 24], mm0
> > + _MOVQ OPT, [edi + 32], mm0
> > + _MOVQ OPT, [edi + 40], mm0
> > + _MOVQ OPT, [edi + 48], mm0
> > + _MOVQ OPT, [edi + 56], mm0
> >
> > add edi, 64
> > loop %%loop64_pf
> > @@ -225,13 +206,8 @@
> > jz %%loop1_start_pf
> >
> > %%loop16_pf:
> > -%if OPT =3D=3D 0 ; plain mmx
> > - movq [edi ], mm0
> > - movq [edi + 8], mm0
> > -%else
> > - movntq [edi ], mm0
> > - movntq [edi + 8], mm0
> > -%endif
> > + _MOVQ OPT, [edi ], mm0
> > + _MOVQ OPT, [edi + 8], mm0
> >
> > add edi, 16
> > loop %%loop16_pf
> > Index: src/image/x86_asm/interpolate8x8_mmx.asm
> > =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
> > RCS file: /xvid/xvidcore/src/image/x86_asm/interpolate8x8_mmx.asm,v
> > retrieving revision 1.18
> > diff -u -r1.18 interpolate8x8_mmx.asm
> > --- src/image/x86_asm/interpolate8x8_mmx.asm 13 Sep 2005 12:12:15 -00=
00 1.18
> > +++ src/image/x86_asm/interpolate8x8_mmx.asm 5 Nov 2007 12:44:53 -000=
0
> > @@ -574,7 +574,6 @@
> >
> > mov eax, [esp + 4 + 24] ; height -> eax
> > sub eax, 8
> > - test eax, eax
> >
> > mov ecx, [esp + 4 + 4] ; dst -> edi
> > mov eax, [esp + 4 + 8] ; src1 -> esi
> > @@ -604,7 +603,6 @@
> > .rounding1
> > mov eax, [esp + 4 + 24] ; height -> eax
> > sub eax, 8
> > - test eax, eax
> >
> > mov ecx, [esp + 4 + 4] ; dst -> edi
> > mov eax, [esp + 4 + 8] ; src1 -> esi
> > @@ -1024,9 +1022,7 @@
> > mov eax, [esp + 4 + 8] ; src -> esi
> > mov edx, [esp + 4 + 12] ; stride -> edx
> >
> > - mov ebx, edx
> > - shl ebx, 1
> > - add ebx, edx
> > + lea ebx, [edx+edx*2]
> >
> > pxor mm7, mm7
> >
> > Index: src/image/x86_asm/postprocessing_mmx.asm
> > =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
> > RCS file: /xvid/xvidcore/src/image/x86_asm/postprocessing_mmx.asm,v
> > retrieving revision 1.4
> > diff -u -r1.4 postprocessing_mmx.asm
> > --- src/image/x86_asm/postprocessing_mmx.asm 29 Aug 2004 10:02:38 -00=
00 1.4
> > +++ src/image/x86_asm/postprocessing_mmx.asm 5 Nov 2007 12:44:53 -000=
0
> > @@ -115,7 +115,7 @@
> > jl .xloop
> >
> > add edx, ecx ; dst +=3D stride
> > - sub edi, 1
> > + dec edi
> > jg .yloop
> >
> > pop edi
> > Index: src/image/x86_asm/postprocessing_sse2.asm
> > =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
> > RCS file: /xvid/xvidcore/src/image/x86_asm/postprocessing_sse2.asm,v
> > retrieving revision 1.5
> > diff -u -r1.5 postprocessing_sse2.asm
> > --- src/image/x86_asm/postprocessing_sse2.asm 29 Aug 2004 10:02:38 -0=
000 1.5
> > +++ src/image/x86_asm/postprocessing_sse2.asm 5 Nov 2007 12:44:55 -00=
00
> > @@ -130,7 +130,7 @@
> > jl .xloop
> >
> > add edx, ecx ; dst +=3D stride
> > - sub edi, 1
> > + dec edi
> > jg .yloop
> >
> > add esp, 32
> > Index: src/image/x86_asm/reduced_mmx.asm
> > =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
> > RCS file: /xvid/xvidcore/src/image/x86_asm/reduced_mmx.asm,v
> > retrieving revision 1.6
> > diff -u -r1.6 reduced_mmx.asm
> > --- src/image/x86_asm/reduced_mmx.asm 29 Aug 2004 10:02:38 -0000 1.6
> > +++ src/image/x86_asm/reduced_mmx.asm 5 Nov 2007 12:44:57 -0000
> > @@ -758,7 +758,7 @@
> > packuswb mm1, mm7
> > movd [esi+eax*4], mm0
> > movd [edi+eax*4], mm1
> > - add eax,1
> > + inc eax
> > jl .Loop
> >
> > pop edi
> >
> > _______________________________________________
> > XviD-devel mailing list
> > XviD-devel at xvid.org
> > http://list.xvid.org/mailman/listinfo/xvid-devel
> >
> >
> _______________________________________________
> XviD-devel mailing list
> XviD-devel at xvid.org
> http://list.xvid.org/mailman/listinfo/xvid-devel
>
More information about the XviD-devel
mailing list