[XviD-devel] GMC rc3 - TODO

14 Jan 2003 19:27:43 +0100

--=-APBwBi9oN659GV+PbUMx
Content-Type: text/plain
Content-Transfer-Encoding: 7bit

	Hi,

On Sun, 2003-01-12 at 13:51, Christoph Lampert wrote:

> Anyway, a first step would be to change these 
> 
> int F= i0s + ( ((-r*i0s+i1ss)*I + (r*j0s-j1ss)*J +(1<<(alpha+rho-1))) >>  (alpha+rho) );
> int G= j0s + ( ((-r*j0s+j1ss)*I + (-r*i0s+i1ss)*J +(1<<(alpha+rho-1))) >> (alpha+rho) );
> 
> difficult and wasteful calculations to an incremental approach, only add
> something every step instead of multiplying. 

	here's a test-bed (a la xvid_bench.c) for future tests on GMC.
	I've also quickly hacked a fixed-point incremental version,
	just to be sure I still can beat a linux compiler :)

	this is most probably my last contribution to XVID. Farewell!

		Skal

--=-APBwBi9oN659GV+PbUMx
Content-Disposition: attachment; filename=xvid_gmc.c
Content-Transfer-Encoding: quoted-printable
Content-Type: text/x-c; name=xvid_gmc.c; charset=ISO-8859-1

/**************************************************************************
 *
 *      XVID MPEG-4 VIDEO CODEC - Unit tests and benches for GMC
 *
 *      This program is free software; you can redistribute it and/or modify
 *      it under the terms of the GNU General Public License as published by
 *      the Free Software Foundation; either version 2 of the License, or
 *      (at your option) any later version.
 *
 *      This program is distributed in the hope that it will be useful,
 *      but WITHOUT ANY WARRANTY; without even the implied warranty of
 *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *      GNU General Public License for more details.
 *
 *      You should have received a copy of the GNU General Public License
 *      along with this program; if not, write to the Free Software
 *      Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *************************************************************************/

/************************************************************************
 *
 *  History:
 *
 *  01.04.2003  alt impl. w/ 4b fixed-point      - Skal -
 *
 *************************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>  // for gettimeofday
#include <string.h>    // for memset
#include <assert.h>

#include "xvid.h"
#include "portab.h"
#include "global.h"

// inner guts
#include "image/image.h"
#include "utils/mbfunctions.h"
#include "motion/motion.h"
#include "utils/mem_transfer.h"
#include "utils/emms.h"
#include "utils/timer.h"

#include <math.h>
/* Number of random warp sets test_GMC() tries for every
 * (qpel, rounding, nb_warp) combination. */
const int speed_ref = 50;

/*********************************************************************
 * misc
 *********************************************************************/

 /* Returns the current time of day, as reported by gettimeofday(),
  * expressed in microseconds (seconds scaled by 1e6 plus the
  * microsecond field).  The return value of gettimeofday() itself
  * is not checked. */
double gettime_usec(void)
{
  struct timeval tv;

  gettimeofday(&tv, NULL);
  return tv.tv_sec*1.0e6 + tv.tv_usec;
}

/*
 * Debug helper: dumps a YUV picture as a binary PGM ("P5") file.
 *
 *   w, h  - luma width and height, in pixels
 *   BpS   - distance in bytes between two luma rows of I->y
 *   BpSc  - distance in bytes between two chroma rows of I->u / I->v
 *   I     - picture to dump
 *
 * The file is w pixels wide and h*3/2 rows high: the h luma rows come
 * first, followed by h/2 rows each made of w/2 bytes of U and then
 * w/2 bytes of V.  Files are named dec00000.pgm, dec00001.pgm, ...
 * using a counter that persists across calls.  If the file cannot be
 * opened the function returns without writing anything; the results of
 * the individual writes are not checked.
 */
void write_pgm(int w, int h, int BpS, int BpSc, IMAGE *I)
{
  static int nb = 0;
  int i;
  FILE *f;
  char name[64];

  /* bounded formatting: the name can never overrun the buffer */
  snprintf( name, sizeof name, "dec%.5d.pgm", nb++ );
  f = fopen(name, "wb");
  if (!f) return;

  fprintf(f, "P5\n\n%d %d 255\n", w, h*3/2);
  for(i = 0; i<h; ++i)
    fwrite(I->y + i*BpS, w, 1, f);
  for(i = 0; i<h/2; ++i) {
    fwrite(I->u + i*BpSc, w/2,1, f);
    fwrite(I->v + i*BpSc, w/2,1, f);
  }
  fclose(f);
}

/* Pseudo-random generator specified by IEEE 1180 */

/* Generator state.  It is kept in an unsigned 32-bit word so that the
 * multiply/add step below wraps in a well-defined way; only the low
 * 31 bits are ever consumed by ieee_rand(). */
static uint32_t ieee_seed = 1;

/* Restarts the sequence from seed s (only its low 32 bits are kept). */
static void ieee_reseed(long s) {
  ieee_seed = (uint32_t)s;
}

/*
 * Returns the next pseudo-random value, in the inclusive range
 * [Min, Max].
 *
 * The state is advanced with the linear congruential step
 * seed*1103515245 + 12345, the low 31 bits (with bit 0 cleared) are
 * scaled to [0, 1) and then stretched over Max-Min+1 buckets.
 * The result is narrowed through a short before being returned, so the
 * requested range is expected to fit in a short.
 */
static long ieee_rand(int Min, int Max)
{
  static const double z = (double) 0x7fffffff;

  long i, j;
  double x;

  /* unsigned arithmetic: wraps modulo 2^32 instead of overflowing */
  ieee_seed = ieee_seed * 1103515245u + 12345u;
  i = (long)(ieee_seed & 0x7ffffffeu);
  x = ((double) i) / z;          /* 0 <= x < 1 */
  x *= (Max-Min+1);
  j = (long)x;
  j = j + Min;
  assert(j >= Min && j <= Max);
  return (short)j;
}

/*********************************************************************
 * GMC decoding test
 *********************************************************************/

/* Pointer to a routine that fills a GMC_DATA structure from a set of
 * warp points.  test_GMC() is called with either
 * generate_GMCparameters or my_GMCparameters through this type. */
typedef void (*GMC_Param_Func)( const int num_wp, const int res,
                                const WARPPOINTS *const warp,
                                const int width, const int height,
                                GMC_DATA *const gmc );

/* Pointer to a routine that renders macroblock (mi, mj) of pGMC from
 * pRef and returns a VECTOR for that macroblock.  test_GMC() is called
 * with either generate_GMCimageMB or my_GMCimageMB through this type. */
typedef VECTOR (*GMC_MB_Func)( const GMC_DATA *const gmc_data,
                               const IMAGE *const pRef,
                               const int mi, const int mj,
                               const int stride,
                               const int stride2,
                               const int quarterpel,
                               const int rounding,
                               IMAGE *const pGMC);

/*
 * Runs one GMC implementation over a fixed set of pseudo-random warps and
 * prints a checksum and the elapsed time, so that two implementations can
 * be compared by looking at their output.
 *
 *   PFunc - routine that turns warp points into a GMC_DATA
 *   MFunc - routine that renders one 16x16 macroblock and returns a VECTOR
 *
 * For each res in {2, 4, 8, 16} the function loops over qpel (0..1),
 * rounding (0..1) and nb_warp (0..2); the random generator is reseeded
 * for every such combination, so the warps do not depend on the
 * implementation under test.  For each of the speed_ref random warp sets
 * the 64x64 source picture is filled with a byte ramp, the destination
 * is cleared, every macroblock is rendered, and both the returned
 * vectors and all destination bytes are folded into `crc`.
 *
 * The measured time covers the whole body of the res loop, including the
 * buffer fills, the checksum and the progress printf() calls.
 */
void test_GMC(GMC_Param_Func PFunc, GMC_MB_Func MFunc )
{
  double t;
  const int nb_tests = speed_ref;
  const int W = 64, H = 64;
  const int EW = ((W+15)&~15) + 2*EDGE_SIZE;    // edged width, used as luma stride
  const int EH = ((H+15)&~15) + 2*EDGE_SIZE;
  const int YSize = EW*EH;
  const int UVSize = (EW/2)*(EH/2);
  const int offset  = EDGE_SIZE  + EDGE_SIZE*EW;              // skip the luma border
  const int offset2 = EDGE_SIZE/2+(EDGE_SIZE/2)*(EW/2);       // skip the chroma border
  uint8_t Bits[2][YSize+2*UVSize];   // [0]: source planes, [1]: destination planes

  IMAGE Src, Dst;

  int res, nb_warp, i, n, x, y;
  int qpel, rounding;
  WARPPOINTS warps;
  GMC_DATA data;
  VECTOR AMv;

  Src.y = Bits[0]       +offset;
  Src.u = Bits[0]+YSize +offset2;
  Src.v = Src.u + UVSize;
  Dst.y = Bits[1]       +offset;
  Dst.u = Bits[1]+YSize +offset2;
  Dst.v = Dst.u + UVSize;

  printf( "\n ===== test GMC =====\n" );
  for(res = 2; res <= 16; res <<= 1) {
    int32_t crc = 0;
    printf( "res=%d ... ", res );
    t = -gettime_usec();
    for(qpel = 0; qpel <= 1; ++qpel) {
      for(rounding = 0; rounding <= 1; ++rounding) {
        for(nb_warp = 0; nb_warp <= 2; ++nb_warp) {
          printf( "[%d]", nb_warp );
          ieee_reseed(13*nb_warp+37*rounding+qpel+53);
          for(n = 0; n<nb_tests; ++n) {
            for(i = 0; i<3;++i) {
              warps.duv[i].x = ieee_rand(-32*res,32*res);
              warps.duv[i].y = ieee_rand(-32*res,32*res);
            }
            PFunc( nb_warp, res, &warps, W, H, &data );

            for(x = 0; x<YSize+2*UVSize; ++x) Bits[0][x] = x&0xff;
            //write_pgm(W,H,EW,EW/2,&Src);

            for(x = 0; x<YSize+2*UVSize; ++x) Bits[1][x] = 0;
            for(y = 0; y<H/16; y++) {
              for(x = 0; x<W/16; ++x) {
                AMv = MFunc(&data, &Src, x, y, EW, EW/2, qpel, rounding, &Dst);
                crc += (AMv.y^AMv.x) - 3*AMv.x;  // pitiful CRC'ing
              }
            }
//            write_pgm(W,H,EW,EW/2,&Dst);
            for(x = 0; x<YSize+2*UVSize; ++x)
              crc = ((crc>>2)^(Bits[1][x]<<2)) + ((crc^Bits[1][x])&0xffff);
          }
        }
      }
    }
    t += gettime_usec();
    /* %x expects an unsigned argument */
    printf( " ... crc=0x%x  time=%.3lf sec\n", (unsigned int)crc, t/1000000. );
  }
}

/*********************************************************************
 * alt implementation
 *********************************************************************/

  /* let's use some (forbidden) statics until they make it into GMC_DATA */

/*
 * All of these are written by my_GMCparameters() and read by
 * my_GMCimageMB().  Because they live at file scope, my_GMCimageMB()
 * always uses the values from the most recent my_GMCparameters() call,
 * whatever GMC_DATA it is handed.
 */

/* per-pixel increments of the luma accumulators */
static int32_t Cos, Sin;
/* initial values of the luma accumulators */
static int32_t Fo, Go;
/* per-pixel increments of the chroma accumulators (4*Cos, 4*Sin) */
static int32_t cCos, cSin;
/* initial values of the chroma accumulators */
static int32_t cFo, cGo;

/* RSHIFT(a,b): a divided by 2^b, rounded to nearest, with exact halves
 * rounded away from zero.  Assumes b > 0 and that >> on a negative value
 * is an arithmetic shift.  Both arguments are evaluated more than once.
 * Only defined here if no earlier header already provides it. */
#ifndef RSHIFT
#define RSHIFT(a,b) ((a) > 0 ? ((a) + (1<<((b)-1)))>>(b) : ((a) + (1<<((b)-1))-1)>>(b))
#endif

/* RDIV(a,b): a divided by b, rounded to nearest using b>>1 as the bias
 * (added for positive a, subtracted otherwise).  Assumes b > 0.
 * Both arguments are evaluated more than once. */
#ifndef RDIV
#define RDIV(a,b) (((a)>0 ? (a) + ((b)>>1) : (a) - ((b)>>1))/(b))
#endif

/*
 * Clamps `value` to the range [-length, length-1], where
 * length is 1 << (fcode+4).
 *
 * `quarterpel` is accepted but currently has no effect: the doubling of
 * `value` in quarter-pel mode is disabled.
 */
static __inline int gmc_sanitize(int value, int quarterpel, int fcode)
{
  int length = 1 << (fcode+4);

  (void)quarterpel;   /* disabled: if (quarterpel) value *= 2; */

  if (value < -length)
    return -length;
  else if (value >= length)
    return length-1;
  else return value;
}

/*
 * Returns the number of bits needed to represent `value`, i.e. the
 * position of its highest set bit counted from 1; returns 0 for 0.
 * Examples: log2bin(15) is 4, log2bin(16) is 5, log2bin(17) is 5.
 */
static __inline uint32_t
log2bin(uint32_t value)
{
  uint32_t n = 0;
  while (value) {
    value >>= 1;
    n++;
  }
  return n;
}

/*
 * Alternative parameter set-up for the incremental fixed-point GMC.
 *
 *   num_wp        - number of warp points; stored in gmc->num_wp, the
 *                   computation below always reads duv[0] and duv[1]
 *   res           - scaling parameter (2, 4, 8 or 16)
 *   warp          - warp point displacements
 *   width, height - reference picture size; height is only stored
 *   gmc           - receives the derived parameters
 *
 * Besides filling *gmc, this function sets the file-scope values
 * Cos, Sin, Fo, Go (luma) and cCos, cSin, cFo, cGo (chroma) that
 * my_GMCimageMB() steps through.
 */
void my_GMCparameters( const int num_wp, const int res,
                       const WARPPOINTS *const warp,
                       const int width, const int height,
                       GMC_DATA *const gmc)
{

  int tmp;
  int du0 = warp->duv[0].x;
  int dv0 = warp->duv[0].y;
  int du1 = warp->duv[1].x;
  int dv1 = warp->duv[1].y;

  gmc->num_wp = num_wp;

  gmc->s = res;            /* scaling parameters 2,4,8 or 16 */
  gmc->sigma = log2bin(res-1);  /* bit length of res-1 */
  gmc->r = 16/res;
  gmc->rho = 4 - gmc->sigma;     /* bit length of r-1 */

  gmc->W = width;
  gmc->H = height;      /* fixed reference coordinates */

  gmc->alpha = log2bin(gmc->W-1);
  gmc->Ws = 1<<gmc->alpha;

  gmc->i0s = res/2 * ( du0 );
  gmc->j0s = res/2 * ( dv0 );
  gmc->i1s = res/2 * (2*width + du1 + du0 );
  gmc->j1s = res/2 * ( dv1 + dv0 );

  tmp  = (gmc->W-gmc->Ws)*(gmc->r*gmc->i0s);
  tmp += gmc->Ws*(gmc->r*gmc->i1s - 16*gmc->W);
  gmc->i1ss = 16*gmc->Ws + RDIV(tmp,gmc->W);

  tmp = (gmc->W - gmc->Ws)*(gmc->r*gmc->j0s);
  tmp += gmc->Ws*gmc->r*gmc->j1s;
  gmc->j1ss = RDIV( tmp, gmc->W );

  Cos = gmc->i1ss - gmc->r*gmc->i0s;
  Sin = gmc->j1ss - gmc->r*gmc->j0s;
  /* i0s / j0s may be negative, and left-shifting a negative value is
   * undefined in C: scale by the same power of two with a multiply. */
  Fo  = (2*gmc->i0s + 1) * (1 << (gmc->alpha+gmc->rho-1));
  Go  = (2*gmc->j0s + 1) * (1 << (gmc->alpha+gmc->rho-1));

  cCos = 4*Cos;
  cSin = 4*Sin;

  cFo = Cos - Sin + (1 << (gmc->alpha+gmc->rho+1));
  cFo += 2*gmc->Ws*gmc->r*gmc->i0s - 16*gmc->Ws;

  cGo  = Sin + Cos + (1 << (gmc->alpha+gmc->rho+1));
  cGo += 2*gmc->Ws*gmc->r*gmc->j0s - 16*gmc->Ws;
}

/*
 * Alternative, incremental fixed-point rendering of one GMC macroblock.
 *
 *   gmc_data   - supplies W, H, rho and alpha; the per-pixel steps come
 *                from the file-scope values set by my_GMCparameters()
 *   pRef       - reference picture (read)
 *   mi, mj     - macroblock column and row
 *   stride     - distance between luma rows
 *   stride2    - distance between chroma rows
 *   quarterpel - only changes the final scaling of the returned vector
 *   rounding   - only changes the interpolation rounding constant
 *   pGMC       - destination picture (written)
 *
 * Writes the 16x16 luma block and the two 8x8 chroma blocks of
 * macroblock (mi, mj).  Each output sample is a bilinear blend of four
 * reference samples with 4-bit fractional weights.  The source position
 * is obtained by adding a constant step per pixel and per row instead
 * of evaluating the warp formula for every sample.
 *
 * Returns the accumulated luma source positions, offset and scaled down
 * with RSHIFT.
 *
 * NOTE(review): accumulators can be negative, so the >> and << applied
 * to them rely on arithmetic-shift behaviour, which ISO C does not
 * guarantee.
 */
VECTOR my_GMCimageMB( const GMC_DATA *const gmc_data,
                      const IMAGE *const pRef,
                      const int mi, const int mj,
                      const int stride,
                      const int stride2,
                      const int quarterpel,
                      const int rounding,
                      IMAGE *const pGMC)
{
  const int W = gmc_data->W;
  const int H = gmc_data->H;

  const int rho = gmc_data->rho;
  const int alpha = gmc_data->alpha;

  const int rounder = 128 - (rounding<<(rho+rho));

  uint8_t *dstY, *dstU, *dstV;

  int I,J;
  VECTOR avgMV = {0,0};

  int32_t Fj, Gj;

  /* point one past the end of the row so that the inner loops can
   * index with a negative counter running up to zero */
  dstY = &pGMC->y[(mj*16)*stride+mi*16] + 16;

  Fj = Fo - Sin*(mj*16) + Cos*(mi*16);
  Gj = Go + Cos*(mj*16) + Sin*(mi*16);
  for (J = 16; J>0; --J)
  {
    int32_t Fi, Gi;

    /* start of this row; step the row accumulators for the next one */
    Fi = Fj; Fj -= Sin;
    Gi = Gj; Gj += Cos;
    for (I = -16; I<0; ++I)
    {
      int32_t F, G, ri, rj;
      int Y00,Y01,Y10,Y11;

      F = ( Fi >> (alpha+rho) ) << rho; Fi += Cos;
      G = ( Gi >> (alpha+rho) ) << rho; Gi += Sin;

      avgMV.x += F;
      avgMV.y += G;

      /* low 4 bits: interpolation weights; the rest: integer position */
      ri = F & 15;
      rj = G & 15;

      F >>= 4;
      G >>= 4;

      /* keep the 2x2 fetch within one sample of the picture */
      if (F< -1) F = -1;
      else if (F>W) F = W;
      if (G< -1) G = -1;
      else if (G>H) G = H;

      Y00 = pRef->y[ G*stride + F   ];
      Y01 = pRef->y[ G*stride + F+1 ];
      Y10 = pRef->y[ G*stride + F   + stride ];
      Y11 = pRef->y[ G*stride + F+1 + stride ];

      /* horizontal blends (scaled by 16), then vertical (scaled by 256) */
      Y01 = (Y00<<4) + ri*(Y01-Y00);
      Y11 = (Y10<<4) + ri*(Y11-Y10);
      Y11 = (Y01<<4) + rj*(Y11-Y01);
      Y00 = ( Y11 + rounder ) >> 8;

      dstY[I] = (uint8_t)Y00;
    }
    dstY += stride;
  }

  dstU = &pGMC->u[(mj*8)*stride2+mi*8] + 8;
  dstV = &pGMC->v[(mj*8)*stride2+mi*8] + 8;

  Fj = cFo - cSin*(mj*8) + cCos*(mi*8);
  Gj = cGo + cCos*(mj*8) + cSin*(mi*8);
  for (J = 8; J>0; --J)
  {
    int32_t Fi, Gi;
    Fi = Fj; Fj -= cSin;
    Gi = Gj; Gj += cCos;

    for (I = -8; I<0; ++I)
    {
      int32_t F, G, ri, rj;
      int C00,C01,C10,C11;

      F = ( Fi >> (alpha+rho+2) ) << rho; Fi += cCos;
      G = ( Gi >> (alpha+rho+2) ) << rho; Gi += cSin;

      ri = F & 15;
      rj = G & 15;

      F >>= 4;
      G >>= 4;

      if (F< -1) F = -1;
      else if (F >= W/2) F = W/2;
      if (G< -1) G = -1;
      else if (G >= H/2) G = H/2;

      /* U and V share the same position and weights */
      C00 = pRef->u[ G*stride2 + F   ];
      C01 = pRef->u[ G*stride2 + F+1 ];
      C10 = pRef->u[ G*stride2 + F   + stride2];
      C11 = pRef->u[ G*stride2 + F+1 + stride2];

      C01 = (C00<<4) + ri*(C01-C00);
      C11 = (C10<<4) + ri*(C11-C10);
      C11 = (C01<<4) + rj*(C11-C01);
      C00 = ( C11 + rounder ) >> 8;

      dstU[I] = (uint8_t)C00;

      C00 = pRef->v[ G*stride2 + F   ];
      C01 = pRef->v[ G*stride2 + F+1 ];
      C10 = pRef->v[ G*stride2 + F   + stride2];
      C11 = pRef->v[ G*stride2 + F+1 + stride2];

      C01 = (C00<<4) + ri*(C01-C00);
      C11 = (C10<<4) + ri*(C11-C10);
      C11 = (C01<<4) + rj*(C11-C01);
      C00 = ( C11 + rounder ) >> 8;

      dstV[I] = (uint8_t)C00;
    }
    dstU += stride2;
    dstV += stride2;
  }

  /* NOTE(review): only the in-block pixel offsets are removed here
   * (16 rows times the sum 0+1+...+15, in 1/16 units); nothing is
   * subtracted for the macroblock origin (mi*16, mj*16).  Confirm this
   * against generate_GMCimageMB. */
  avgMV.x -= 16*(120<<4);    // 120 = 15*16/2
  avgMV.y -= 16*(120<<4);

  avgMV.x = RSHIFT( avgMV.x, (4+7-quarterpel) );
  avgMV.y = RSHIFT( avgMV.y, (4+7-quarterpel) );

  return avgMV;
}

/*********************************************************************
 * main
 *********************************************************************/

/*
 * Entry point.  The optional first argument selects what is run:
 *   0 (default) - both implementations
 *   1           - generate_GMCparameters / generate_GMCimageMB only
 *   2           - my_GMCparameters / my_GMCimageMB only
 * Any other value runs nothing.  Always returns 0.
 */
int main(int argc, char *argv[])
{
  int what = 0;
  if (argc>1) what = atoi(argv[1]);

  if (what == 0 || what == 1)
    test_GMC(generate_GMCparameters, generate_GMCimageMB);
  if (what == 0 || what == 2)
    test_GMC(my_GMCparameters, my_GMCimageMB);
  return 0;
}

/*********************************************************************/

--=-APBwBi9oN659GV+PbUMx--