[XviD-devel] new CodeCoeff() and CodeVector() functions
skal
xvid-devel@xvid.org
29 Aug 2002 18:47:56 +0200
Hi all,
here are some revamped encoding functions for
inter/intra coeffs and motion vectors.
On my P3/gcc it runs ~15% (especially on I-frame)
and ~30% faster, respectively. I admit it ends up
rather obfuscated C, but every line has its reason
to be there :)
I don't include "alt_tab.h" here, since it's an
awfully big (128Kb) chunk of uninteresting hex gibberish.
I'll send it directly if you think it's worthwhile.
Or you can get it here:
http://skal.planet-d.net/coding/alt_tab.h
A quick note about CodeCoeff(). Current implementation
*is* the fastest, but this megatonic intra_table[524032]
is giving me the creeps:) I've used a smaller table, which
is slower, no matter what. To achieve higher speed, I've
re-organized the coeffs-scanning loop. Combined with previous
huge 8Megs-table, it would be even faster, but...
Note: the 'last' scanning (while(!qcoeff[zigzag[--last]]);)
could be combined with Cbp computation... (and even MMX'ed,
I think).
I've also used the fact that sometimes, Escape2 encoding
achieves fewer bits than the Escape1 one, for some very special
combinations of run/level. Hence, don't be surprised if it
generates slightly smaller bitstreams (I've measured an
average saving of 0.01%, something like ~1.5Kb every
1000 frames @ 352x240, let's say).
bye,
Skal (sunburnt;)
/////////////////////////////////////////////////////////////////////////////////
// Table B-12 for vector>0, indexed by mv*2, with blank trailing sign bit
// and leading 0 bit included. Entry #0 (mv==0) is excluded.
// {code, len} pairs, eight entries per row. CodeVector() ORs the sign into
// the low bit of `code` before writing `len` bits.
static const VLC MV_B12_Tab[32] = {
	/*  0.. 7 */ { 2,  3}, { 2,  4}, { 2,  5}, { 6,  7}, {10,  8}, { 8,  8}, { 6,  8}, {22, 10},
	/*  8..15 */ {20, 10}, {18, 10}, {34, 11}, {32, 11}, {30, 11}, {28, 11}, {26, 11}, {24, 11},
	/* 16..23 */ {22, 11}, {20, 11}, {18, 11}, {16, 11}, {14, 11}, {12, 11}, {10, 11}, { 8, 11},
	/* 24..31 */ {14, 12}, {12, 12}, {10, 12}, { 8, 12}, { 6, 12}, { 4, 12}, { 6, 13}, { 4, 13}
};
// Writes one motion-vector component to the bitstream and updates the
// motion-vector statistics.
//
//   bs     - output bitstream, handed to BitstreamPutBits()
//   value  - vector component; wrapped into [-high, high-1] with
//            high = 1 << (f_code+4) before coding
//   f_code - selects the range and the number of residual bits (f_code-1)
//   pStat  - iMvCount is incremented on every call; iMvSum accumulates
//            value*value for non-zero (wrapped) values
static __inline void
CodeVector(Bitstream * bs,
		   int32_t value,
		   int32_t f_code,
		   Statistics * pStat)
{
	const int range = 1 << (f_code + 4);
	int32_t residual_bits;
	int negative, magnitude, index;

	// Wrap the component back into the representable range.
	if (value < -range)
		value += 2 * range;
	if (value >= range)
		value -= 2 * range;

	pStat->iMvCount++;

	// Zero vector: special case, hardcoded single '1' bit.
	if (value == 0) {
		BitstreamPutBits(bs, 1, 1);
		return;
	}

	pStat->iMvSum += value * value;

	// Map to a zero-based magnitude: -v-1 for negative v, v-1 for positive v.
	negative = (value < 0);
	magnitude = negative ? ~value : value - 1;

	// The upper part of the magnitude selects the table entry, whose low bit
	// is left blank for the sign; the lower f_code-1 bits follow as residual.
	residual_bits = f_code - 1;
	index = magnitude >> residual_bits;
	// assert(index>=0 && index<=31);
	BitstreamPutBits(bs, MV_B12_Tab[index].code | negative,
					 MV_B12_Tab[index].len);

	if (residual_bits != 0)
		BitstreamPutBits(bs, magnitude & ((1 << residual_bits) - 1),
						 residual_bits);
}
/////////////////////////////////////////////////////////////////////////////////
#include "alt_tab.h"
// Writes the VLC for a single coefficient `level` to `bs`.
//
//   bs    - output bitstream, handed to BitstreamPutBits()
//   level - coefficient value; must be non-zero and within [-2048,2047]
//   tab   - lookup table indexed directly by `level` for levels in
//           [-64,63] (negative indices included, so `tab` must point into
//           the middle of its row); tab[0] holds the Esc3 code base
//           0x01e02001 | (last<<20) | (run<<14)
//
// Table entries with bit 31 clear hold the code in the low 24 bits and its
// bit length in the top byte; entries with bit 31 set hold a complete
// Esc3 code in the low 31 bits.
static __inline void
EncodeCoeff(Bitstream * bs,
			int level,
			const uint32_t *tab)
{
	// Esc3 code length: 7+2+1+6+1+12+1 = 30 bits
	enum { ESC3_BITS = 7 + 2 + 1 + 6 + 1 + 12 + 1 };

	// assert(level!=0 && level>=-2048 && level<=2047);
	if (!((level + 64) & -128)) {	// level is in [-64,63] -> use table
		const uint32_t code = tab[level];
		if (0 <= (int32_t)code)		// bit 31 clear -> regular 21bits-max code
			BitstreamPutBits(bs, code & 0x00ffffff, code >> 24);
		else						// bit 31 set -> code is actually a 30bits-Esc3
			BitstreamPutBits(bs, code & 0x7fffffff, ESC3_BITS);
	}
	else {
		// Esc3 encoding (30 bits total) for level not in [-64,63]: the low
		// 12 bits of `level` are inserted at bit 1 of the code base in tab[0].
		const uint32_t code = tab[0] | ((level & 0xfff) << 1);
		BitstreamPutBits(bs, code, ESC3_BITS);
	}
}
// Writes the non-zero coefficients of one 8x8 block to the bitstream as a
// sequence of (run, level) events, visiting coefficients in `zigzag` order.
//
//   bs     - output bitstream, passed through to EncodeCoeff()
//   qcoeff - the 64 quantized coefficients of the block
//   unused - not referenced by this implementation
//   zigzag - scan table: zigzag[i] is the qcoeff index of the i-th scanned
//            coefficient
//   intra  - number of leading scan positions to skip, and first index into
//            B16_17_Tabs (presumably 1 for intra blocks, 0 otherwise --
//            confirm against callers)
//
// Precondition: at least one scanned coefficient is non-zero. Nothing stops
// the backward scan below if the block is all zero, so it would read before
// the start of the scan table.
static __inline void
CodeCoeff(Bitstream * bs,
const int16_t qcoeff[64],
VLC * unused,
const uint16_t * zigzag,
uint32_t intra)
{
int level, last, j;
// Skip the first `intra` scan positions, then search backwards from the
// end of the scan for the final non-zero coefficient. On exit `last` is its
// position relative to the (advanced) `zigzag` pointer.
last = 64 - intra;
zigzag += intra;
while(!qcoeff[zigzag[--last]]);
// assert(last>=0); // otherwise, we shouldn't be here (->cbp)...
// Rebase the scan table on the final non-zero coefficient, so that index 0
// is that coefficient and j runs upward from -last. Reaching j==0 means
// the final coefficient has been reached.
zigzag += last;
j = -last;
while(1) {
// Skip zero coefficients; the loop always terminates because zigzag[0]
// addresses a non-zero coefficient.
while(!(level=qcoeff[zigzag[j]])) j++;
// On entry to the skip loop `last` equals minus the starting j, so adding
// the new j turns `last` into the number of zeros skipped (the run).
last += j;
if (j<0) {
// Not the final coefficient: second table index is 0.
EncodeCoeff( bs, level, &B16_17_Tabs[intra][0][last][64] );
// Step past the coded coefficient and restore last == -j for the next run.
last = -++j;
}
else break;
}
// Final coefficient: second table index is 1; `last` holds its run. The
// trailing [64] makes the table pointer address the middle of the row, so
// EncodeCoeff() can index it with negative levels.
EncodeCoeff( bs, level, &B16_17_Tabs[intra][1][last][64] );
}