Re[2]: [XviD-devel] motion estimation for B/P/I decision

Fri, 20 Sep 2002 00:34:05 +0200

This is a multi-part message in MIME format.

------=_NextPart_000_000F_01C2603D.6CB09AA0
Content-Type: text/plain;
	charset="iso-8859-1"
Content-Transfer-Encoding: 7bit

Marco Al wrote:
> I could make a C
> implementation of the image based one if you want.

Well I tried my hand at it, but it got rather uglier than I intended so I didnt
get a chance to start debugging it yet ... going to sleep now, Ill try to get it
to work tomorrow. Take a look if you want, it uses accumulated sum tables to
"cheaply" calculate the block sum x/y/x^2/y^2/xy for each pixel (I realise the
numbers will overflow, but it doesnt matter the final sum will be in the correct
range again). You can use the same method to calculate correlation for instance,
if you used it for block matching the only per pixel operations would be
calculating xy. Would probably only be slightly slower than SSD.

Warning, ugly code ahead ... guarantueed not to work.

Marco

------=_NextPart_000_000F_01C2603D.6CB09AA0
Content-Type: text/plain;
	name="image.c"
Content-Transfer-Encoding: quoted-printable
Content-Disposition: attachment;
	filename="image.c"

/************************************************************************=
*****
 *
 *  XVID MPEG-4 VIDEO CODEC
 *  - image module -
 *
 *  Copyright(C) 2002 Peter Ross <pross@xvid.org>
 *
 *  This program is an implementation of a part of one or more MPEG-4
 *  Video tools as specified in ISO/IEC 14496-2 standard.  Those =
intending
 *  to use this software module in hardware or software products are
 *  advised that its use may infringe existing patents or copyrights, =
and
 *  any such use would be at such party's own risk.  The original
 *  developer of this software module and his/her company, and =
subsequent
 *  editors and their companies, will have no liability for use of this
 *  software or modifications or derivatives thereof.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 =
USA
 *
 =
*************************************************************************=
***/

#include <stdlib.h>
#include <string.h>				// memcpy, memset
#include <math.h>

#include "../portab.h"
#include "../xvid.h"			// XVID_CSP_XXX's
#include "image.h"
#include "colorspace.h"
#include "interpolate8x8.h"
#include "../divx4.h"
#include "../utils/mem_align.h"

#define SAFETY	64
#define EDGE_SIZE2  (EDGE_SIZE/2)
#define BLOCK_SIZE 8
#define N BLOCK_SIZE * BLOCK_SIZE

int32_t
image_create(IMAGE * image,
			 uint32_t edged_width,
			 uint32_t edged_height)
{
	const uint32_t edged_width2 =3D edged_width / 2;
	const uint32_t edged_height2 =3D edged_height / 2;
	uint32_t i;

	image->y =3D
		xvid_malloc(edged_width * (edged_height + 1) + SAFETY, CACHE_LINE);
	if (image->y =3D=3D NULL) {
		return -1;
	}

	for (i =3D 0; i < edged_width * edged_height + SAFETY; i++) {
		image->y[i] =3D 0;
	}

	image->u =3D xvid_malloc(edged_width2 * edged_height2 + SAFETY, =
CACHE_LINE);
	if (image->u =3D=3D NULL) {
		xvid_free(image->y);
		return -1;
	}
	image->v =3D xvid_malloc(edged_width2 * edged_height2 + SAFETY, =
CACHE_LINE);
	if (image->v =3D=3D NULL) {
		xvid_free(image->u);
		xvid_free(image->y);
		return -1;
	}

	image->y +=3D EDGE_SIZE * edged_width + EDGE_SIZE;
	image->u +=3D EDGE_SIZE2 * edged_width2 + EDGE_SIZE2;
	image->v +=3D EDGE_SIZE2 * edged_width2 + EDGE_SIZE2;

	return 0;
}

void
image_destroy(IMAGE * image,
			  uint32_t edged_width,
			  uint32_t edged_height)
{
	const uint32_t edged_width2 =3D edged_width / 2;

	if (image->y) {
		xvid_free(image->y - (EDGE_SIZE * edged_width + EDGE_SIZE));
	}
	if (image->u) {
		xvid_free(image->u - (EDGE_SIZE2 * edged_width2 + EDGE_SIZE2));
	}
	if (image->v) {
		xvid_free(image->v - (EDGE_SIZE2 * edged_width2 + EDGE_SIZE2));
	}
}

void
image_swap(IMAGE * image1,
		   IMAGE * image2)
{
	uint8_t *tmp;

	tmp =3D image1->y;
	image1->y =3D image2->y;
	image2->y =3D tmp;

	tmp =3D image1->u;
	image1->u =3D image2->u;
	image2->u =3D tmp;

	tmp =3D image1->v;
	image1->v =3D image2->v;
	image2->v =3D tmp;
}

void
image_copy(IMAGE * image1,
		   IMAGE * image2,
		   uint32_t edged_width,
		   uint32_t height)
{
	memcpy(image1->y, image2->y, edged_width * height);
	memcpy(image1->u, image2->u, edged_width * height / 4);
	memcpy(image1->v, image2->v, edged_width * height / 4);
}

void
image_setedges(IMAGE * image,
			   uint32_t edged_width,
			   uint32_t edged_height,
			   uint32_t width,
			   uint32_t height,
			   uint32_t interlacing)
{
	const uint32_t edged_width2 =3D edged_width / 2;
	const uint32_t width2 =3D width / 2;
	uint32_t i;
	uint8_t *dst;
	uint8_t *src;

	dst =3D image->y - (EDGE_SIZE + EDGE_SIZE * edged_width);
	src =3D image->y;

	for (i =3D 0; i < EDGE_SIZE; i++) {
/*		// if interlacing, edges contain top-most data from each field
		if (interlacing && (i & 1)) {
			memset(dst, *(src + edged_width), EDGE_SIZE);
			memcpy(dst + EDGE_SIZE, src + edged_width, width);
			memset(dst + edged_width - EDGE_SIZE,
				   *(src + edged_width + width - 1), EDGE_SIZE);
		} else {*/
			memset(dst, *src, EDGE_SIZE);
			memcpy(dst + EDGE_SIZE, src, width);
			memset(dst + edged_width - EDGE_SIZE, *(src + width - 1),
				   EDGE_SIZE);
		/*}*/
		dst +=3D edged_width;
	}

	for (i =3D 0; i < height; i++) {
		memset(dst, *src, EDGE_SIZE);
		memset(dst + edged_width - EDGE_SIZE, src[width - 1], EDGE_SIZE);
		dst +=3D edged_width;
		src +=3D edged_width;
	}

	src -=3D edged_width;
	for (i =3D 0; i < EDGE_SIZE; i++) {
/*		// if interlacing, edges contain bottom-most data from each field
		if (interlacing && !(i & 1)) {
			memset(dst, *(src - edged_width), EDGE_SIZE);
			memcpy(dst + EDGE_SIZE, src - edged_width, width);
			memset(dst + edged_width - EDGE_SIZE,
				   *(src - edged_width + width - 1), EDGE_SIZE);
		} else {*/
			memset(dst, *src, EDGE_SIZE);
			memcpy(dst + EDGE_SIZE, src, width);
			memset(dst + edged_width - EDGE_SIZE, *(src + width - 1),
				   EDGE_SIZE);
		/*}*/
		dst +=3D edged_width;
	}

//U
	dst =3D image->u - (EDGE_SIZE2 + EDGE_SIZE2 * edged_width2);
	src =3D image->u;

	for (i =3D 0; i < EDGE_SIZE2; i++) {
		memset(dst, *src, EDGE_SIZE2);
		memcpy(dst + EDGE_SIZE2, src, width2);
		memset(dst + edged_width2 - EDGE_SIZE2, *(src + width2 - 1),
			   EDGE_SIZE2);
		dst +=3D edged_width2;
	}

	for (i =3D 0; i < height / 2; i++) {
		memset(dst, *src, EDGE_SIZE2);
		memset(dst + edged_width2 - EDGE_SIZE2, src[width2 - 1], EDGE_SIZE2);
		dst +=3D edged_width2;
		src +=3D edged_width2;
	}
	src -=3D edged_width2;
	for (i =3D 0; i < EDGE_SIZE2; i++) {
		memset(dst, *src, EDGE_SIZE2);
		memcpy(dst + EDGE_SIZE2, src, width2);
		memset(dst + edged_width2 - EDGE_SIZE2, *(src + width2 - 1),
			   EDGE_SIZE2);
		dst +=3D edged_width2;
	}

// V
	dst =3D image->v - (EDGE_SIZE2 + EDGE_SIZE2 * edged_width2);
	src =3D image->v;

	for (i =3D 0; i < EDGE_SIZE2; i++) {
		memset(dst, *src, EDGE_SIZE2);
		memcpy(dst + EDGE_SIZE2, src, width2);
		memset(dst + edged_width2 - EDGE_SIZE2, *(src + width2 - 1),
			   EDGE_SIZE2);
		dst +=3D edged_width2;
	}

	for (i =3D 0; i < height / 2; i++) {
		memset(dst, *src, EDGE_SIZE2);
		memset(dst + edged_width2 - EDGE_SIZE2, src[width2 - 1], EDGE_SIZE2);
		dst +=3D edged_width2;
		src +=3D edged_width2;
	}
	src -=3D edged_width2;
	for (i =3D 0; i < EDGE_SIZE2; i++) {
		memset(dst, *src, EDGE_SIZE2);
		memcpy(dst + EDGE_SIZE2, src, width2);
		memset(dst + edged_width2 - EDGE_SIZE2, *(src + width2 - 1),
			   EDGE_SIZE2);
		dst +=3D edged_width2;
	}
}

// bframe encoding requires image-based u,v interpolation
void
image_interpolate(const IMAGE * refn,
				  IMAGE * refh,
				  IMAGE * refv,
				  IMAGE * refhv,
				  uint32_t edged_width,
				  uint32_t edged_height,
				  uint32_t rounding)
{
	const uint32_t offset =3D EDGE_SIZE * (edged_width + 1);
	const uint32_t stride_add =3D 7 * edged_width;

	uint8_t *n_ptr, *h_ptr, *v_ptr, *hv_ptr;
	uint32_t x, y;

	n_ptr =3D refn->y;
	h_ptr =3D refh->y;
	v_ptr =3D refv->y;
	hv_ptr =3D refhv->y;

	n_ptr -=3D offset;
	h_ptr -=3D offset;
	v_ptr -=3D offset;
	hv_ptr -=3D offset;

	for (y =3D 0; y < edged_height; y =3D y + 8) {
		for (x =3D 0; x < edged_width; x =3D x + 8) {
			interpolate8x8_halfpel_h(h_ptr, n_ptr, edged_width, rounding);
			interpolate8x8_halfpel_v(v_ptr, n_ptr, edged_width, rounding);
			interpolate8x8_halfpel_hv(hv_ptr, n_ptr, edged_width, rounding);

			n_ptr +=3D 8;
			h_ptr +=3D 8;
			v_ptr +=3D 8;
			hv_ptr +=3D 8;
		}
		h_ptr +=3D stride_add;
		v_ptr +=3D stride_add;
		hv_ptr +=3D stride_add;
		n_ptr +=3D stride_add;
	}

	/*
	   interpolate_halfpel_h(
	   refh->y - offset,
	   refn->y - offset,=20
	   edged_width, edged_height,
	   rounding);

	   interpolate_halfpel_v(
	   refv->y - offset,
	   refn->y - offset,=20
	   edged_width, edged_height,
	   rounding);

	   interpolate_halfpel_hv(
	   refhv->y - offset,
	   refn->y - offset,
	   edged_width, edged_height,
	   rounding);
	 */

	/* uv-image-based compensation
	   offset =3D EDGE_SIZE2 * (edged_width / 2 + 1);

	   interpolate_halfpel_h(
	   refh->u - offset,
	   refn->u - offset,=20
	   edged_width / 2, edged_height / 2,
	   rounding);

	   interpolate_halfpel_v(
	   refv->u - offset,
	   refn->u - offset,=20
	   edged_width / 2, edged_height / 2,
	   rounding);

	   interpolate_halfpel_hv(
	   refhv->u - offset,
	   refn->u - offset,=20
	   edged_width / 2, edged_height / 2,
	   rounding);

	   interpolate_halfpel_h(
	   refh->v - offset,
	   refn->v - offset,=20
	   edged_width / 2, edged_height / 2,
	   rounding);

	   interpolate_halfpel_v(
	   refv->v - offset,
	   refn->v - offset,=20
	   edged_width / 2, edged_height / 2,
	   rounding);

	   interpolate_halfpel_hv(
	   refhv->v - offset,
	   refn->v - offset,=20
	   edged_width / 2, edged_height / 2,
	   rounding);
	 */
}

int
image_input(IMAGE * image,
			uint32_t width,
			int height,
			uint32_t edged_width,
			uint8_t * src,
			int csp)
{

/*	if (csp & XVID_CSP_VFLIP)
	{
		height =3D -height;
	}
*/

	switch (csp & ~XVID_CSP_VFLIP) {
	case XVID_CSP_RGB555:
		rgb555_to_yv12(image->y, image->u, image->v, src, width, height,
					   edged_width);
		return 0;

	case XVID_CSP_RGB565:
		rgb565_to_yv12(image->y, image->u, image->v, src, width, height,
					   edged_width);
		return 0;

	case XVID_CSP_RGB24:
		rgb24_to_yv12(image->y, image->u, image->v, src, width, height,
					  edged_width);
		return 0;

	case XVID_CSP_RGB32:
		rgb32_to_yv12(image->y, image->u, image->v, src, width, height,
					  edged_width);
		return 0;

	case XVID_CSP_I420:
		yuv_to_yv12(image->y, image->u, image->v, src, width, height,
					edged_width);
		return 0;

	case XVID_CSP_YV12:		/* u/v swapped */
		yuv_to_yv12(image->y, image->v, image->u, src, width, height,
					edged_width);
		return 0;

	case XVID_CSP_YUY2:
		yuyv_to_yv12(image->y, image->u, image->v, src, width, height,
					 edged_width);
		return 0;

	case XVID_CSP_YVYU:		/* u/v swapped */
		yuyv_to_yv12(image->y, image->v, image->u, src, width, height,
					 edged_width);
		return 0;

	case XVID_CSP_UYVY:
		uyvy_to_yv12(image->y, image->u, image->v, src, width, height,
					 edged_width);
		return 0;

	case XVID_CSP_USER:
		user_to_yuv_c(image->y, image->u, image->v, edged_width,
					  (DEC_PICTURE *) src, width, height);
		return 0;

	case XVID_CSP_NULL:
		break;

	}

	return -1;
}

int
image_output(IMAGE * image,
			 uint32_t width,
			 int height,
			 uint32_t edged_width,
			 uint8_t * dst,
			 uint32_t dst_stride,
			 int csp)
{
	if (csp & XVID_CSP_VFLIP) {
		height =3D -height;
	}

	switch (csp & ~XVID_CSP_VFLIP) {
	case XVID_CSP_RGB555:
		yv12_to_rgb555(dst, dst_stride, image->y, image->u, image->v,
					   edged_width, edged_width / 2, width, height);
		return 0;

	case XVID_CSP_RGB565:
		yv12_to_rgb565(dst, dst_stride, image->y, image->u, image->v,
					   edged_width, edged_width / 2, width, height);
		return 0;

	case XVID_CSP_RGB24:
		yv12_to_rgb24(dst, dst_stride, image->y, image->u, image->v,
					  edged_width, edged_width / 2, width, height);
		return 0;

	case XVID_CSP_RGB32:
		yv12_to_rgb32(dst, dst_stride, image->y, image->u, image->v,
					  edged_width, edged_width / 2, width, height);
		return 0;

	case XVID_CSP_I420:
		yv12_to_yuv(dst, dst_stride, image->y, image->u, image->v, =
edged_width,
					edged_width / 2, width, height);
		return 0;

	case XVID_CSP_YV12:		// u,v swapped
		yv12_to_yuv(dst, dst_stride, image->y, image->v, image->u, =
edged_width,
					edged_width / 2, width, height);
		return 0;

	case XVID_CSP_YUY2:
		yv12_to_yuyv(dst, dst_stride, image->y, image->u, image->v,
					 edged_width, edged_width / 2, width, height);
		return 0;

	case XVID_CSP_YVYU:		// u,v swapped
		yv12_to_yuyv(dst, dst_stride, image->y, image->v, image->u,
					 edged_width, edged_width / 2, width, height);
		return 0;

	case XVID_CSP_UYVY:
		yv12_to_uyvy(dst, dst_stride, image->y, image->u, image->v,
					 edged_width, edged_width / 2, width, height);
		return 0;

	case XVID_CSP_USER:
		((DEC_PICTURE *) dst)->y =3D image->y;
		((DEC_PICTURE *) dst)->u =3D image->u;
		((DEC_PICTURE *) dst)->v =3D image->v;
		((DEC_PICTURE *) dst)->stride_y =3D edged_width;
		((DEC_PICTURE *) dst)->stride_uv =3D edged_width / 2;
		return 0;

	case XVID_CSP_NULL:
	case XVID_CSP_EXTERN:
		return 0;

	}

	return -1;
}

float
image_psnr(IMAGE * orig_image,
		   IMAGE * recon_image,
		   uint16_t stride,
		   uint16_t width,
		   uint16_t height)
{
	int32_t diff, x, y, quad =3D 0;
	uint8_t *orig =3D orig_image->y;
	uint8_t *recon =3D recon_image->y;
	float psnr_y;

	for (y =3D 0; y < height; y++) {
		for (x =3D 0; x < width; x++) {
			diff =3D *(orig + x) - *(recon + x);
			quad +=3D diff * diff;
		}
		orig +=3D stride;
		recon +=3D stride;
	}

	psnr_y =3D (float) quad / (float) (width * height);

	if (psnr_y) {
		psnr_y =3D (float) (255 * 255) / psnr_y;
		psnr_y =3D 10 * (float) log10(psnr_y);
	} else
		psnr_y =3D (float) 99.99;

	return psnr_y;
}

float
image_structural_distortion(IMAGE * orig_image,
		   IMAGE * recon_image,
		   uint16_t stride,
		   uint16_t width,
		   uint16_t height)
{
	int32_t x, y;
	uint8_t *orig =3D orig_image->y;
	uint8_t *recon =3D recon_image->y;
	uint32_t orig_sum_block, recon_sum_block, orig_sq_sum_block, =
recon_sq_sum_block, origrecon_sum_block,
		origrecon_sum_block_mul, origrecon_sum_block_sq_sum;
	float numerator, denominator1, denominator, q, structural_distortion =
=3D 0;

	// not worth aligning well given the way we are accessing them
	uint16_t *orig_sum_ptr, *orig_sum =3D xvid_malloc ((height + 1) * =
(width + 1) * 2, 2);
	uint16_t *recon_sum_ptr, *recon_sum =3D xvid_malloc ((height + 1) * =
(width + 1) * 2, 2);
	uint32_t *orig_sq_sum_ptr, *orig_sq_sum =3D xvid_malloc ((height + 1) * =
(width + 1) * 2, 4);
	uint32_t *recon_sq_sum_ptr, *recon_sq_sum =3D xvid_malloc ((height + 1) =
* (width + 1) * 2, 4);
	uint32_t *origrecon_sum_ptr, *origrecon_sum =3D xvid_malloc ((height + =
1) * (width + 1) * 2, 4);

	uint32_t orig_sum_temp =3D 0;
	uint32_t recon_sum_temp =3D 0;
	uint32_t orig_sq_sum_temp =3D 0;
	uint32_t recon_sq_sum_temp =3D 0;
	uint32_t origrecon_sum_temp =3D 0;

	orig_sum_ptr =3D orig_sum;
	recon_sum_ptr =3D recon_sum;
	orig_sq_sum_ptr =3D orig_sq_sum;
	recon_sq_sum_ptr =3D recon_sq_sum;
	origrecon_sum_ptr =3D origrecon_sum;

	for (y =3D 0; y < height + 1; y++) {
		*(orig_sum + y * (width + 1)) =3D 0;
		*(recon_sum + y * (width + 1)) =3D 0;
		*(orig_sq_sum + y * (width + 1)) =3D 0;
		*(recon_sq_sum + y * (width + 1)) =3D 0;
		*(origrecon_sum + y * (width + 1)) =3D 0;
	}

	for (x =3D 0; x < width + 1; x++) {
		*(orig_sum + x) =3D 0;
		*(recon_sum + x) =3D 0;
		*(orig_sq_sum + x) =3D 0;
		*(recon_sq_sum + x) =3D 0;
		*(origrecon_sum + x) =3D 0;
	}

	orig_sum +=3D width + 2;
	recon_sum +=3D width + 2;
	orig_sq_sum +=3D width + 2;
	recon_sq_sum +=3D width + 2;
	origrecon_sum +=3D width + 2;

	for (y =3D 0; y < height; y++) {
		orig_sum_temp =3D 0;
		recon_sum_temp =3D 0;
		orig_sq_sum_temp =3D 0;
		recon_sq_sum_temp =3D 0;
		origrecon_sum_temp =3D 0;

		for (x =3D 0; x < width; x++) {
			orig_sum_temp +=3D  *(orig + x);
			*(orig_sum + x) =3D *(orig_sum + x - width - 1) + orig_sum_temp;
			recon_sum_temp +=3D *(recon + x);
			*(recon_sum + x) =3D *(recon_sum + x - width - 1) + recon_sum_temp;
			orig_sq_sum_temp +=3D *(orig + x) * *(orig + x);
			*(orig_sq_sum + x) =3D *(orig_sq_sum + x - width - 1) + =
orig_sq_sum_temp;
			recon_sq_sum_temp +=3D *(recon + x) * *(recon + x);
			*(recon_sq_sum + x) =3D *(recon_sq_sum + x - width - 1) + =
recon_sq_sum_temp;
			origrecon_sum_temp +=3D *(orig + x) * *(recon + x);
			*(origrecon_sum + x) =3D *(origrecon_sum + x - width - 1) + =
origrecon_sum_temp;
		}

		orig +=3D stride;
		recon +=3D stride;
		orig_sum +=3D width + 1;
		recon_sum +=3D width + 1;
		orig_sq_sum +=3D width + 1;
		recon_sq_sum +=3D width + 1;
		origrecon_sum +=3D width + 1;
	}

	orig =3D orig_image->y;
	recon =3D recon_image->y;
	orig_sum =3D orig_sum_ptr;
	recon_sum =3D recon_sum_ptr;
    orig_sq_sum =3D orig_sq_sum_ptr;
	recon_sq_sum =3D recon_sq_sum_ptr;
    origrecon_sum =3D origrecon_sum_ptr;

	for (y =3D 0; y < height - BLOCK_SIZE + 1; y++) {
		for (x =3D 0; x < width - BLOCK_SIZE + 1; x++) {
			orig_sum_block =3D *(orig_sum + x + BLOCK_SIZE + (y + BLOCK_SIZE) * =
(width + 1)) -
                *(orig_sum + x + BLOCK_SIZE + y * (width + 1)) -
				*(orig_sum + x + (y + BLOCK_SIZE) * (width + 1)) +
				*(orig_sum + x + y * (width + 1));
			recon_sum_block =3D *(recon_sum + x + BLOCK_SIZE + (y + BLOCK_SIZE) * =
(width + 1)) -
				*(recon_sum + x + BLOCK_SIZE + y * (width + 1)) -
				*(recon_sum + x + (y + BLOCK_SIZE) * (width + 1)) +
				*(recon_sum + x + y * (width + 1));
			orig_sq_sum_block =3D *(orig_sq_sum + x + BLOCK_SIZE + (y + =
BLOCK_SIZE) * (width + 1)) -
				*(orig_sq_sum + x + BLOCK_SIZE + y * (width + 1)) -
				*(orig_sq_sum + x + (y + BLOCK_SIZE) * (width + 1)) +
				*(orig_sq_sum + x + y * (width + 1));
			recon_sq_sum_block =3D *(recon_sq_sum + x + BLOCK_SIZE + (y + =
BLOCK_SIZE) * (width + 1)) -
				*(recon_sq_sum + x + BLOCK_SIZE + y * (width + 1)) -
				*(recon_sq_sum + x + (y + BLOCK_SIZE) * (width + 1)) +
				*(recon_sq_sum + x + y * (width + 1));
			origrecon_sum_block =3D *(origrecon_sum + x + BLOCK_SIZE + (y + =
BLOCK_SIZE) * (width + 1)) -
				*(origrecon_sum + x + BLOCK_SIZE + y * (width + 1)) -
				*(origrecon_sum + x + (y + BLOCK_SIZE) * (width + 1)) +
				*(origrecon_sum + x + y * (width + 1));
			origrecon_sum_block_mul =3D orig_sum_block * recon_sum_block;
			/* the matlab equivalent for the next name was silly, yes even =
sillier than these ones, so instead
			of sq sum mul I used sum sq sum */
			origrecon_sum_block_sq_sum =3D orig_sum_block * orig_sum_block + =
recon_sum_block * recon_sum_block;

			numerator =3D 4 * (N * (float)(origrecon_sum_block - =
origrecon_sum_block_mul)) * (float)origrecon_sum_block_mul;
			denominator1 =3D N * (float)((orig_sq_sum_block + recon_sq_sum_block) =
- origrecon_sum_block_sq_sum);
			denominator =3D denominator1 * origrecon_sum_block_sq_sum;

			q =3D 1;
			if ((denominator1 =3D=3D 0) && (origrecon_sum_block_sq_sum !=3D 0))
				q =3D 2 * (float)origrecon_sum_block_mul / =
origrecon_sum_block_sq_sum;
			if (denominator !=3D 0)
				q =3D numerator / denominator;
		=09
			structural_distortion +=3D q;
		}
	}

	structural_distortion /=3D (width - BLOCK_SIZE + 1) * (height - =
BLOCK_SIZE + 1);
	return structural_distortion;
}

/*

#include <stdio.h>
#include <string.h>

int image_dump_pgm(uint8_t * bmp, uint32_t width, uint32_t height, char =
* filename)
{
	FILE * f;
	char hdr[1024];
=09
	f =3D fopen(filename, "wb");
	if ( f =3D=3D NULL)
	{
		return -1;
	}
	sprintf(hdr, "P5\n#xvid\n%i %i\n255\n", width, height);
	fwrite(hdr, strlen(hdr), 1, f);
	fwrite(bmp, width, height, f);
	fclose(f);

	return 0;
}

// dump image+edges to yuv pgm files=20

int image_dump(IMAGE * image, uint32_t edged_width, uint32_t =
edged_height, char * path, int number)
{
	char filename[1024];

	sprintf(filename, "%s_%i_%c.pgm", path, number, 'y');
	image_dump_pgm(
		image->y - (EDGE_SIZE * edged_width + EDGE_SIZE),
		edged_width, edged_height, filename);

	sprintf(filename, "%s_%i_%c.pgm", path, number, 'u');
	image_dump_pgm(
		image->u - (EDGE_SIZE2 * edged_width / 2 + EDGE_SIZE2),
		edged_width / 2, edged_height / 2, filename);

	sprintf(filename, "%s_%i_%c.pgm", path, number, 'v');
	image_dump_pgm(
		image->v - (EDGE_SIZE2 * edged_width / 2 + EDGE_SIZE2),
		edged_width / 2, edged_height / 2, filename);

	return 0;
}
*/

/* dump image to yuvpgm file */

#include <stdio.h>

int
image_dump_yuvpgm(const IMAGE * image,
				  const uint32_t edged_width,
				  const uint32_t width,
				  const uint32_t height,
				  char *filename)
{
	FILE *f;
	char hdr[1024];
	uint32_t i;
	uint8_t *bmp1;
	uint8_t *bmp2;

	f =3D fopen(filename, "wb");
	if (f =3D=3D NULL) {
		return -1;
	}
	sprintf(hdr, "P5\n#xvid\n%i %i\n255\n", width, (3 * height) / 2);
	fwrite(hdr, strlen(hdr), 1, f);

	bmp1 =3D image->y;
	for (i =3D 0; i < height; i++) {
		fwrite(bmp1, width, 1, f);
		bmp1 +=3D edged_width;
	}

	bmp1 =3D image->u;
	bmp2 =3D image->v;
	for (i =3D 0; i < height / 2; i++) {
		fwrite(bmp1, width / 2, 1, f);
		fwrite(bmp2, width / 2, 1, f);
		bmp1 +=3D edged_width / 2;
		bmp2 +=3D edged_width / 2;
	}

	fclose(f);
	return 0;
}

#define ABS(X)    (((X)>0)?(X):-(X))
float
image_mad(const IMAGE * img1,
		  const IMAGE * img2,
		  uint32_t stride,
		  uint32_t width,
		  uint32_t height)
{
	const uint32_t stride2 =3D stride / 2;
	const uint32_t width2 =3D width / 2;
	const uint32_t height2 =3D height / 2;

	uint32_t x, y;
	uint32_t sum =3D 0;

	for (y =3D 0; y < height; y++)
		for (x =3D 0; x < width; x++)
			sum +=3D ABS(img1->y[x + y * stride] - img2->y[x + y * stride]);

	for (y =3D 0; y < height2; y++)
		for (x =3D 0; x < width2; x++)
			sum +=3D ABS(img1->u[x + y * stride2] - img2->u[x + y * stride2]);

	for (y =3D 0; y < height2; y++)
		for (x =3D 0; x < width2; x++)
			sum +=3D ABS(img1->v[x + y * stride2] - img2->v[x + y * stride2]);

	return (float) sum / (width * height * 3 / 2);
}

void
output_slice(IMAGE * cur, int std, int width, XVID_DEC_PICTURE* out_frm, =
int mbx, int mby,int mbl) {
  uint8_t *dY,*dU,*dV,*sY,*sU,*sV;
  int std2 =3D std >> 1;
  int w =3D mbl << 4, w2,i;

  if(w > width)
    w =3D width;
  w2 =3D w >> 1;

  dY =3D (uint8_t*)out_frm->y + (mby << 4) * out_frm->stride_y + (mbx << =
4);
  dU =3D (uint8_t*)out_frm->u + (mby << 3) * out_frm->stride_u + (mbx << =
3);
  dV =3D (uint8_t*)out_frm->v + (mby << 3) * out_frm->stride_v + (mbx << =
3);
  sY =3D cur->y + (mby << 4) * std + (mbx << 4);
  sU =3D cur->u + (mby << 3) * std2 + (mbx << 3);
  sV =3D cur->v + (mby << 3) * std2 + (mbx << 3);

  for(i =3D 0 ; i < 16 ; i++) {
    memcpy(dY,sY,w);
    dY +=3D out_frm->stride_y;
    sY +=3D std;
  }
  for(i =3D 0 ; i < 8 ; i++) {
    memcpy(dU,sU,w2);
    dU +=3D out_frm->stride_u;
    sU +=3D std2;
  }
  for(i =3D 0 ; i < 8 ; i++) {
    memcpy(dV,sV,w2);
    dV +=3D out_frm->stride_v;
    sV +=3D std2;
  }
}

------=_NextPart_000_000F_01C2603D.6CB09AA0--