Commit f44f2f9c authored by Aaron Holtzman

Remove the goofy mb_buffer stuff that was killing us. I don't know what I was thinking.

Also add new yuv2rgb code from Michel LESPINASSE which is much faster
in the 32 bit case.

cheers,
aaron
parent 786dee7a
......@@ -2,6 +2,8 @@ mpeg2dec-0.1.7
-added Xv support
-major cruft removal
-cleanup on motion_comp
-remove mb_buffer goofiness
-new yuv2rgb code
mpeg2dec-0.1.6 Wed Mar 22 09:45:44 PST 2000
-enhanced g200
......
......@@ -2,11 +2,12 @@
AUTOMAKE_OPTIONS = 1.3 foreign no-dependencies
SUBDIRS = libmpeg2 tools drivers
CFLAGS = -Wall -O3 -g -fomit-frame-pointer
SUBDIRS = libmpeg2 tools drivers .
EXTRA_DIST = Changelog autogen.sh
CFLAGS = -Wall -O3 -g -I/usr/src/linux
bin_PROGRAMS = mpeg2dec
......@@ -20,9 +21,12 @@ EXTRA_mpeg2dec_SOURCES = display_gatos.c display_mga_vid.c display_x11.c\
noinst_HEADERS = config.h display.h yuv2rgb.h yuv2rgb_mlib.h
prof:
$(MAKE) mpeg2dec CFLAGS="-pg $(CFLAGS)"
lib_LTLIBRARIES = libmpeg2.la
libmpeg2_la_SOURCES = mpeg2dec.c
prof:
$(MAKE) mpeg2dec CFLAGS="-pg `echo $(CFLAGS) | sed s/-fomit-frame-pointer//`"
x11:
$(MAKE) -e mpeg2dec mpeg2dec_LDADD="display_x11.o idct_block_mmx.S motion_comp_mmx.c idct_mmx.c"
......@@ -10,7 +10,7 @@ Contributions by (in no particular order):
H}kan Hjort <d95hjort@dtek.chalmers.se> - Solaris fixes
Didier Gautheron <dgautheron@magic.fr> - bug fixes
Gerd Knorr <kraxel@goldbach.in-berlin.de> - Xv support
Michel LESPINASSE <walken@windriver.com> - motion comp cleanup et al.
Michel LESPINASSE <walken@windriver.com> - motion comp cleanup, new yuv2rgb
David I. Lehn <dlehn@vt.edu> - motion_comp mmx code
Fredrik Vraalsen <vraalsen@cs.uiuc.edu> - general hackage and good stuff
(let me know if I've forgotten anyone)
......
......@@ -37,6 +37,7 @@ AC_ARG_WITH(kernel-includes,
])
AC_SUBST(KERNEL_INCLUDES)
CFLAGS = -g -O3 -Wall -fomit-frame-pointer
dnl
dnl Display driver probing...
......@@ -129,7 +130,8 @@ dnl Set the appropriate architecture define
case "$host" in
i?86-*)
AC_DEFINE(__i386__)
LIB_CONFIG_OBJS="$LIB_CONFIG_OBJS idct_mmx.lo idct_block_mmx.lo motion_comp_mmx.lo";;
LIB_CONFIG_OBJS="$LIB_CONFIG_OBJS idct_mmx.lo idct_block_mmx.lo motion_comp_mmx.lo"
CFLAGS="$CFLAGS -march=pentiumpro";;
alpha*-*) AC_DEFINE(__alpha__);;
sparc-*) AC_DEFINE(__sparc__);;
ppc-*) AC_DEFINE(__ppc__);;
......
CFLAGS = -Wall -O3 -g @CFLAGS@ -I/usr/src/linux
CFLAGS = -Wall -O3 -g -fomit-frame-pointer
#lib_LTLIBRARIES = libmpeg2.a
lib_LIBRARIES = libmpeg2.a
noinst_HEADERS = config.h getvlc.h stats.h parse.h mb_buffer.h\
motion_comp.h mpeg2.h mpeg2_internal.h idct.h\
noinst_HEADERS = config.h getvlc.h stats.h parse.h motion_comp.h \
mpeg2.h mpeg2_internal.h idct.h\
mpeg2dec.h debug.h bitstream.h idct_mmx.h idct_mlib.h motion_comp_mmx.h\
motion_comp_mlib.h mmx.h
......@@ -13,7 +13,7 @@ EXTRA_libmpeg2_a_SOURCES = idct_mmx.c idct_block_mmx.S idct_mlib.c\
motion_comp_mmx.c motion_comp_mlib.c
libmpeg2_a_LIBADD = @LIB_CONFIG_OBJS@
libmpeg2_a_SOURCES = getvlc.c mb_buffer.c parse.c stats.c idct.c motion_comp.c\
libmpeg2_a_SOURCES = getvlc.c parse.c stats.c idct.c motion_comp.c\
motion_comp_c.c decode.c bitstream.c debug.c
libmpeg2_a_DEPENDENCIES = @LIB_CONFIG_OBJS@
#libmpeg2_a_LDFLAGS = -version-info 1:6:1
......@@ -33,7 +33,6 @@
#include "mpeg2.h"
#include "mpeg2_internal.h"
#include "mb_buffer.h"
#include "motion_comp.h"
#include "bitstream.h"
#include "idct.h"
......@@ -47,7 +46,11 @@
//this is where we keep the state of the decoder
static picture_t picture;
static slice_t slice;
static macroblock_t *mb;
static macroblock_t mb;
//storage for dct coded blocks plus one row and column of overshoot
static sint_16 y_blocks[4 * 64 + 16];
static sint_16 cr_blocks[64 + 16];
static sint_16 cb_blocks[64 + 16];
//global config struct
mpeg2_config_t config;
......@@ -113,19 +116,6 @@ decode_find_header(uint_32 type,picture_t *picture)
}
}
//Drain the macroblock buffer: fetch the buffered macroblocks, run the idct
//and the motion compensation over them, then continue at the first entry.
static void decode_flush_buffer(void)
{
mb_buffer_t mb_buffer;
//take the buffer base and fill count; this also resets the fill count
mb_buffer_flush(&mb_buffer);
idct(&mb_buffer);
motion_comp(&picture,&mb_buffer);
//reset mb pointer for next slice
mb = mb_buffer.macroblocks;
}
void
mpeg2_init(void)
{
......@@ -148,12 +138,14 @@ mpeg2_init(void)
//FIXME setup config properly
config.flags = MPEG2_MMX_ENABLE;
mb.y_blocks = y_blocks;
mb.cr_blocks = cr_blocks;
mb.cb_blocks = cb_blocks;
//initialize the decoder state (i.e. the parser knows best)
parse_state_init(&picture);
idct_init();
motion_comp_init();
mb = mb_buffer_init(CHROMA_420);
}
......@@ -242,62 +234,59 @@ mpeg2_decode_frame (void)
slice.dc_dct_pred[0]=slice.dc_dct_pred[1]=slice.dc_dct_pred[2]=
1<<(picture.intra_dc_precision + 7);
mb.coded_block_pattern = 0;
mb.skipped = 1;
//handling of skipped mb's differs between P_TYPE and B_TYPE
//pictures
if(picture.picture_coding_type == P_TYPE)
{
parse_reset_pmv(&slice);
memset(mb.f_motion_vectors[0],0,8);
mb.macroblock_type = MACROBLOCK_MOTION_FORWARD;
for(i=0; i< mba_inc - 1; i++)
{
memset(mb->f_motion_vectors[0],0,8);
mb->macroblock_type = MACROBLOCK_MOTION_FORWARD;
mb->coded_block_pattern = 0;
mb->skipped = 1;
mb->mba = ++mba;
mb = mb_buffer_increment();
mb.mba = ++mba;
motion_comp(&picture,&mb);
}
}
else
{
memcpy(mb.f_motion_vectors[0],slice.f_pmv,8);
memcpy(mb.b_motion_vectors[0],slice.b_pmv,8);
mb.macroblock_type = prev_macroblock_type;
for(i=0; i< mba_inc - 1; i++)
{
memcpy(mb->f_motion_vectors[0],slice.f_pmv,8);
memcpy(mb->b_motion_vectors[0],slice.b_pmv,8);
mb->macroblock_type = prev_macroblock_type;
mb->coded_block_pattern = 0;
mb->skipped = 1;
mb->mba = ++mba;
mb = mb_buffer_increment();
mb.mba = ++mba;
motion_comp(&picture,&mb);
}
}
mb.skipped = 0;
}
mb->skipped = 0;
mb->mba = ++mba;
mb.mba = ++mba;
parse_macroblock(&picture,&slice,&mb);
parse_macroblock(&picture,&slice,mb);
//we store the last macroblock mv flags, as skipped b-frame blocks
//inherit them
prev_macroblock_type = mb->macroblock_type & (MACROBLOCK_MOTION_FORWARD | MACROBLOCK_MOTION_BACKWARD);
mb = mb_buffer_increment();
prev_macroblock_type = mb.macroblock_type & (MACROBLOCK_MOTION_FORWARD | MACROBLOCK_MOTION_BACKWARD);
idct(&mb);
motion_comp(&picture,&mb);
if(!mb)
decode_flush_buffer();
}
while(bitstream_show(23));
}
while(mba < last_mba);
decode_flush_buffer();
//FIXME blah
#ifdef __i386__
emms();
#endif
//decide which frame to send to the display
if(picture.picture_coding_type == B_TYPE)
{
......
......@@ -45,12 +45,15 @@
#include "mpeg2.h"
#include "mpeg2_internal.h"
#include "mb_buffer.h"
#include "idct.h"
#include "idct_mmx.h"
#include "idct_mlib.h"
// Fixed Point
// to convert from 16.0 to 12.4 fixed point
#define FP 4
#define W1 2841 /* 2048*sqrt(2)*cos(1*pi/16) */
#define W2 2676 /* 2048*sqrt(2)*cos(2*pi/16) */
#define W3 2408 /* 2048*sqrt(2)*cos(3*pi/16) */
......@@ -60,13 +63,14 @@
// idct main entry point
void (*idct)(mb_buffer_t *mb_buffer);
static void (*idct_block)(sint_16 *block);
static void (*idct_end)();
// private prototypes
static void idct_row(sint_16 *blk);
static void idct_col_s16(sint_16 *blk);
static void idct_c(mb_buffer_t *mb_buffer);
static void idct_row_c(sint_16 *blk);
static void idct_col_s16_c(sint_16 *blk);
static void idct_block_c(sint_16 *block);
static void idct_end_c(void);
// Clamp to [-256,255]
static sint_16 clip_tbl[1024]; /* clipping table */
......@@ -75,24 +79,33 @@ static sint_16 *clip;
void
idct_init(void)
{
sint_32 i;
sint_32 i;
clip = clip_tbl + 512;
clip = clip_tbl + 512;
for (i= -512; i< 512; i++)
clip[i] = (i < -256) ? -256 : ((i > 255) ? 255 : i);
for (i= -512; i< 512; i++)
clip[i] = (i < -256) ? -256 : ((i > 255) ? 255 : i);
#ifdef __i386__
if(config.flags & MPEG2_MMX_ENABLE)
idct = idct_mmx;
{
idct_block = idct_block_mmx;
idct_end = idct_end_mmx;
}
else
#endif
#ifdef HAVE_MLIB
if(1 || config.flags & MPEG2_MLIB_ENABLE) // Fix me
idct = idct_mlib;
if(config.flags & MPEG2_MLIB_ENABLE)
{
idct_block = idct_block_mlib;
idct_end = idct_end_mlib;
}
else
#endif
idct = idct_c;
{
idct_block = idct_block_c;
idct_end = idct_end_c;
}
}
/* row (horizontal) IDCT
......@@ -105,17 +118,18 @@ idct_init(void)
* c[1..7] = 128*sqrt(2)
*/
static void idct_row(sint_16 *blk)
static void inline
idct_row_c(sint_16 *blk)
{
sint_32 x0, x1, x2, x3, x4, x5, x6, x7, x8;
x1 = blk[4]<<11;
x2 = blk[6];
x3 = blk[2];
x2 = blk[6];
x3 = blk[2];
x4 = blk[1];
x5 = blk[7];
x6 = blk[5];
x7 = blk[3];
x5 = blk[7];
x6 = blk[5];
x7 = blk[3];
/* shortcut */
if (!(x1 | x2 | x3 | x4 | x5 | x6 | x7 ))
......@@ -175,18 +189,19 @@ static void idct_row(sint_16 *blk)
* c[1..7] = (1/1024)*sqrt(2)
*/
static void idct_col_s16(sint_16 *blk)
static void inline
idct_col_s16_c(sint_16 *blk)
{
int x0, x1, x2, x3, x4, x5, x6, x7, x8;
/* shortcut */
x1 = (blk[8*4]<<8);
x2 = blk[8*6];
x3 = blk[8*2];
x1 = blk[8*4]<<8;
x2 = blk[8*6];
x3 = blk[8*2];
x4 = blk[8*1];
x5 = blk[8*7];
x6 = blk[8*5];
x7 = blk[8*3];
x5 = blk[8*7];
x6 = blk[8*5];
x7 = blk[8*3];
if (!(x1 | x2 | x3 | x4 | x5 | x6 | x7 ))
{
......@@ -235,57 +250,42 @@ static void idct_col_s16(sint_16 *blk)
blk[8*7] = clip[(x7-x1)>>14];
}
void
idct_c(mb_buffer_t *mb_buffer)
idct_block_c(sint_16 *block)
{
uint_32 i,j,k;
sint_16 *blk;
macroblock_t *mb = mb_buffer->macroblocks;
uint_32 num_blocks = mb_buffer->num_blocks;
for(k=0;k<num_blocks;k++)
{
if(mb[k].skipped)
continue;
for(i=0;i<4;i++)
{
blk = mb[k].y_blocks + 64*i;
if(mb[k].coded_block_pattern & (0x20 >> i))
{
for (j=0; j<8; j++)
idct_row(blk + 8*j);
for (j=0; j<8; j++)
idct_col_s16(blk + j);
}
}
uint_32 i;
if(mb[k].coded_block_pattern & 0x2)
{
blk = mb[k].cr_blocks;
for (i=0; i<8; i++)
idct_row_c(block + 8*i);
for (j=0; j<8; j++)
idct_row(blk + 8*j);
for (j=0; j<8; j++)
idct_col_s16(blk + j);
}
if(mb[k].coded_block_pattern & 0x1)
{
blk = mb[k].cb_blocks;
for (j=0; j<8; j++)
idct_row(blk + 8*j);
for (j=0; j<8; j++)
idct_col_s16(blk + j);
}
}
for (i=0; i<8; i++)
idct_col_s16_c(block + i);
}
//idct_end hook for the plain C transform: empty body, nothing to do
//once the blocks of a macroblock have been transformed
void
idct_end_c(void)
{
}
/*
 * Run the inverse DCT over the coded blocks of one macroblock.
 *
 * Each bit of mb->coded_block_pattern selects one block: 0x20, 0x10, 0x08
 * and 0x04 select the four consecutive 64-coefficient luma blocks, 0x2 the
 * Cr block and 0x1 the Cb block. Selected blocks are passed to the
 * idct_block hook; idct_end is called once afterwards, whether or not any
 * block was coded.
 */
void
idct(macroblock_t *mb)
{
	int blk;

	//XXX only 4:2:0 supported here
	for(blk = 0; blk < 4; blk++)
	{
		//luma block 'blk' is flagged by bit 0x20 >> blk
		if(mb->coded_block_pattern & (0x20 >> blk))
			idct_block(mb->y_blocks + 64*blk);
	}

	if(mb->coded_block_pattern & 0x2)
		idct_block(mb->cr_blocks);

	if(mb->coded_block_pattern & 0x1)
		idct_block(mb->cb_blocks);

	idct_end();
}
......@@ -23,6 +23,5 @@
*
*/
extern void (*idct)(mb_buffer_t *mb_buffer);
void idct(macroblock_t *mb);
void idct_init(void);
......@@ -25,7 +25,6 @@
#include "mpeg2.h"
#include "mpeg2_internal.h"
#include "mb_buffer.h"
#include "idct.h"
#include <mlib_types.h>
#include <mlib_status.h>
......@@ -33,42 +32,15 @@
#include <mlib_video.h>
void
idct_mlib(mb_buffer_t *mb_buffer)
idct_block_mlib(sint_16 *block)
{
uint_32 k;
macroblock_t *mb = mb_buffer->macroblocks;
uint_32 num_blocks = mb_buffer->num_blocks;
for(k=0; k<num_blocks; k++)
{
if(mb[k].skipped)
continue;
// Should we use mlib_VideoIDCT_IEEE_S16_S16 here ??
// it's ~30% slower.
//XXX only 4:2:0 supported here
if(mb[k].coded_block_pattern & 0x20)
mlib_VideoIDCT8x8_S16_S16(mb[k].y_blocks + 64*0,
mb[k].y_blocks + 64*0);
if(mb[k].coded_block_pattern & 0x10)
mlib_VideoIDCT8x8_S16_S16(mb[k].y_blocks + 64*1,
mb[k].y_blocks + 64*1);
if(mb[k].coded_block_pattern & 0x08)
mlib_VideoIDCT8x8_S16_S16(mb[k].y_blocks + 64*2,
mb[k].y_blocks + 64*2);
if(mb[k].coded_block_pattern & 0x04)
mlib_VideoIDCT8x8_S16_S16(mb[k].y_blocks + 64*3,
mb[k].y_blocks + 64*3);
if(mb[k].coded_block_pattern & 0x2)
mlib_VideoIDCT8x8_S16_S16(mb[k].cr_blocks,
mb[k].cr_blocks);
if(mb[k].coded_block_pattern & 0x1)
mlib_VideoIDCT8x8_S16_S16(mb[k].cb_blocks,
mb[k].cb_blocks);
}
// Should we use mlib_VideoIDCT_IEEE_S16_S16 here ??
// it's ~30% slower.
mlib_VideoIDCT8x8_S16_S16(block, block);
}
//idct_end hook for the mlib transform: empty body, no work is done here
void
idct_end_mlib()
{
}
......@@ -21,4 +21,5 @@
*
*/
void idct_mlib(mb_buffer_t *mb_buffer);
void idct_block_mlib(macroblock_t *mb);
void idct_end_mlib();
......@@ -26,43 +26,18 @@
*/
#include <stdio.h>
#include <mmx.h>
#include "mpeg2.h"
#include "mpeg2_internal.h"
#include "mb_buffer.h"
#include "idct.h"
void idct_block_mmx(sint_16* foo);
void
idct_mmx(mb_buffer_t *mb_buffer)
idct_end_mmx()
{
uint_32 k;
macroblock_t *mb = mb_buffer->macroblocks;
uint_32 num_blocks = mb_buffer->num_blocks;
for(k=0;k<num_blocks;k++)
{
if(mb[k].skipped)
continue;
//XXX only 4:2:0 supported here
if(mb[k].coded_block_pattern & 0x20)
idct_block_mmx(mb[k].y_blocks + 64*0);
if(mb[k].coded_block_pattern & 0x10)
idct_block_mmx(mb[k].y_blocks + 64*1);
if(mb[k].coded_block_pattern & 0x08)
idct_block_mmx(mb[k].y_blocks + 64*2);
if(mb[k].coded_block_pattern & 0x04)
idct_block_mmx(mb[k].y_blocks + 64*3);
if(mb[k].coded_block_pattern & 0x2)
idct_block_mmx(mb[k].cr_blocks);
if(mb[k].coded_block_pattern & 0x1)
idct_block_mmx(mb[k].cb_blocks);
}
asm volatile("emms\n\t");
emms();
}
......@@ -23,4 +23,5 @@
*
*/
void idct_mmx(mb_buffer_t *mb_buffer);
void idct_block_mmx(sint_16 *block);
void idct_end_mmx();
/*
* mb_buffer.c
*
* Copyright (C) Aaron Holtzman <aholtzma@ess.engr.uvic.ca> - Nov 1999
*
* Decodes an MPEG-2 video stream.
*
* This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
*
* mpeg2dec is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* mpeg2dec is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation,
*
*/
#include <stdlib.h>
#include <stdio.h>
#include "mpeg2.h"
#include "mpeg2_internal.h"
#include "mb_buffer.h"
//FIXME dynamically set this
#define MACROBLOCK_BUFFER_SIZE 2000
macroblock_t *macroblocks;
uint_32 num_blocks = 0;
/*
 * Allocate the macroblock buffer and the coefficient storage behind it.
 *
 * One array of MACROBLOCK_BUFFER_SIZE macroblock_t entries is allocated,
 * plus three contiguous coefficient pools (y, cr, cb). Entry 0 holds the
 * pool base pointers; every later entry points 64*4 (y) or 64 (cr, cb)
 * coefficients past its predecessor.
 *
 * chroma_format is accepted but not used: the pool sizes are fixed at four
 * y blocks and one cr and one cb block per macroblock.
 *
 * Returns the first macroblock of the buffer, or 0 if any allocation
 * fails. On failure everything allocated so far is released and the global
 * buffer pointer is cleared.
 */
macroblock_t*
mb_buffer_init(uint_32 chroma_format)
{
	uint_32 i;

	num_blocks = 0;

	macroblocks = malloc(MACROBLOCK_BUFFER_SIZE * sizeof(macroblock_t));
	if(!macroblocks)
		return 0;

	macroblocks[0].y_blocks = malloc(sizeof(sint_16) * 64 * 4 * MACROBLOCK_BUFFER_SIZE);
	macroblocks[0].cr_blocks = malloc(sizeof(sint_16) * 64 * MACROBLOCK_BUFFER_SIZE);
	macroblocks[0].cb_blocks = malloc(sizeof(sint_16) * 64 * MACROBLOCK_BUFFER_SIZE);

	//all three pools must exist; y, cr AND cb are each checked
	if((!macroblocks[0].y_blocks) || (!macroblocks[0].cr_blocks) || (!macroblocks[0].cb_blocks))
	{
		//free(NULL) is a no-op, so the pools can be released unconditionally
		free(macroblocks[0].y_blocks);
		free(macroblocks[0].cr_blocks);
		free(macroblocks[0].cb_blocks);
		free(macroblocks);
		//do not leave a dangling global behind
		macroblocks = 0;
		return 0;
	}

	//carve the pools into per-macroblock slices
	for(i=1;i < MACROBLOCK_BUFFER_SIZE;i++)
	{
		macroblocks[i].y_blocks = macroblocks[i - 1].y_blocks + 64 * 4;
		macroblocks[i].cr_blocks = macroblocks[i - 1].cr_blocks + 64;
		macroblocks[i].cb_blocks = macroblocks[i - 1].cb_blocks + 64;
	}

	return macroblocks;
}
/*
 * Advance to the next slot of the macroblock buffer.
 *
 * Increments the global fill count and returns the macroblock at the new
 * index, or 0 once the count reaches MACROBLOCK_BUFFER_SIZE.
 */
macroblock_t*
mb_buffer_increment()
{
	macroblock_t *next = 0;

	++num_blocks;
	if (num_blocks != MACROBLOCK_BUFFER_SIZE)
		next = macroblocks + num_blocks;

	return next;
}
/*
 * Report the current contents of the macroblock buffer.
 *
 * Stores the current fill count and the buffer base in *mb_buffer, then
 * resets the global fill count to zero.
 */
void
mb_buffer_flush(mb_buffer_t *mb_buffer)
{
	//report the fill count before it is cleared
	mb_buffer->num_blocks = num_blocks;
	mb_buffer->macroblocks = macroblocks;

	num_blocks = 0;
}
/*
* mb_buffer.h
*
* Copyright (C) Aaron Holtzman <aholtzma@ess.engr.uvic.ca> - Nov 1999
*
* Decodes an MPEG-2 video stream.
*
* This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
*
* mpeg2dec is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* mpeg2dec is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation,
*
*/
//view of the macroblock buffer as filled in by mb_buffer_flush()
typedef struct mb_buffer_s
{
macroblock_t *macroblocks; //first macroblock of the buffer
uint_32 num_blocks; //fill count at the time of the flush
} mb_buffer_t;
//allocates the buffer; returns its first macroblock, or 0 if allocation fails
macroblock_t* mb_buffer_init(uint_32 chroma_format);
//moves on to the next macroblock; returns 0 once the buffer is full
macroblock_t* mb_buffer_increment();
//stores the buffer base and fill count in *mb_buffer and resets the fill count
void mb_buffer_flush(mb_buffer_t *mb_buffer);
......@@ -34,7 +34,6 @@
#include "mpeg2_internal.h"
#include "debug.h"
#include "mb_buffer.h"
#include "motion_comp.h"
#include "motion_comp_mmx.h"
//#include "motion_comp_mlib.h"
......@@ -129,12 +128,8 @@ void motion_block (void (** table) (uint_8 *, uint_8 *, sint_32, sint_32),
}
void
motion_comp (picture_t * picture, mb_buffer_t * mb_buffer)
motion_comp (picture_t * picture, macroblock_t *mb)
{
macroblock_t * macroblocks;
macroblock_t * mb;
uint_32 num_blocks;
int i;
int width, x, y;
int mb_width;
int pitch;
......@@ -145,120 +140,122 @@ motion_comp (picture_t * picture, mb_buffer_t * mb_buffer)
width = picture->coded_picture_width;
mb_width = width >> 4;
macroblocks = mb_buffer->macroblocks;
num_blocks = mb_buffer->num_blocks;
for (i = 0; i < num_blocks; i++)
//handle interlaced blocks