Commit faec300a authored by Loren Merritt

support interlace. uses MBAFF syntax, but is not adaptive yet.


git-svn-id: svn://svn.videolan.org/x264/trunk@570 df754926-b1dd-0310-bc7b-ec298dee348c
parent 3b785705
......@@ -829,7 +829,7 @@ void x264_cabac_context_init( x264_cabac_t *cb, int i_slice_type, int i_qp, int
cabac_context_init = &x264_cabac_context_init_PB[i_model];
}
for( i = 0; i < 436; i++ )
for( i = 0; i < 460; i++ )
{
cb->state[i] = x264_clip3( (((*cabac_context_init)[i][0] * i_qp) >> 4) + (*cabac_context_init)[i][1], 1, 126 );
}
......
......@@ -27,8 +27,7 @@
typedef struct
{
/* context */
/* #436-459 are for interlacing, so are omitted for now */
uint8_t state[436];
uint8_t state[460];
/* state */
int i_low;
......
......@@ -292,6 +292,8 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value )
p->b_cabac = atobool(value);
OPT("cabac-idc")
p->i_cabac_init_idc = atoi(value);
OPT("interlaced")
p->b_interlaced = atobool(value);
OPT("cqm")
{
if( strstr( value, "flat" ) )
......
......@@ -31,6 +31,7 @@
#endif
#include <stdarg.h>
#include <stdlib.h>
#include <assert.h>
#ifdef _MSC_VER
#define snprintf _snprintf
......@@ -193,6 +194,7 @@ typedef struct
int i_frame_num;
int b_mbaff;
int b_field_pic;
int b_bottom_field;
......@@ -397,6 +399,8 @@ struct x264_t
int b_trellis;
int b_noise_reduction;
int b_interlaced;
/* Allowed qpel MV range to stay within the picture + emulated edge pixels */
int mv_min[2];
int mv_max[2];
......@@ -416,6 +420,8 @@ struct x264_t
int i_mb_type_left;
int i_mb_type_topleft;
int i_mb_type_topright;
int i_mb_prev_xy;
int i_mb_top_xy;
/* mb table */
int8_t *type; /* mb type */
......@@ -427,7 +433,7 @@ struct x264_t
int16_t (*mv[2])[2]; /* mb mv. set to 0 for intra mb */
int16_t (*mvd[2])[2]; /* mb mv difference with predict. set to 0 if intra. cabac only */
int8_t *ref[2]; /* mb ref. set to -1 if non used (intra or Lx only) */
int16_t (*mvr[2][16])[2]; /* 16x16 mv for each possible ref */
int16_t (*mvr[2][32])[2]; /* 16x16 mv for each possible ref */
int8_t *skipbp; /* block pattern for SKIP or DIRECT (sub)mbs. B-frames + cabac only */
int8_t *mb_transform_size; /* transform_size_8x8_flag of each mb */
......@@ -458,7 +464,8 @@ struct x264_t
uint8_t *p_fdec[3];
/* pointer over mb of the references */
uint8_t *p_fref[2][16][4+2]; /* last: lN, lH, lV, lHV, cU, cV */
int i_fref[2];
uint8_t *p_fref[2][32][4+2]; /* last: lN, lH, lV, lHV, cU, cV */
uint16_t *p_integral[2][16];
/* fref stride */
......@@ -490,6 +497,7 @@ struct x264_t
/* number of neighbors (top and left) that used 8x8 dct */
int i_neighbour_transform_size;
int b_transform_8x8_allowed;
int i_neighbour_interlaced;
} cache;
/* */
......@@ -502,8 +510,8 @@ struct x264_t
int b_direct_auto_write; /* analyse direct modes, to use and/or save */
/* B_direct and weighted prediction */
int dist_scale_factor[16][16];
int bipred_weight[16][16];
int dist_scale_factor[16][2];
int bipred_weight[32][4];
/* maps fref1[0]'s ref indices into the current list0 */
int map_col_to_list0_buf[2]; // for negative indices
int map_col_to_list0[16];
......@@ -532,7 +540,7 @@ struct x264_t
int i_mb_count_skip;
int i_mb_count_8x8dct[2];
int i_mb_count_size[7];
int i_mb_count_ref[16];
int i_mb_count_ref[32];
/* Estimated (SATD) cost as Intra/Predicted frame */
/* XXX: both omit the cost of MBs coded as P_SKIP */
int i_intra_cost;
......@@ -558,7 +566,7 @@ struct x264_t
int64_t i_mb_count[5][19];
int64_t i_mb_count_8x8dct[2];
int64_t i_mb_count_size[2][7];
int64_t i_mb_count_ref[2][16];
int64_t i_mb_count_ref[2][32];
/* */
int i_direct_score[2];
int i_direct_frames[2];
......
......@@ -29,6 +29,8 @@
# include "ppc/dct.h"
#endif
int x264_dct4_weight2_zigzag[2][16];
int x264_dct8_weight2_zigzag[2][64];
static inline int clip_uint8( int a )
{
......@@ -441,3 +443,15 @@ void x264_dct_init( int cpu, x264_dct_function_t *dctf )
#endif
}
/* Build the zigzag-ordered weight tables at runtime.
 * For each of the two scan orders, entry `pos` receives the weight of the
 * coefficient that the corresponding zigzag scan table places at `pos`. */
void x264_dct_init_weights( void )
{
    int scan, pos;
    for( scan = 0; scan < 2; scan++ )
    {
        int *w4 = x264_dct4_weight2_zigzag[scan];
        int *w8 = x264_dct8_weight2_zigzag[scan];

        /* 4x4 transform: 16 coefficients */
        for( pos = 0; pos < 16; pos++ )
            w4[pos] = x264_dct4_weight2_tab[ x264_zigzag_scan4[scan][pos] ];

        /* 8x8 transform: 64 coefficients */
        for( pos = 0; pos < 64; pos++ )
            w8[pos] = x264_dct8_weight2_tab[ x264_zigzag_scan8[scan][pos] ];
    }
}
......@@ -54,10 +54,6 @@ static const int x264_dct4_weight2_tab[16] = {
W(0), W(1), W(0), W(1),
W(1), W(2), W(1), W(2)
};
static const int x264_dct4_weight2_zigzag[16] = {
W(0), W(1), W(1), W(0), W(2), W(0), W(1), W(1),
W(1), W(1), W(2), W(0), W(2), W(1), W(1), W(2)
};
#undef W
#define W(i) (i==0 ? FIX8(1.00000) :\
......@@ -77,18 +73,11 @@ static const int x264_dct8_weight2_tab[64] = {
W(4), W(5), W(2), W(5), W(4), W(5), W(2), W(5),
W(3), W(1), W(5), W(1), W(3), W(1), W(5), W(1)
};
static const int x264_dct8_weight2_zigzag[64] = {
W(0), W(3), W(3), W(4), W(1), W(4), W(3), W(5),
W(5), W(3), W(0), W(1), W(2), W(1), W(0), W(3),
W(3), W(5), W(5), W(3), W(3), W(4), W(1), W(4),
W(1), W(4), W(1), W(4), W(3), W(5), W(5), W(3),
W(3), W(5), W(5), W(3), W(1), W(2), W(1), W(0),
W(1), W(2), W(1), W(5), W(5), W(3), W(3), W(5),
W(5), W(1), W(4), W(1), W(4), W(1), W(3), W(5),
W(5), W(3), W(1), W(2), W(1), W(5), W(5), W(1)
};
#undef W
extern int x264_dct4_weight2_zigzag[2][16]; // [2] = {frame, field}
extern int x264_dct8_weight2_zigzag[2][64];
typedef struct
{
// pix1 stride = FENC_STRIDE
......@@ -118,5 +107,6 @@ typedef struct
} x264_dct_function_t;
void x264_dct_init( int cpu, x264_dct_function_t *dctf );
void x264_dct_init_weights( void );
#endif
......@@ -26,6 +26,9 @@
#include "common.h"
#define PADH 32
#define PADV 32
x264_frame_t *x264_frame_new( x264_t *h )
{
x264_frame_t *frame = x264_malloc( sizeof(x264_frame_t) );
......@@ -34,14 +37,17 @@ x264_frame_t *x264_frame_new( x264_t *h )
int i_mb_count = h->mb.i_mb_count;
int i_stride;
int i_lines;
int i_padv = PADV << h->param.b_interlaced;
if( !frame ) return NULL;
memset( frame, 0, sizeof(x264_frame_t) );
/* allocate frame data (+64 for extra data for me) */
i_stride = ( ( h->param.i_width + 15 )&0xfffff0 )+ 64;
i_lines = ( ( h->param.i_height + 15 )&0xfffff0 );
i_stride = ( ( h->param.i_width + 15 ) & -16 )+ 2*PADH;
i_lines = ( ( h->param.i_height + 15 ) & -16 );
if( h->param.b_interlaced )
i_lines = ( i_lines + 31 ) & -32;
frame->i_plane = 3;
for( i = 0; i < 3; i++ )
......@@ -58,9 +64,10 @@ x264_frame_t *x264_frame_new( x264_t *h )
frame->i_stride[i] = i_stride / i_divw;
frame->i_lines[i] = i_lines / i_divh;
CHECKED_MALLOC( frame->buffer[i],
frame->i_stride[i] * ( frame->i_lines[i] + 64 / i_divh ) );
frame->i_stride[i] * ( frame->i_lines[i] + 2*i_padv / i_divh ) );
frame->plane[i] = ((uint8_t*)frame->buffer[i]) +
frame->i_stride[i] * 32 / i_divh + 32 / i_divw;
frame->i_stride[i] * i_padv / i_divh + PADH / i_divw;
}
frame->i_stride[3] = 0;
frame->i_lines[3] = 0;
......@@ -71,29 +78,29 @@ x264_frame_t *x264_frame_new( x264_t *h )
for( i = 0; i < 3; i++ )
{
CHECKED_MALLOC( frame->buffer[4+i],
frame->i_stride[0] * ( frame->i_lines[0] + 64 ) );
frame->i_stride[0] * ( frame->i_lines[0] + 2*i_padv ) );
frame->filtered[i+1] = ((uint8_t*)frame->buffer[4+i]) +
frame->i_stride[0] * 32 + 32;
frame->i_stride[0] * i_padv + PADH;
}
if( h->frames.b_have_lowres )
{
frame->i_stride_lowres = frame->i_stride[0]/2 + 32;
frame->i_stride_lowres = frame->i_stride[0]/2 + PADH;
frame->i_lines_lowres = frame->i_lines[0]/2;
for( i = 0; i < 4; i++ )
{
CHECKED_MALLOC( frame->buffer[7+i],
frame->i_stride_lowres * ( frame->i_lines[0]/2 + 64 ) );
frame->lowres[i] = ((uint8_t*)frame->buffer[7+i]) +
frame->i_stride_lowres * 32 + 32;
CHECKED_MALLOC( frame->buffer_lowres[i],
frame->i_stride_lowres * ( frame->i_lines[0]/2 + 2*i_padv ) );
frame->lowres[i] = ((uint8_t*)frame->buffer_lowres[i]) +
frame->i_stride_lowres * i_padv + PADH;
}
}
if( h->param.analyse.i_me_method == X264_ME_ESA )
{
CHECKED_MALLOC( frame->buffer[11],
2 * frame->i_stride[0] * (frame->i_lines[0] + 64) * sizeof(uint16_t) );
frame->integral = (uint16_t*)frame->buffer[11] + frame->i_stride[0] * 32 + 32;
CHECKED_MALLOC( frame->buffer[7],
2 * frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) );
frame->integral = (uint16_t*)frame->buffer[7] + frame->i_stride[0] * i_padv + PADH;
}
frame->i_poc = -1;
......@@ -133,8 +140,10 @@ fail:
void x264_frame_delete( x264_frame_t *frame )
{
int i, j;
for( i = 0; i < 12; i++ )
for( i = 0; i < 8; i++ )
x264_free( frame->buffer[i] );
for( i = 0; i < 4; i++ )
x264_free( frame->buffer_lowres[i] );
for( i = 0; i < X264_BFRAME_MAX+2; i++ )
for( j = 0; j < X264_BFRAME_MAX+2; j++ )
x264_free( frame->i_row_satds[i][j] );
......@@ -189,52 +198,77 @@ void x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src
/* Replicate the edge pixels of one plane into the padding that surrounds it.
 * Each row first has its leftmost and rightmost pixel smeared into the left
 * and right bands; the first and last rows (side bands included) are then
 * copied into the rows above and below the plane.
 * NOTE(review): this span carries two signatures and paired statements (one
 * set using i_pad, the other i_padh/i_padv) -- it looks like the before and
 * after lines of a change shown together; confirm which set is live. */
static void plane_expand_border( uint8_t *pix, int i_stride, int i_height, int i_pad )
static void plane_expand_border( uint8_t *pix, int i_stride, int i_width, int i_height, int i_padh, int i_padv )
{
/* PPIXEL(x, y): address of the pixel at column x, row y relative to pix;
 * negative coordinates reach into the padding. */
#define PPIXEL(x, y) ( pix + (x) + (y)*i_stride )
const int i_width = i_stride - 2*i_pad;
int y;
for( y = 0; y < i_height; y++ )
{
/* left band */
memset( PPIXEL(-i_pad, y), PPIXEL(0, y)[0], i_pad );
memset( PPIXEL(-i_padh, y), PPIXEL(0, y)[0], i_padh );
/* right band */
memset( PPIXEL(i_width, y), PPIXEL(i_width-1, y)[0], i_pad );
memset( PPIXEL(i_width, y), PPIXEL(i_width-1, y)[0], i_padh );
}
/* upper band */
/* the source row starts inside the left band, so the corners are filled too */
for( y = 0; y < i_pad; y++ )
memcpy( PPIXEL(-i_pad, -y-1), PPIXEL(-i_pad, 0), i_stride );
for( y = 0; y < i_padv; y++ )
memcpy( PPIXEL(-i_padh, -y-1), PPIXEL(-i_padh, 0), i_width+2*i_padh );
/* lower band */
for( y = 0; y < i_pad; y++ )
memcpy( PPIXEL(-i_pad, i_height+y), PPIXEL(-i_pad, i_height-1), i_stride );
for( y = 0; y < i_padv; y++ )
memcpy( PPIXEL(-i_padh, i_height+y), PPIXEL(-i_padh, i_height-1), i_width+2*i_padh );
#undef PPIXEL
}
/* Expand the borders of every plane of `frame`.
 * Planes with i != 0 use half the width, height and padding of plane 0
 * (the `>> !!i` shifts). When h->param.b_interlaced is set, the plane is
 * padded as two separate fields: each is addressed with a doubled stride and
 * half the height, the second one starting one row further down.
 * NOTE(review): two signatures and two plane_expand_border call forms appear
 * here -- presumably the before/after lines of a change; confirm. */
void x264_frame_expand_border( x264_frame_t *frame )
void x264_frame_expand_border( x264_t *h, x264_frame_t *frame )
{
int i;
for( i = 0; i < frame->i_plane; i++ )
{
int i_pad = i ? 16 : 32;
plane_expand_border( frame->plane[i], frame->i_stride[i], frame->i_lines[i], i_pad );
int stride = frame->i_stride[i];
/* plane dimensions are derived from the macroblock counts in the SPS */
int width = 16*h->sps->i_mb_width >> !!i;
int height = 16*h->sps->i_mb_height >> !!i;
int padh = PADH >> !!i;
int padv = PADV >> !!i;
if( h->param.b_interlaced )
{
/* even rows, then odd rows: one call per field */
plane_expand_border( frame->plane[i], stride*2, width, height>>1, padh, padv );
plane_expand_border( frame->plane[i]+stride, stride*2, width, height>>1, padh, padv );
}
else
{
plane_expand_border( frame->plane[i], stride, width, height, padh, padv );
}
}
}
/* Expand the borders of the three filtered planes, frame->filtered[1..3].
 * All of them use the stride of plane 0. The region handed to
 * plane_expand_border starts 8 columns to the left of and 8 rows above the
 * plane origin, is 16 wider and 16 taller, and uses a padding reduced by 8.
 * NOTE(review): two signatures and an older single-call loop body appear
 * alongside the braced one -- presumably before/after lines; confirm. */
void x264_frame_expand_border_filtered( x264_frame_t *frame )
void x264_frame_expand_border_filtered( x264_t *h, x264_frame_t *frame )
{
/* during filtering, 8 extra pixels were filtered on each edge.
we want to expand border from the last filtered pixel */
int stride = frame->i_stride[0];
int width = 16*h->sps->i_mb_width;
int height = 16*h->sps->i_mb_height;
int padh = PADH - 8;
int padv = PADV - 8;
int i;
for( i = 1; i < 4; i++ )
plane_expand_border( frame->filtered[i] - 8*frame->i_stride[0] - 8, frame->i_stride[0], frame->i_lines[0]+2*8, 24 );
{
if( h->param.b_interlaced )
{
/* per field: with a doubled stride, 8 field rows up are 16 frame rows
 * for the first field and 15 for the second (which starts one row lower) */
plane_expand_border( frame->filtered[i] - 16*stride - 8, stride*2, width+16, (height>>1)+16, padh, padv );
plane_expand_border( frame->filtered[i] - 15*stride - 8, stride*2, width+16, (height>>1)+16, padh, padv );
}
else
{
plane_expand_border( frame->filtered[i] - 8*stride - 8, stride, width+16, height+16, padh, padv );
}
}
}
/* Expand the borders of the four lowres planes, frame->lowres[0..3].
 * In the six-argument call the width passed is the lowres stride minus PADH
 * on each side, and the padding is PADH horizontally and PADV vertically.
 * NOTE(review): both a 4-argument and a 6-argument call are listed under the
 * same loop -- presumably before/after lines of a change; confirm. */
void x264_frame_expand_border_lowres( x264_frame_t *frame )
{
int i;
for( i = 0; i < 4; i++ )
plane_expand_border( frame->lowres[i], frame->i_stride_lowres, frame->i_lines_lowres, 32 );
plane_expand_border( frame->lowres[i], frame->i_stride_lowres, frame->i_stride_lowres - 2*PADH, frame->i_lines_lowres, PADH, PADV );
}
void x264_frame_expand_border_mod16( x264_t *h, x264_frame_t *frame )
......@@ -257,6 +291,7 @@ void x264_frame_expand_border_mod16( x264_t *h, x264_frame_t *frame )
}
if( i_pady )
{
//FIXME interlace? or just let it pad using the wrong field
for( y = i_height; y < i_height + i_pady; y++ )
memcpy( &frame->plane[i][y*frame->i_stride[i]],
&frame->plane[i][(i_height-1)*frame->i_stride[i]],
......@@ -500,9 +535,14 @@ void x264_frame_deblocking_filter( x264_t *h, int i_slice_type )
{
const int s8x8 = 2 * h->mb.i_mb_stride;
const int s4x4 = 4 * h->mb.i_mb_stride;
const int b_interlaced = h->param.b_interlaced;
const int mvy_limit = 4 >> b_interlaced;
int mb_y, mb_x;
int i_stride2[3] = { h->fdec->i_stride[0] << b_interlaced,
h->fdec->i_stride[1] << b_interlaced,
h->fdec->i_stride[2] << b_interlaced };
for( mb_y = 0, mb_x = 0; mb_y < h->sps->i_mb_height; )
{
const int mb_xy = mb_y * h->mb.i_mb_stride + mb_x;
......@@ -512,6 +552,16 @@ void x264_frame_deblocking_filter( x264_t *h, int i_slice_type )
const int i_edge_end = (h->mb.type[mb_xy] == P_SKIP) ? 1 : 4;
int i_edge, i_dir;
int i_pix_y[3] = { 16*mb_y*h->fdec->i_stride[0] + 16*mb_x,
8*mb_y*h->fdec->i_stride[1] + 8*mb_x,
8*mb_y*h->fdec->i_stride[2] + 8*mb_x };
if( b_interlaced && (mb_y&1) )
{
i_pix_y[0] -= 15*h->fdec->i_stride[0];
i_pix_y[1] -= 7*h->fdec->i_stride[1];
i_pix_y[2] -= 7*h->fdec->i_stride[2];
}
/* cavlc + 8x8 transform stores nnz per 16 coeffs for the purpose of
* entropy coding, but per 64 coeffs for the purpose of deblocking */
if( !h->param.b_cabac && b_8x8_transform )
......@@ -527,7 +577,7 @@ void x264_frame_deblocking_filter( x264_t *h, int i_slice_type )
* i_dir == 1 -> horizontal edge */
for( i_dir = 0; i_dir < 2; i_dir++ )
{
int i_start = (i_dir ? mb_y : mb_x) ? 0 : 1;
int i_start = (i_dir ? (mb_y <= b_interlaced) : (mb_x == 0));
int i_qp, i_qpn;
for( i_edge = i_start; i_edge < i_edge_end; i_edge++ )
......@@ -542,10 +592,17 @@ void x264_frame_deblocking_filter( x264_t *h, int i_slice_type )
mbn_8x8 = i_edge > 0 ? mb_8x8 : ( i_dir == 0 ? mb_8x8 - 2 : mb_8x8 - 2 * s8x8 );
mbn_4x4 = i_edge > 0 ? mb_4x4 : ( i_dir == 0 ? mb_4x4 - 4 : mb_4x4 - 4 * s4x4 );
if( b_interlaced && i_edge == 0 && i_dir == 1 )
{
mbn_xy -= h->mb.i_mb_stride;
mbn_8x8 -= 2 * s8x8;
mbn_4x4 -= 4 * s4x4;
}
/* *** Get bS for each 4px for the current edge *** */
if( IS_INTRA( h->mb.type[mb_xy] ) || IS_INTRA( h->mb.type[mbn_xy] ) )
{
bS[0] = bS[1] = bS[2] = bS[3] = ( i_edge == 0 ? 4 : 3 );
bS[0] = bS[1] = bS[2] = bS[3] = ( i_edge == 0 && !(b_interlaced && i_dir) ? 4 : 3 );
}
else
{
......@@ -581,7 +638,7 @@ void x264_frame_deblocking_filter( x264_t *h, int i_slice_type )
{
if( h->mb.ref[l][i8p] != h->mb.ref[l][i8q] ||
abs( h->mb.mv[l][i4p][0] - h->mb.mv[l][i4q][0] ) >= 4 ||
abs( h->mb.mv[l][i4p][1] - h->mb.mv[l][i4q][1] ) >= 4 )
abs( h->mb.mv[l][i4p][1] - h->mb.mv[l][i4q][1] ) >= mvy_limit )
{
bS[i] = 1;
break;
......@@ -599,51 +656,54 @@ void x264_frame_deblocking_filter( x264_t *h, int i_slice_type )
if( i_dir == 0 )
{
/* vertical edge */
deblock_edge( h, &h->fdec->plane[0][16*mb_y * h->fdec->i_stride[0] + 16*mb_x + 4*i_edge],
h->fdec->i_stride[0], bS, (i_qp+i_qpn+1) >> 1, 0,
deblock_edge( h, &h->fdec->plane[0][i_pix_y[0] + 4*i_edge],
i_stride2[0], bS, (i_qp+i_qpn+1) >> 1, 0,
h->loopf.deblock_h_luma, h->loopf.deblock_h_luma_intra );
if( !(i_edge & 1) )
{
/* U/V planes */
int i_qpc = ( i_chroma_qp_table[x264_clip3( i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )] +
i_chroma_qp_table[x264_clip3( i_qpn + h->pps->i_chroma_qp_index_offset, 0, 51 )] + 1 ) >> 1;
deblock_edge( h, &h->fdec->plane[1][8*(mb_y*h->fdec->i_stride[1]+mb_x)+2*i_edge],
h->fdec->i_stride[1], bS, i_qpc, 1,
deblock_edge( h, &h->fdec->plane[1][i_pix_y[1] + 2*i_edge],
i_stride2[1], bS, i_qpc, 1,
h->loopf.deblock_h_chroma, h->loopf.deblock_h_chroma_intra );
deblock_edge( h, &h->fdec->plane[2][8*(mb_y*h->fdec->i_stride[2]+mb_x)+2*i_edge],
h->fdec->i_stride[2], bS, i_qpc, 1,
deblock_edge( h, &h->fdec->plane[2][i_pix_y[2] + 2*i_edge],
i_stride2[2], bS, i_qpc, 1,
h->loopf.deblock_h_chroma, h->loopf.deblock_h_chroma_intra );
}
}
else
{
/* horizontal edge */
deblock_edge( h, &h->fdec->plane[0][(16*mb_y + 4*i_edge) * h->fdec->i_stride[0] + 16*mb_x],
h->fdec->i_stride[0], bS, (i_qp+i_qpn+1) >> 1, 0,
deblock_edge( h, &h->fdec->plane[0][i_pix_y[0] + 4*i_edge*i_stride2[0]],
i_stride2[0], bS, (i_qp+i_qpn+1) >> 1, 0,
h->loopf.deblock_v_luma, h->loopf.deblock_v_luma_intra );
/* U/V planes */
if( !(i_edge & 1) )
{
int i_qpc = ( i_chroma_qp_table[x264_clip3( i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )] +
i_chroma_qp_table[x264_clip3( i_qpn + h->pps->i_chroma_qp_index_offset, 0, 51 )] + 1 ) >> 1;
deblock_edge( h, &h->fdec->plane[1][8*(mb_y*h->fdec->i_stride[1]+mb_x)+2*i_edge*h->fdec->i_stride[1]],
h->fdec->i_stride[1], bS, i_qpc, 1,
deblock_edge( h, &h->fdec->plane[1][i_pix_y[1] + 2*i_edge*i_stride2[1]],
i_stride2[1], bS, i_qpc, 1,
h->loopf.deblock_v_chroma, h->loopf.deblock_v_chroma_intra );
deblock_edge( h, &h->fdec->plane[2][8*(mb_y*h->fdec->i_stride[2]+mb_x)+2*i_edge*h->fdec->i_stride[2]],
h->fdec->i_stride[2], bS, i_qpc, 1,
deblock_edge( h, &h->fdec->plane[2][i_pix_y[2] + 2*i_edge*i_stride2[2]],
i_stride2[2], bS, i_qpc, 1,
h->loopf.deblock_v_chroma, h->loopf.deblock_v_chroma_intra );
}
}
}
}
/* newt mb */
mb_x++;
/* next mb */
if( !b_interlaced || (mb_y&1) )
mb_x++;
if( mb_x >= h->sps->i_mb_width )
{
mb_x = 0;
mb_y++;
}
else
mb_y ^= b_interlaced;
}
}
......
......@@ -49,7 +49,8 @@ typedef struct
/* for unrestricted mv we allocate more data than needed
* allocated data are stored in buffer */
void *buffer[12];
void *buffer[8];
void *buffer_lowres[4];
/* motion data */
int8_t *mb_type;
......@@ -90,14 +91,14 @@ void x264_frame_delete( x264_frame_t *frame );
void x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src );
void x264_frame_expand_border( x264_frame_t *frame );
void x264_frame_expand_border_filtered( x264_frame_t *frame );
void x264_frame_expand_border( x264_t *h, x264_frame_t *frame );
void x264_frame_expand_border_filtered( x264_t *h, x264_frame_t *frame );
void x264_frame_expand_border_lowres( x264_frame_t *frame );
void x264_frame_expand_border_mod16( x264_t *h, x264_frame_t *frame );
void x264_frame_deblocking_filter( x264_t *h, int i_slice_type );
void x264_frame_filter( int cpu, x264_frame_t *frame );
void x264_frame_filter( int cpu, x264_frame_t *frame, int b_interlaced );
void x264_frame_init_lowres( int cpu, x264_frame_t *frame );
void x264_deblock_init( int cpu, x264_deblock_function_t *pf );
......
......@@ -504,7 +504,7 @@ void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int mvc[8][2
}
if( h->mb.i_neighbour & MB_TOP )
{
int i_mb_t = h->mb.i_mb_xy - h->mb.i_mb_stride;
int i_mb_t = h->mb.i_mb_top_xy;
if( !IS_SKIP( h->mb.type[i_mb_t] ) )
SET_MVP( mvr[i_mb_t] );
......@@ -516,7 +516,8 @@ void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int mvc[8][2
#undef SET_MVP
/* temporal predictors */
if( h->fref0[0]->i_ref[0] > 0 )
/* FIXME temporal scaling w/ interlace */
if( h->fref0[0]->i_ref[0] > 0 && !h->sh.b_mbaff )
{
x264_frame_t *l0 = h->fref0[0];
int ref_col_cur, ref_col_prev = -1;
......@@ -555,12 +556,16 @@ static inline void x264_mb_mc_0xywh( x264_t *h, int x, int y, int width, int hei
const int i8 = x264_scan8[0]+x+8*y;
const int i_ref = h->mb.cache.ref[0][i8];
const int mvx = x264_clip3( h->mb.cache.mv[0][i8][0], h->mb.mv_min[0], h->mb.mv_max[0] );
const int mvy = x264_clip3( h->mb.cache.mv[0][i8][1], h->mb.mv_min[1], h->mb.mv_max[1] );
int mvy = x264_clip3( h->mb.cache.mv[0][i8][1], h->mb.mv_min[1], h->mb.mv_max[1] );
h->mc.mc_luma( h->mb.pic.p_fref[0][i_ref], h->mb.pic.i_stride[0],
&h->mb.pic.p_fdec[0][4*y*FDEC_STRIDE+4*x], FDEC_STRIDE,
mvx + 4*4*x, mvy + 4*4*y, 4*width, 4*height );
// chroma is offset if MCing from a field of opposite parity
if( h->mb.b_interlaced & i_ref )
mvy += (h->mb.i_mb_y & 1)*4 - 2;
h->mc.mc_chroma( &h->mb.pic.p_fref[0][i_ref][4][2*y*h->mb.pic.i_stride[1]+2*x], h->mb.pic.i_stride[1],
&h->mb.pic.p_fdec[1][2*y*FDEC_STRIDE+2*x], FDEC_STRIDE,
mvx, mvy, 2*width, 2*height );
......@@ -574,12 +579,15 @@ static inline void x264_mb_mc_1xywh( x264_t *h, int x, int y, int width, int hei
const int i8 = x264_scan8[0]+x+8*y;
const int i_ref = h->mb.cache.ref[1][i8];
const int mvx = x264_clip3( h->mb.cache.mv[1][i8][0], h->mb.mv_min[0], h->mb.mv_max[0] );
const int mvy = x264_clip3( h->mb.cache.mv[1][i8][1], h->mb.mv_min[1], h->mb.mv_max[1] );
int mvy = x264_clip3( h->mb.cache.mv[1][i8][1], h->mb.mv_min[1], h->mb.mv_max[1] );
h->mc.mc_luma( h->mb.pic.p_fref[1][i_ref], h->mb.pic.i_stride[0],
&h->mb.pic.p_fdec[0][4*y*FDEC_STRIDE+4*x], FDEC_STRIDE,
mvx + 4*4*x, mvy + 4*4*y, 4*width, 4*height );
if( h->mb.b_interlaced & i_ref )
mvy += (h->mb.i_mb_y & 1)*4 - 2;
h->mc.mc_chroma( &h->mb.pic.p_fref[1][i_ref][4][2*y*h->mb.pic.i_stride[1]+2*x], h->mb.pic.i_stride[1],
&h->mb.pic.p_fdec[1][2*y*FDEC_STRIDE+2*x], FDEC_STRIDE,
mvx, mvy, 2*width, 2*height );
......@@ -595,7 +603,7 @@ static inline void x264_mb_mc_01xywh( x264_t *h, int x, int y, int width, int he
const int i_ref1 = h->mb.cache.ref[1][i8];
const int mvx1 = x264_clip3( h->mb.cache.mv[1][i8][0], h->mb.mv_min[0], h->mb.mv_max[0] );
const int mvy1 = x264_clip3( h->mb.cache.mv[1][i8][1], h->mb.mv_min[1], h->mb.mv_max[1] );
int mvy1 = x264_clip3( h->mb.cache.mv[1][i8][1], h->mb.mv_min[1], h->mb.mv_max[1] );
DECLARE_ALIGNED( uint8_t, tmp[16*16], 16 );
int i_mode = x264_size2pixel[height][width];
......@@ -604,6 +612,9 @@ static inline void x264_mb_mc_01xywh( x264_t *h, int x, int y, int width, int he
h->mc.mc_luma( h->mb.pic.p_fref[1][i_ref1], h->mb.pic.i_stride[0],
tmp, 16, mvx1 + 4*4*x, mvy1 + 4*4*y, 4*width, 4*height );
if( h->mb.b_interlaced & i_ref1 )
mvy1 += (h->mb.i_mb_y & 1)*4 - 2;
if( h->param.analyse.b_weighted_bipred )
{
const int i_ref0 = h->mb.cache.ref[0][i8];
......@@ -823,6 +834,8 @@ int x264_macroblock_cache_init( x264_t *h )
h->mb.i_b8_stride = h->sps->i_mb_width * 2;
h->mb.i_b4_stride = h->sps->i_mb_width * 4;
h->mb.b_interlaced = h->param.b_interlaced;
CHECKED_MALLOC( h->mb.qp, i_mb_count * sizeof(int8_t) );
CHECKED_MALLOC( h->mb.cbp, i_mb_count * sizeof(int16_t) );
CHECKED_MALLOC( h->mb.skipbp, i_mb_count * sizeof(int8_t) );
......@@ -843,8 +856,8 @@ int x264_macroblock_cache_init( x264_t *h )
for( i=0; i<2; i++ )
{
int i_refs = (i ? 1 : h->param.i_frame_reference) + h->param.b_bframe_pyramid;
for( j=0; j < i_refs && j < 16; j++ )
int i_refs = X264_MIN(16, (i ? 1 : h->param.i_frame_reference) + h->param.b_bframe_pyramid) << h->param.b_interlaced;
for( j=0; j < i_refs; j++ )
CHECKED_MALLOC( h->mb.mvr[i][j], 2 * i_mb_count * sizeof(int16_t) );
}
......@@ -917,80 +930,33 @@ void x264_macroblock_slice_init( x264_t *h )
void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y )
{
const int i_mb_4x4 = 4*(i_mb_y * h->mb.i_b4_stride + i_mb_x);
const int i_mb_8x8 = 2*(i_mb_y * h->mb.i_b8_stride + i_mb_x);
int i_mb_xy = i_mb_y * h->mb.i_mb_stride + i_mb_x;
int i_top_xy = i_mb_xy - h->mb.i_mb_stride;
int i_mb_4x4 = 4*(i_mb_y * h->mb.i_b4_stride + i_mb_x);
int i_mb_8x8 = 2*(i_mb_y * h->mb.i_b8_stride + i_mb_x);
int i_top_y = i_mb_y - (1 << h->mb.b_interlaced);
int i_top_xy = i_top_y * h->mb.i_mb_stride + i_mb_x;
int i_top_4x4 = (4*i_top_y+3) * h->mb.i_b4_stride + 4*i_mb_x;
int i_top_8x8 = (2*i_top_y+1) * h->mb.i_b8_stride + 2*i_mb_x;
int i_left_xy = -1;
int i_top_type = -1; /* gcc warn */
int i_left_type= -1;
int i;
assert( h->mb.i_b8_stride == 2*h->mb.i_mb_stride );
assert( h->mb.i_b4_stride == 4*h->mb.i_mb_stride );
/* init index */
h->mb.i_mb_x = i_mb_x;
h->mb.i_mb_y = i_mb_y;
h->mb.i_mb_xy = i_mb_xy;
h->mb.i_b8_xy = i_mb_8x8;
h->mb.i_b4_xy = i_mb_4x4;
h->mb.i_mb_top_xy = i_top_xy;
h->mb.i_neighbour = 0;
/* fdec: fenc:
* yyyyyyy
* yYYYY YYYY
* yYYYY YYYY
* yYYYY YYYY
* yYYYY YYYY
* uuu vvv UUVV
* uUU vVV UUVV
* uUU vVV
*/
h->mb.pic.p_fenc[0] = h->mb.pic.fenc_buf;
h->mb.pic.p_fenc[1] = h->mb.pic.fenc_buf + 16*FENC_STRIDE;
h->mb.pic.p_fenc[2] = h->mb.pic.fenc_buf + 16*FENC_STRIDE + 8;
h->mb.pic.p_fdec[0] = h->mb.pic.fdec_buf + 2*FDEC_STRIDE;
h->mb.pic.p_fdec[1] = h->mb.pic.fdec_buf + 19*FDEC_STRIDE;
h->mb.pic.p_fdec[2] = h->mb.pic.fdec_buf + 19*FDEC_STRIDE + 16;
/* load picture pointers */
for( i = 0; i < 3; i++ )