Commit 52fb8334 authored by Loren Merritt

don't distinguish between luma4x4 and luma4x4ac

parent b437d2d4
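
The change in a nutshell (a minimal sketch, using only names visible in the diff below; the alignment macros and the rest of x264_t are omitted): the per-block union of residual_ac[15] and luma4x4[16] becomes one flat coefficient array, and former AC-only users read the same memory at offset 1.

    /* Sketch of the dct coefficient storage before/after this commit -- not the full struct. */
    struct dct_sketch
    {
        int16_t luma16x16_dc[16];
        int16_t chroma_dc[2][4];
        int16_t luma8x8[4][64];
        /* before: union { int16_t residual_ac[15]; int16_t luma4x4[16]; } block[16+8]; */
        int16_t luma4x4[16+8][16];   /* [0..15] luma 4x4 blocks, [16..23] chroma 4x4 blocks */
    };

    /* Call-site translation used throughout the diff:
     *   h->dct.block[i].luma4x4      ->  h->dct.luma4x4[i]      (all 16 coefficients)
     *   h->dct.block[i].residual_ac  ->  h->dct.luma4x4[i]+1    (the 15 AC coefficients)
     * For i16x16 luma and chroma blocks the DC slot at index 0 is simply ignored,
     * since their DC coefficients live in luma16x16_dc / chroma_dc. */
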
@@ -338,13 +338,9 @@ struct x264_t
{
DECLARE_ALIGNED( int16_t, luma16x16_dc[16], 16 );
DECLARE_ALIGNED( int16_t, chroma_dc[2][4], 16 );
// FIXME merge with union
// FIXME share memory?
DECLARE_ALIGNED( int16_t, luma8x8[4][64], 16 );
union
{
DECLARE_ALIGNED( int16_t, residual_ac[15], 16 );
DECLARE_ALIGNED( int16_t, luma4x4[16], 16 );
} block[16+8];
DECLARE_ALIGNED( int16_t, luma4x4[16+8][16], 16 );
} dct;
/* MB table and cache for current frame/mb */
@@ -521,22 +521,6 @@ static void zigzag_scan_4x4_field( int16_t level[16], int16_t dct[4][4] )
*(uint64_t*)(level+12) = *(uint64_t*)(*dct+12);
}
static void zigzag_scan_4x4ac_frame( int16_t level[15], int16_t dct[4][4] )
{
ZIG( 0,0,1) ZIG( 1,1,0) ZIG( 2,2,0)
ZIG( 3,1,1) ZIG( 4,0,2) ZIG( 5,0,3) ZIG( 6,1,2)
ZIG( 7,2,1) ZIG( 8,3,0) ZIG( 9,3,1) ZIG(10,2,2)
ZIG(11,1,3) ZIG(12,2,3) ZIG(13,3,2) ZIG(14,3,3)
}
static void zigzag_scan_4x4ac_field( int16_t level[15], int16_t dct[4][4] )
{
ZIG( 0,1,0) ZIG( 1,0,1) ZIG( 2,2,0)
ZIG( 3,3,0) ZIG( 4,1,1) ZIG( 5,2,1) ZIG( 6,3,1)
ZIG( 7,0,2) ZIG( 8,1,2) ZIG( 9,2,2) ZIG(10,3,2)
ZIG(11,0,3) ZIG(12,1,3) ZIG(13,2,3) ZIG(14,3,3)
}
#undef ZIG
#define ZIG(i,y,x) {\
int oe = x+y*FENC_STRIDE;\
@@ -567,24 +551,6 @@ static void zigzag_sub_4x4_field( int16_t level[16], const uint8_t *p_src, uint8
COPY4x4
}
static void zigzag_sub_4x4ac_frame( int16_t level[15], const uint8_t *p_src, uint8_t *p_dst )
{
ZIG( 0,0,1) ZIG( 1,1,0) ZIG( 2,2,0)
ZIG( 3,1,1) ZIG( 4,0,2) ZIG( 5,0,3) ZIG( 6,1,2)
ZIG( 7,2,1) ZIG( 8,3,0) ZIG( 9,3,1) ZIG(10,2,2)
ZIG(11,1,3) ZIG(12,2,3) ZIG(13,3,2) ZIG(14,3,3)
COPY4x4
}
static void zigzag_sub_4x4ac_field( int16_t level[15], const uint8_t *p_src, uint8_t *p_dst )
{
ZIG( 0,1,0) ZIG( 1,0,1) ZIG( 2,2,0)
ZIG( 3,3,0) ZIG( 4,1,1) ZIG( 5,2,1) ZIG( 6,3,1)
ZIG( 7,0,2) ZIG( 8,1,2) ZIG( 9,2,2) ZIG(10,3,2)
ZIG(11,0,3) ZIG(12,1,3) ZIG(13,2,3) ZIG(14,3,3)
COPY4x4
}
#undef ZIG
#undef COPY4x4
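
Why the 4x4ac scan and sub variants can go away entirely: in both frame and field order, the deleted AC tables are the full 4x4 zigzag with the DC position dropped, so scanning all 16 coefficients and reading from index 1 yields the identical 15 AC values. A small standalone check of that equivalence (my own illustration; the table below is the standard H.264 4x4 frame zigzag, which matches the deleted frame ZIG entries above shifted by one):

    #include <stdio.h>
    #include <stdint.h>

    /* Full 4x4 frame zigzag, written as raster indices in scan order. */
    static const int zigzag_frame[16] =
        { 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 };

    int main( void )
    {
        int16_t dct[16], level[16];
        int i;
        for( i = 0; i < 16; i++ )
            dct[i] = (int16_t)(3*i - 7);      /* arbitrary test coefficients */

        /* What scan_4x4 produces: all 16 coefficients, DC first. */
        for( i = 0; i < 16; i++ )
            level[i] = dct[zigzag_frame[i]];

        /* What the removed scan_4x4ac produced: the same sequence minus DC,
         * i.e. exactly level+1 with length 15 -- hence the "luma4x4[i]+1, 15"
         * pattern at every former residual_ac call site. */
        for( i = 1; i < 16; i++ )
            printf( "%d ", level[i] );
        printf( "\n" );
        return 0;
    }
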
@@ -594,9 +560,7 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf, int b_interlaced )
{
pf->scan_8x8 = zigzag_scan_8x8_field;
pf->scan_4x4 = zigzag_scan_4x4_field;
pf->scan_4x4ac = zigzag_scan_4x4ac_field;
pf->sub_4x4 = zigzag_sub_4x4_field;
pf->sub_4x4ac = zigzag_sub_4x4ac_field;
#ifdef HAVE_MMX
if( cpu&X264_CPU_MMXEXT )
pf->scan_4x4 = x264_zigzag_scan_4x4_field_mmxext;
@@ -604,20 +568,14 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf, int b_interlaced )
#ifdef ARCH_PPC
if( cpu&X264_CPU_ALTIVEC )
{
pf->scan_4x4 = x264_zigzag_scan_4x4_field_altivec;
pf->scan_4x4ac = x264_zigzag_scan_4x4ac_field_altivec;
}
#endif
}
else
{
pf->scan_8x8 = zigzag_scan_8x8_frame;
pf->scan_4x4 = zigzag_scan_4x4_frame;
pf->scan_4x4ac = zigzag_scan_4x4ac_frame;
pf->sub_4x4 = zigzag_sub_4x4_frame;
pf->sub_4x4ac = zigzag_sub_4x4ac_frame;
#ifdef HAVE_SSE3
if( cpu&X264_CPU_SSSE3 )
pf->sub_4x4 = x264_zigzag_sub_4x4_frame_ssse3;
......@@ -625,10 +583,7 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf, int b_interlaced )
#ifdef ARCH_PPC
if( cpu&X264_CPU_ALTIVEC )
{
pf->scan_4x4 = x264_zigzag_scan_4x4_frame_altivec;
pf->scan_4x4ac = x264_zigzag_scan_4x4ac_frame_altivec;
}
#endif
}
}
@@ -110,9 +110,7 @@ typedef struct
{
void (*scan_8x8)( int16_t level[64], int16_t dct[8][8] );
void (*scan_4x4)( int16_t level[16], int16_t dct[4][4] );
void (*scan_4x4ac)( int16_t level[15], int16_t dct[4][4] );
void (*sub_4x4)( int16_t level[16], const uint8_t *p_src, uint8_t *p_dst );
void (*sub_4x4ac)( int16_t level[15], const uint8_t *p_src, uint8_t *p_dst );
} x264_zigzag_function_t;
@@ -491,38 +491,3 @@ void x264_zigzag_scan_4x4_field_altivec( int16_t level[16], int16_t dct[4][4] )
vec_st( tmp1v, 0x10, level );
}
void x264_zigzag_scan_4x4ac_frame_altivec( int16_t level[15], int16_t dct[4][4] )
{
vec_s16_t dct0v, dct1v;
vec_s16_t tmp0v, tmp1v;
dct0v = vec_ld(0x00, (int16_t*)dct);
dct1v = vec_ld(0x10, (int16_t*)dct);
const vec_u8_t sel0 = (vec_u8_t) CV(8,9,2,3,4,5,10,11,16,17,24,25,18,19,12,13);
const vec_u8_t sel1 = (vec_u8_t) CV(6,7,14,15,20,21,26,27,28,29,22,23,30,31,0,1);
tmp0v = vec_perm( dct0v, dct1v, sel0 );
tmp1v = vec_perm( dct0v, dct1v, sel1 );
vec_st( tmp0v, 0x00, level );
vec_st( tmp1v, 0x10, level );
}
void x264_zigzag_scan_4x4ac_field_altivec( int16_t level[15], int16_t dct[4][4] )
{
vec_s16_t dct0v, dct1v;
vec_s16_t tmp0v, tmp1v;
dct0v = vec_ld(0x00, (int16_t*)dct);
dct1v = vec_ld(0x10, (int16_t*)dct);
const vec_u8_t sel0 = (vec_u8_t) CV(2,3,8,9,4,5,6,7,10,11,12,13,14,15,16,17);
const vec_u8_t sel1 = (vec_u8_t) CV(18,19,20,21,22,23,24,25,26,27,28,29,30,31,0,1);
tmp0v = vec_perm( dct0v, dct1v, sel0 );
tmp1v = vec_perm( dct0v, dct1v, sel1 );
vec_st( tmp0v, 0x00, level );
vec_st( tmp1v, 0x10, level );
}
@@ -45,9 +45,6 @@ void x264_add8x8_idct8_altivec( uint8_t *dst, int16_t dct[8][8] );
void x264_add16x16_idct8_altivec( uint8_t *dst, int16_t dct[4][8][8] );
void x264_zigzag_scan_4x4_frame_altivec( int16_t level[16], int16_t dct[4][4] );
void x264_zigzag_scan_4x4ac_frame_altivec( int16_t level[15], int16_t dct[4][4] );
void x264_zigzag_scan_4x4_field_altivec( int16_t level[16], int16_t dct[4][4] );
void x264_zigzag_scan_4x4ac_field_altivec( int16_t level[15], int16_t dct[4][4] );
#endif
@@ -741,7 +741,7 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_
{
h->mc.copy[PIXEL_16x16]( h->mb.pic.i4x4_fdec_buf, 16, p_dst, FDEC_STRIDE, 16 );
if( h->mb.i_skip_intra == 2 )
h->mc.memcpy_aligned( h->mb.pic.i4x4_dct_buf, h->dct.block, sizeof(h->mb.pic.i4x4_dct_buf) );
h->mc.memcpy_aligned( h->mb.pic.i4x4_dct_buf, h->dct.luma4x4, sizeof(h->mb.pic.i4x4_dct_buf) );
}
}
else
@@ -1026,7 +1026,7 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
/* AC Luma */
if( h->mb.i_cbp_luma != 0 )
for( i = 0; i < 16; i++ )
block_residual_write_cabac( h, cb, DCT_LUMA_AC, i, h->dct.block[i].residual_ac, 15 );
block_residual_write_cabac( h, cb, DCT_LUMA_AC, i, h->dct.luma4x4[i]+1, 15 );
}
else if( h->mb.b_transform_8x8 )
{
@@ -1038,7 +1038,7 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
{
for( i = 0; i < 16; i++ )
if( h->mb.i_cbp_luma & ( 1 << ( i / 4 ) ) )
block_residual_write_cabac( h, cb, DCT_LUMA_4x4, i, h->dct.block[i].luma4x4, 16 );
block_residual_write_cabac( h, cb, DCT_LUMA_4x4, i, h->dct.luma4x4[i], 16 );
}
if( h->mb.i_cbp_chroma &0x03 ) /* Chroma DC residual present */
@@ -1049,7 +1049,7 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
if( h->mb.i_cbp_chroma&0x02 ) /* Chroma AC residual present */
{
for( i = 16; i < 24; i++ )
block_residual_write_cabac( h, cb, DCT_CHROMA_AC, i, h->dct.block[i].residual_ac, 15 );
block_residual_write_cabac( h, cb, DCT_CHROMA_AC, i, h->dct.luma4x4[i]+1, 15 );
}
}
@@ -1119,12 +1119,12 @@ void x264_partition_size_cabac( x264_t *h, x264_cabac_t *cb, int i8, int i_pixel
{
int i4;
for( i4 = 0; i4 < 4; i4++ )
block_residual_write_cabac( h, cb, DCT_LUMA_4x4, i4+i8*4, h->dct.block[i4+i8*4].luma4x4, 16 );
block_residual_write_cabac( h, cb, DCT_LUMA_4x4, i4+i8*4, h->dct.luma4x4[i4+i8*4], 16 );
}
}
block_residual_write_cabac( h, cb, DCT_CHROMA_AC, 16+i8, h->dct.block[16+i8].residual_ac, 15 );
block_residual_write_cabac( h, cb, DCT_CHROMA_AC, 20+i8, h->dct.block[20+i8].residual_ac, 15 );
block_residual_write_cabac( h, cb, DCT_CHROMA_AC, 16+i8, h->dct.luma4x4[16+i8]+1, 15 );
block_residual_write_cabac( h, cb, DCT_CHROMA_AC, 20+i8, h->dct.luma4x4[20+i8]+1, 15 );
i8 += x264_pixel_size[i_pixel].h >> 3;
}
@@ -1143,7 +1143,7 @@ static void x264_partition_i4x4_size_cabac( x264_t *h, x264_cabac_t *cb, int i4,
const int i_pred = x264_mb_predict_intra4x4_mode( h, i4 );
i_mode = x264_mb_pred_mode4x4_fix( i_mode );
x264_cabac_mb_intra4x4_pred_mode( cb, i_pred, i_mode );
block_residual_write_cabac( h, cb, DCT_LUMA_4x4, i4, h->dct.block[i4].luma4x4, 16 );
block_residual_write_cabac( h, cb, DCT_LUMA_4x4, i4, h->dct.luma4x4[i4], 16 );
}
static void x264_i8x8_chroma_size_cabac( x264_t *h, x264_cabac_t *cb )
@@ -1158,7 +1158,7 @@ static void x264_i8x8_chroma_size_cabac( x264_t *h, x264_cabac_t *cb )
{
int i;
for( i = 16; i < 24; i++ )
block_residual_write_cabac( h, cb, DCT_CHROMA_AC, i, h->dct.block[i].residual_ac, 15 );
block_residual_write_cabac( h, cb, DCT_CHROMA_AC, i, h->dct.luma4x4[i]+1, 15 );
}
}
}
@@ -304,16 +304,16 @@ static inline void x264_macroblock_luma_write_cavlc( x264_t *h, bs_t *s, int i8s
for( i4 = 0; i4 < 4; i4++ )
{
for( i = 0; i < 16; i++ )
h->dct.block[i4+i8*4].luma4x4[i] = h->dct.luma8x8[i8][i4+i*4];
h->dct.luma4x4[i4+i8*4][i] = h->dct.luma8x8[i8][i4+i*4];
h->mb.cache.non_zero_count[x264_scan8[i4+i8*4]] =
array_non_zero_count( h->dct.block[i4+i8*4].luma4x4, 16 );
array_non_zero_count( h->dct.luma4x4[i4+i8*4], 16 );
}
}
for( i8 = i8start; i8 <= i8end; i8++ )
if( h->mb.i_cbp_luma & (1 << i8) )
for( i4 = 0; i4 < 4; i4++ )
block_residual_write_cavlc( h, s, i4+i8*4, h->dct.block[i4+i8*4].luma4x4, 16 );
block_residual_write_cavlc( h, s, i4+i8*4, h->dct.luma4x4[i4+i8*4], 16 );
}
/*****************************************************************************
@@ -666,7 +666,7 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
/* AC Luma */
if( h->mb.i_cbp_luma != 0 )
for( i = 0; i < 16; i++ )
block_residual_write_cavlc( h, s, i, h->dct.block[i].residual_ac, 15 );
block_residual_write_cavlc( h, s, i, h->dct.luma4x4[i]+1, 15 );
}
else if( h->mb.i_cbp_luma != 0 || h->mb.i_cbp_chroma != 0 )
{
@@ -680,7 +680,7 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
block_residual_write_cavlc( h, s, BLOCK_INDEX_CHROMA_DC, h->dct.chroma_dc[1], 4 );
if( h->mb.i_cbp_chroma&0x02 ) /* Chroma AC residual present */
for( i = 16; i < 24; i++ )
block_residual_write_cavlc( h, s, i, h->dct.block[i].residual_ac, 15 );
block_residual_write_cavlc( h, s, i, h->dct.luma4x4[i]+1, 15 );
}
#ifndef RDO_SKIP_BS
@@ -746,8 +746,8 @@ int x264_partition_size_cavlc( x264_t *h, int i8, int i_pixel )
{
x264_macroblock_luma_write_cavlc( h, &s, i8, i8 );
block_residual_write_cavlc( h, &s, 16+i8, h->dct.block[16+i8].residual_ac, 15 );
block_residual_write_cavlc( h, &s, 20+i8, h->dct.block[20+i8].residual_ac, 15 );
block_residual_write_cavlc( h, &s, 16+i8, h->dct.luma4x4[16+i8]+1, 15 );
block_residual_write_cavlc( h, &s, 20+i8, h->dct.luma4x4[20+i8]+1, 15 );
i8 += x264_pixel_size[i_pixel].h >> 3;
}
@@ -770,10 +770,10 @@ static int x264_partition_i8x8_size_cavlc( x264_t *h, int i8, int i_mode )
for( i4 = 0; i4 < 4; i4++ )
{
for( i = 0; i < 16; i++ )
h->dct.block[i4+i8*4].luma4x4[i] = h->dct.luma8x8[i8][i4+i*4];
h->dct.luma4x4[i4+i8*4][i] = h->dct.luma8x8[i8][i4+i*4];
h->mb.cache.non_zero_count[x264_scan8[i4+i8*4]] =
array_non_zero_count( h->dct.block[i4+i8*4].luma4x4, 16 );
block_residual_write_cavlc( h, &h->out.bs, i4+i8*4, h->dct.block[i4+i8*4].luma4x4, 16 );
array_non_zero_count( h->dct.luma4x4[i4+i8*4], 16 );
block_residual_write_cavlc( h, &h->out.bs, i4+i8*4, h->dct.luma4x4[i4+i8*4], 16 );
}
return h->out.bs.i_bits_encoded;
}
@@ -781,7 +781,7 @@ static int x264_partition_i8x8_size_cavlc( x264_t *h, int i8, int i_mode )
static int x264_partition_i4x4_size_cavlc( x264_t *h, int i4, int i_mode )
{
h->out.bs.i_bits_encoded = cavlc_intra4x4_pred_size( h, i4, i_mode );
block_residual_write_cavlc( h, &h->out.bs, i4, h->dct.block[i4].luma4x4, 16 );
block_residual_write_cavlc( h, &h->out.bs, i4, h->dct.luma4x4[i4], 16 );
return h->out.bs.i_bits_encoded;
}
@@ -797,7 +797,7 @@ static int x264_i8x8_chroma_size_cavlc( x264_t *h )
{
int i;
for( i = 16; i < 24; i++ )
block_residual_write_cavlc( h, &h->out.bs, i, h->dct.block[i].residual_ac, 15 );
block_residual_write_cavlc( h, &h->out.bs, i, h->dct.luma4x4[i]+1, 15 );
}
}
return h->out.bs.i_bits_encoded;
@@ -89,7 +89,7 @@ void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qscale )
if( h->mb.b_lossless )
{
h->zigzagf.sub_4x4( h->dct.block[idx].luma4x4, p_src, p_dst );
h->zigzagf.sub_4x4( h->dct.luma4x4[idx], p_src, p_dst );
return;
}
@@ -100,7 +100,7 @@ void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qscale )
else
h->quantf.quant_4x4( dct4x4, h->quant4_mf[CQM_4IY][i_qscale], h->quant4_bias[CQM_4IY][i_qscale] );
h->zigzagf.scan_4x4( h->dct.block[idx].luma4x4, dct4x4 );
h->zigzagf.scan_4x4( h->dct.luma4x4[idx], dct4x4 );
h->quantf.dequant_4x4( dct4x4, h->dequant4_mf[CQM_4IY], i_qscale );
/* output samples to fdec */
@@ -142,7 +142,7 @@ static void x264_mb_encode_i16x16( x264_t *h, int i_qscale )
{
int oe = block_idx_x[i]*4 + block_idx_y[i]*4*FENC_STRIDE;
int od = block_idx_x[i]*4 + block_idx_y[i]*4*FDEC_STRIDE;
h->zigzagf.sub_4x4ac( h->dct.block[i].residual_ac, p_src+oe, p_dst+od );
h->zigzagf.sub_4x4( h->dct.luma4x4[i], p_src+oe, p_dst+od );
dct4x4[0][block_idx_x[i]][block_idx_y[i]] = p_src[oe] - p_dst[od];
p_dst[od] = p_src[oe];
}
@@ -162,7 +162,7 @@ static void x264_mb_encode_i16x16( x264_t *h, int i_qscale )
else
h->quantf.quant_4x4( dct4x4[1+i], h->quant4_mf[CQM_4IY][i_qscale], h->quant4_bias[CQM_4IY][i_qscale] );
h->zigzagf.scan_4x4ac( h->dct.block[i].residual_ac, dct4x4[1+i] );
h->zigzagf.scan_4x4( h->dct.luma4x4[i], dct4x4[1+i] );
h->quantf.dequant_4x4( dct4x4[1+i], h->dequant4_mf[CQM_4IY], i_qscale );
}
@@ -204,7 +204,7 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qscale )
{
int oe = block_idx_x[i]*4 + block_idx_y[i]*4*FENC_STRIDE;
int od = block_idx_x[i]*4 + block_idx_y[i]*4*FDEC_STRIDE;
h->zigzagf.sub_4x4ac( h->dct.block[16+i+ch*4].residual_ac, p_src+oe, p_dst+od );
h->zigzagf.sub_4x4( h->dct.luma4x4[16+i+ch*4], p_src+oe, p_dst+od );
h->dct.chroma_dc[ch][i] = p_src[oe] - p_dst[od];
p_dst[od] = p_src[oe];
}
@@ -220,11 +220,11 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qscale )
/* no trellis; it doesn't seem to help chroma noticeably */
h->quantf.quant_4x4( dct4x4[i], h->quant4_mf[CQM_4IC+b_inter][i_qscale], h->quant4_bias[CQM_4IC+b_inter][i_qscale] );
h->zigzagf.scan_4x4ac( h->dct.block[16+i+ch*4].residual_ac, dct4x4[i] );
h->zigzagf.scan_4x4( h->dct.luma4x4[16+i+ch*4], dct4x4[i] );
if( b_decimate )
{
i_decimate_score += x264_mb_decimate_score( h->dct.block[16+i+ch*4].residual_ac, 15 );
i_decimate_score += x264_mb_decimate_score( h->dct.luma4x4[16+i+ch*4]+1, 15 );
}
}
@@ -239,7 +239,7 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qscale )
if( b_decimate && i_decimate_score < 7 )
{
/* Near null chroma 8x8 block so make it null (bits saving) */
memset( &h->dct.block[16+ch*4], 0, 4 * sizeof( *h->dct.block ) );
memset( &h->dct.luma4x4[16+ch*4], 0, 4 * sizeof( *h->dct.luma4x4 ) );
if( !array_non_zero( dct2x2 ) )
continue;
memset( dct4x4, 0, sizeof( dct4x4 ) );
@@ -259,7 +259,7 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qscale )
h->mb.i_cbp_chroma = 0;
for( i = 0; i < 8; i++ )
{
int nz = array_non_zero_count( h->dct.block[16+i].residual_ac, 15 );
int nz = array_non_zero_count( h->dct.luma4x4[16+i]+1, 15 );
h->mb.cache.non_zero_count[x264_scan8[16+i]] = nz;
h->mb.i_cbp_chroma |= nz;
}
@@ -395,7 +395,7 @@ void x264_macroblock_encode( x264_t *h )
h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.i4x4_fdec_buf, 16, 16 );
/* In RD mode, restore the now-overwritten DCT data. */
if( h->mb.i_skip_intra == 2 )
h->mc.memcpy_aligned( h->dct.block, h->mb.pic.i4x4_dct_buf, sizeof(h->mb.pic.i4x4_dct_buf) );
h->mc.memcpy_aligned( h->dct.luma4x4, h->mb.pic.i4x4_dct_buf, sizeof(h->mb.pic.i4x4_dct_buf) );
}
for( i = h->mb.i_skip_intra ? 15 : 0 ; i < 16; i++ )
{
@@ -424,7 +424,7 @@ void x264_macroblock_encode( x264_t *h )
{
int x = 4*block_idx_x[i4x4];
int y = 4*block_idx_y[i4x4];
h->zigzagf.sub_4x4( h->dct.block[i4x4].luma4x4,
h->zigzagf.sub_4x4( h->dct.luma4x4[i4x4],
h->mb.pic.p_fenc[0]+x+y*FENC_STRIDE,
h->mb.pic.p_fdec[0]+x+y*FDEC_STRIDE );
}
@@ -497,10 +497,10 @@ void x264_macroblock_encode( x264_t *h )
else
h->quantf.quant_4x4( dct4x4[idx], h->quant4_mf[CQM_4PY][i_qp], h->quant4_bias[CQM_4PY][i_qp] );
h->zigzagf.scan_4x4( h->dct.block[idx].luma4x4, dct4x4[idx] );
h->zigzagf.scan_4x4( h->dct.luma4x4[idx], dct4x4[idx] );
if( b_decimate )
i_decimate_8x8 += x264_mb_decimate_score( h->dct.block[idx].luma4x4, 16 );
i_decimate_8x8 += x264_mb_decimate_score( h->dct.luma4x4[idx], 16 );
}
/* decimate this 8x8 block */
@@ -508,13 +508,13 @@ void x264_macroblock_encode( x264_t *h )
if( i_decimate_8x8 < 4 && b_decimate )
{
memset( &dct4x4[i8x8*4], 0, 4 * sizeof( *dct4x4 ) );
memset( &h->dct.block[i8x8*4], 0, 4 * sizeof( *h->dct.block ) );
memset( &h->dct.luma4x4[i8x8*4], 0, 4 * sizeof( *h->dct.luma4x4 ) );
nnz8x8[i8x8] = 0;
}
}
if( i_decimate_mb < 6 && b_decimate )
memset( h->dct.block, 0, 16 * sizeof( *h->dct.block ) );
memset( h->dct.luma4x4, 0, 16 * sizeof( *h->dct.luma4x4 ) );
else
{
for( i8x8 = 0; i8x8 < 4; i8x8++ )
@@ -545,7 +545,7 @@ void x264_macroblock_encode( x264_t *h )
{
for( i = 0; i < 16; i++ )
{
const int nz = array_non_zero_count( h->dct.block[i].residual_ac, 15 );
const int nz = array_non_zero_count( h->dct.luma4x4[i]+1, 15 );
h->mb.cache.non_zero_count[x264_scan8[i]] = nz;
if( nz > 0 )
h->mb.i_cbp_luma = 0x0f;
@@ -569,7 +569,7 @@ void x264_macroblock_encode( x264_t *h )
{
for( i = 0; i < 16; i++ )
{
const int nz = array_non_zero_count( h->dct.block[i].luma4x4, 16 );
const int nz = array_non_zero_count( h->dct.luma4x4[i], 16 );
h->mb.cache.non_zero_count[x264_scan8[i]] = nz;
if( nz > 0 )
h->mb.i_cbp_luma |= 1 << (i/4);
@@ -697,9 +697,9 @@ int x264_macroblock_probe_skip( x264_t *h, const int b_bidir )
for( i4x4 = 0, i_decimate_mb = 0; i4x4 < 4; i4x4++ )
{
h->quantf.quant_4x4( dct4x4[i4x4], h->quant4_mf[CQM_4PC][i_qp], h->quant4_bias[CQM_4PC][i_qp] );
h->zigzagf.scan_4x4ac( dctscan, dct4x4[i4x4] );
h->zigzagf.scan_4x4( dctscan, dct4x4[i4x4] );
i_decimate_mb += x264_mb_decimate_score( dctscan, 15 );
i_decimate_mb += x264_mb_decimate_score( dctscan+1, 15 );
if( i_decimate_mb >= 7 )
{
return 0;
@@ -812,13 +812,13 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
h->quantf.quant_4x4( dct4x4[2], h->quant4_mf[CQM_4PY][i_qp], h->quant4_bias[CQM_4PY][i_qp] );
h->quantf.quant_4x4( dct4x4[3], h->quant4_mf[CQM_4PY][i_qp], h->quant4_bias[CQM_4PY][i_qp] );
for( i4 = 0; i4 < 4; i4++ )
h->zigzagf.scan_4x4( h->dct.block[i8*4+i4].luma4x4, dct4x4[i4] );
h->zigzagf.scan_4x4( h->dct.luma4x4[i8*4+i4], dct4x4[i4] );
if( b_decimate )
{
int i_decimate_8x8 = 0;
for( i4 = 0; i4 < 4 && i_decimate_8x8 < 4; i4++ )
i_decimate_8x8 += x264_mb_decimate_score( h->dct.block[i8*4+i4].luma4x4, 16 );
i_decimate_8x8 += x264_mb_decimate_score( h->dct.luma4x4[i8*4+i4], 16 );
nnz8x8 = 4 <= i_decimate_8x8;
}
else
@@ -842,7 +842,7 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
h->dctf.sub4x4_dct( dct4x4, p_fenc, p_fdec );
h->quantf.quant_4x4( dct4x4, h->quant4_mf[CQM_4PC][i_qp], h->quant4_bias[CQM_4PC][i_qp] );
h->zigzagf.scan_4x4ac( h->dct.block[16+i8+ch*4].residual_ac, dct4x4 );
h->zigzagf.scan_4x4( h->dct.luma4x4[16+i8+ch*4], dct4x4 );
if( array_non_zero( dct4x4 ) )
{
h->quantf.dequant_4x4( dct4x4, h->dequant4_mf[CQM_4PC], i_qp );
@@ -385,9 +385,7 @@ static int check_dct( int cpu_ref, int cpu_new )
ok = 1; used_asm = 0;
TEST_ZIGZAG_SCAN( scan_8x8, level1, level2, (void*)dct1, 64 );
TEST_ZIGZAG_SCAN( scan_4x4, level1, level2, dct1[0], 16 );
TEST_ZIGZAG_SCAN( scan_4x4ac, level1, level2, dct1[0], 15 );
TEST_ZIGZAG_SUB( sub_4x4, level1, level2, 16 );
TEST_ZIGZAG_SUB( sub_4x4ac, level1, level2, 15 );
report( "zigzag_frame :" );
x264_zigzag_init( 0, &zigzag_c, 1 );
@@ -397,9 +395,7 @@ static int check_dct( int cpu_ref, int cpu_new )
ok = 1; used_asm = 0;
TEST_ZIGZAG_SCAN( scan_8x8, level1, level2, (void*)dct1, 64 );
TEST_ZIGZAG_SCAN( scan_4x4, level1, level2, dct1[0], 16 );
TEST_ZIGZAG_SCAN( scan_4x4ac, level1, level2, dct1[0], 15 );
TEST_ZIGZAG_SUB( sub_4x4, level1, level2, 16 );
TEST_ZIGZAG_SUB( sub_4x4ac, level1, level2, 15 );
report( "zigzag_field :" );
#undef TEST_ZIGZAG_SCAN
#undef TEST_ZIGZAG_SUB