Commit e46bf243 authored by Oskar Arvidsson, committed by Fiona Glaser

Convert to a unified "dctcoeff" type for DCT data

Necessary for future high bit-depth support.
parent 17a04af4
......@@ -102,11 +102,15 @@ typedef union { x264_uint128_t i; uint64_t a[2]; uint32_t b[4]; uint16_t c[8]; u
typedef uint8_t pixel;
typedef uint32_t pixel4;
typedef int16_t dctcoef;
#define PIXEL_SPLAT_X4(x) ((x)*0x01010101U)
#define MPIXEL_X4(src) M32(src)
#define CPPIXEL_X4(dst,src) CP32(dst,src)
#define CPPIXEL_X8(dst,src) CP64(dst,src)
#define MDCT_X2(dct) M32(dct)
#define CPDCT_X2(dst,src) CP32(dst,src)
#define CPDCT_X4(dst,src) CP64(dst,src)
#define X264_SCAN8_SIZE (6*8)
#define X264_SCAN8_LUMA_SIZE (5*8)
......@@ -502,11 +506,11 @@ struct x264_t
/* Current MB DCT coeffs */
struct
{
ALIGNED_16( int16_t luma16x16_dc[16] );
ALIGNED_16( int16_t chroma_dc[2][4] );
ALIGNED_16( dctcoef luma16x16_dc[16] );
ALIGNED_16( dctcoef chroma_dc[2][4] );
// FIXME share memory?
ALIGNED_16( int16_t luma8x8[4][64] );
ALIGNED_16( int16_t luma4x4[16+8][16] );
ALIGNED_16( dctcoef luma8x8[4][64] );
ALIGNED_16( dctcoef luma4x4[16+8][16] );
} dct;
/* MB table and cache for current frame/mb */
......@@ -625,16 +629,16 @@ struct x264_t
/* i4x4 and i8x8 backup data, for skipping the encode stage when possible */
ALIGNED_16( pixel i4x4_fdec_buf[16*16] );
ALIGNED_16( pixel i8x8_fdec_buf[16*16] );
ALIGNED_16( int16_t i8x8_dct_buf[3][64] );
ALIGNED_16( int16_t i4x4_dct_buf[15][16] );
ALIGNED_16( dctcoef i8x8_dct_buf[3][64] );
ALIGNED_16( dctcoef i4x4_dct_buf[15][16] );
uint32_t i4x4_nnz_buf[4];
uint32_t i8x8_nnz_buf[4];
int i4x4_cbp;
int i8x8_cbp;
/* Psy trellis DCT data */
ALIGNED_16( int16_t fenc_dct8[4][64] );
ALIGNED_16( int16_t fenc_dct4[16][16] );
ALIGNED_16( dctcoef fenc_dct8[4][64] );
ALIGNED_16( dctcoef fenc_dct4[16][16] );
/* Psy RD SATD/SA8D scores cache */
ALIGNED_16( uint64_t fenc_hadamard_cache[9] );
......
......@@ -35,9 +35,9 @@
int x264_dct4_weight2_zigzag[2][16];
int x264_dct8_weight2_zigzag[2][64];
static void dct4x4dc( int16_t d[16] )
static void dct4x4dc( dctcoef d[16] )
{
int16_t tmp[16];
dctcoef tmp[16];
for( int i = 0; i < 4; i++ )
{
......@@ -66,9 +66,9 @@ static void dct4x4dc( int16_t d[16] )
}
}
static void idct4x4dc( int16_t d[16] )
static void idct4x4dc( dctcoef d[16] )
{
int16_t tmp[16];
dctcoef tmp[16];
for( int i = 0; i < 4; i++ )
{
......@@ -97,7 +97,7 @@ static void idct4x4dc( int16_t d[16] )
}
}
static inline void pixel_sub_wxh( int16_t *diff, int i_size,
static inline void pixel_sub_wxh( dctcoef *diff, int i_size,
pixel *pix1, int i_pix1, pixel *pix2, int i_pix2 )
{
for( int y = 0; y < i_size; y++ )
......@@ -109,10 +109,10 @@ static inline void pixel_sub_wxh( int16_t *diff, int i_size,
}
}
static void sub4x4_dct( int16_t dct[16], pixel *pix1, pixel *pix2 )
static void sub4x4_dct( dctcoef dct[16], pixel *pix1, pixel *pix2 )
{
int16_t d[16];
int16_t tmp[16];
dctcoef d[16];
dctcoef tmp[16];
pixel_sub_wxh( d, 4, pix1, FENC_STRIDE, pix2, FDEC_STRIDE );
......@@ -143,7 +143,7 @@ static void sub4x4_dct( int16_t dct[16], pixel *pix1, pixel *pix2 )
}
}
static void sub8x8_dct( int16_t dct[4][16], pixel *pix1, pixel *pix2 )
static void sub8x8_dct( dctcoef dct[4][16], pixel *pix1, pixel *pix2 )
{
sub4x4_dct( dct[0], &pix1[0], &pix2[0] );
sub4x4_dct( dct[1], &pix1[4], &pix2[4] );
......@@ -151,7 +151,7 @@ static void sub8x8_dct( int16_t dct[4][16], pixel *pix1, pixel *pix2 )
sub4x4_dct( dct[3], &pix1[4*FENC_STRIDE+4], &pix2[4*FDEC_STRIDE+4] );
}
static void sub16x16_dct( int16_t dct[16][16], pixel *pix1, pixel *pix2 )
static void sub16x16_dct( dctcoef dct[16][16], pixel *pix1, pixel *pix2 )
{
sub8x8_dct( &dct[ 0], &pix1[0], &pix2[0] );
sub8x8_dct( &dct[ 4], &pix1[8], &pix2[8] );
......@@ -161,7 +161,7 @@ static void sub16x16_dct( int16_t dct[16][16], pixel *pix1, pixel *pix2 )
static int sub4x4_dct_dc( pixel *pix1, pixel *pix2 )
{
int16_t d[16];
dctcoef d[16];
int sum = 0;
pixel_sub_wxh( d, 4, pix1, FENC_STRIDE, pix2, FDEC_STRIDE );
......@@ -172,7 +172,7 @@ static int sub4x4_dct_dc( pixel *pix1, pixel *pix2 )
return sum;
}
static void sub8x8_dct_dc( int16_t dct[4], pixel *pix1, pixel *pix2 )
static void sub8x8_dct_dc( dctcoef dct[4], pixel *pix1, pixel *pix2 )
{
dct[0] = sub4x4_dct_dc( &pix1[0], &pix2[0] );
dct[1] = sub4x4_dct_dc( &pix1[4], &pix2[4] );
......@@ -190,10 +190,10 @@ static void sub8x8_dct_dc( int16_t dct[4], pixel *pix1, pixel *pix2 )
dct[3] = d2 - d3;
}
static void add4x4_idct( pixel *p_dst, int16_t dct[16] )
static void add4x4_idct( pixel *p_dst, dctcoef dct[16] )
{
int16_t d[16];
int16_t tmp[16];
dctcoef d[16];
dctcoef tmp[16];
for( int i = 0; i < 4; i++ )
{
......@@ -230,7 +230,7 @@ static void add4x4_idct( pixel *p_dst, int16_t dct[16] )
}
}
static void add8x8_idct( pixel *p_dst, int16_t dct[4][16] )
static void add8x8_idct( pixel *p_dst, dctcoef dct[4][16] )
{
add4x4_idct( &p_dst[0], dct[0] );
add4x4_idct( &p_dst[4], dct[1] );
......@@ -238,7 +238,7 @@ static void add8x8_idct( pixel *p_dst, int16_t dct[4][16] )
add4x4_idct( &p_dst[4*FDEC_STRIDE+4], dct[3] );
}
static void add16x16_idct( pixel *p_dst, int16_t dct[16][16] )
static void add16x16_idct( pixel *p_dst, dctcoef dct[16][16] )
{
add8x8_idct( &p_dst[0], &dct[0] );
add8x8_idct( &p_dst[8], &dct[4] );
......@@ -277,9 +277,9 @@ static void add16x16_idct( pixel *p_dst, int16_t dct[16][16] )
DST(7) = (a4>>2) - a7 ;\
}
static void sub8x8_dct8( int16_t dct[64], pixel *pix1, pixel *pix2 )
static void sub8x8_dct8( dctcoef dct[64], pixel *pix1, pixel *pix2 )
{
int16_t tmp[64];
dctcoef tmp[64];
pixel_sub_wxh( tmp, 8, pix1, FENC_STRIDE, pix2, FDEC_STRIDE );
......@@ -298,7 +298,7 @@ static void sub8x8_dct8( int16_t dct[64], pixel *pix1, pixel *pix2 )
#undef DST
}
static void sub16x16_dct8( int16_t dct[4][64], pixel *pix1, pixel *pix2 )
static void sub16x16_dct8( dctcoef dct[4][64], pixel *pix1, pixel *pix2 )
{
sub8x8_dct8( dct[0], &pix1[0], &pix2[0] );
sub8x8_dct8( dct[1], &pix1[8], &pix2[8] );
......@@ -333,7 +333,7 @@ static void sub16x16_dct8( int16_t dct[4][64], pixel *pix1, pixel *pix2 )
DST(7, b0 - b7);\
}
static void add8x8_idct8( pixel *dst, int16_t dct[64] )
static void add8x8_idct8( pixel *dst, dctcoef dct[64] )
{
dct[0] += 32; // rounding for the >>6 at the end
......@@ -352,7 +352,7 @@ static void add8x8_idct8( pixel *dst, int16_t dct[64] )
#undef DST
}
static void add16x16_idct8( pixel *dst, int16_t dct[4][64] )
static void add16x16_idct8( pixel *dst, dctcoef dct[4][64] )
{
add8x8_idct8( &dst[0], dct[0] );
add8x8_idct8( &dst[8], dct[1] );
......@@ -360,7 +360,7 @@ static void add16x16_idct8( pixel *dst, int16_t dct[4][64] )
add8x8_idct8( &dst[8*FDEC_STRIDE+8], dct[3] );
}
static void inline add4x4_idct_dc( pixel *p_dst, int16_t dc )
static void inline add4x4_idct_dc( pixel *p_dst, dctcoef dc )
{
dc = (dc + 32) >> 6;
for( int i = 0; i < 4; i++, p_dst += FDEC_STRIDE )
......@@ -372,7 +372,7 @@ static void inline add4x4_idct_dc( pixel *p_dst, int16_t dc )
}
}
static void add8x8_idct_dc( pixel *p_dst, int16_t dct[4] )
static void add8x8_idct_dc( pixel *p_dst, dctcoef dct[4] )
{
add4x4_idct_dc( &p_dst[0], dct[0] );
add4x4_idct_dc( &p_dst[4], dct[1] );
......@@ -380,7 +380,7 @@ static void add8x8_idct_dc( pixel *p_dst, int16_t dct[4] )
add4x4_idct_dc( &p_dst[4*FDEC_STRIDE+4], dct[3] );
}
static void add16x16_idct_dc( pixel *p_dst, int16_t dct[16] )
static void add16x16_idct_dc( pixel *p_dst, dctcoef dct[16] )
{
for( int i = 0; i < 4; i++, dct += 4, p_dst += 4*FDEC_STRIDE )
{
......@@ -578,12 +578,12 @@ void x264_dct_init_weights( void )
ZIG( 8,0,2) ZIG( 9,1,2) ZIG(10,2,2) ZIG(11,3,2)\
ZIG(12,0,3) ZIG(13,1,3) ZIG(14,2,3) ZIG(15,3,3)
static void zigzag_scan_8x8_frame( int16_t level[64], int16_t dct[64] )
static void zigzag_scan_8x8_frame( dctcoef level[64], dctcoef dct[64] )
{
ZIGZAG8_FRAME
}
static void zigzag_scan_8x8_field( int16_t level[64], int16_t dct[64] )
static void zigzag_scan_8x8_field( dctcoef level[64], dctcoef dct[64] )
{
ZIGZAG8_FIELD
}
......@@ -592,18 +592,18 @@ static void zigzag_scan_8x8_field( int16_t level[64], int16_t dct[64] )
#define ZIG(i,y,x) level[i] = dct[x*4+y];
#define ZIGDC(i,y,x) ZIG(i,y,x)
static void zigzag_scan_4x4_frame( int16_t level[16], int16_t dct[16] )
static void zigzag_scan_4x4_frame( dctcoef level[16], dctcoef dct[16] )
{
ZIGZAG4_FRAME
}
static void zigzag_scan_4x4_field( int16_t level[16], int16_t dct[16] )
static void zigzag_scan_4x4_field( dctcoef level[16], dctcoef dct[16] )
{
CP32( level, dct );
CPDCT_X2( level, dct );
ZIG(2,0,1) ZIG(3,2,0) ZIG(4,3,0) ZIG(5,1,1)
CP32( level+6, dct+6 );
CP64( level+8, dct+8 );
CP64( level+12, dct+12 );
CPDCT_X2( level+6, dct+6 );
CPDCT_X4( level+8, dct+8 );
CPDCT_X4( level+12, dct+12 );
}
#undef ZIG
......@@ -628,7 +628,7 @@ static void zigzag_scan_4x4_field( int16_t level[16], int16_t dct[16] )
CPPIXEL_X8( p_dst+6*FDEC_STRIDE, p_src+6*FENC_STRIDE );\
CPPIXEL_X8( p_dst+7*FDEC_STRIDE, p_src+7*FENC_STRIDE );
static int zigzag_sub_4x4_frame( int16_t level[16], const pixel *p_src, pixel *p_dst )
static int zigzag_sub_4x4_frame( dctcoef level[16], const pixel *p_src, pixel *p_dst )
{
int nz = 0;
ZIGZAG4_FRAME
......@@ -636,7 +636,7 @@ static int zigzag_sub_4x4_frame( int16_t level[16], const pixel *p_src, pixel *p
return !!nz;
}
static int zigzag_sub_4x4_field( int16_t level[16], const pixel *p_src, pixel *p_dst )
static int zigzag_sub_4x4_field( dctcoef level[16], const pixel *p_src, pixel *p_dst )
{
int nz = 0;
ZIGZAG4_FIELD
......@@ -652,7 +652,7 @@ static int zigzag_sub_4x4_field( int16_t level[16], const pixel *p_src, pixel *p
level[0] = 0;\
}
static int zigzag_sub_4x4ac_frame( int16_t level[16], const pixel *p_src, pixel *p_dst, int16_t *dc )
static int zigzag_sub_4x4ac_frame( dctcoef level[16], const pixel *p_src, pixel *p_dst, dctcoef *dc )
{
int nz = 0;
ZIGZAG4_FRAME
......@@ -660,7 +660,7 @@ static int zigzag_sub_4x4ac_frame( int16_t level[16], const pixel *p_src, pixel
return !!nz;
}
static int zigzag_sub_4x4ac_field( int16_t level[16], const pixel *p_src, pixel *p_dst, int16_t *dc )
static int zigzag_sub_4x4ac_field( dctcoef level[16], const pixel *p_src, pixel *p_dst, dctcoef *dc )
{
int nz = 0;
ZIGZAG4_FIELD
......@@ -668,14 +668,14 @@ static int zigzag_sub_4x4ac_field( int16_t level[16], const pixel *p_src, pixel
return !!nz;
}
static int zigzag_sub_8x8_frame( int16_t level[64], const pixel *p_src, pixel *p_dst )
static int zigzag_sub_8x8_frame( dctcoef level[64], const pixel *p_src, pixel *p_dst )
{
int nz = 0;
ZIGZAG8_FRAME
COPY8x8
return !!nz;
}
static int zigzag_sub_8x8_field( int16_t level[64], const pixel *p_src, pixel *p_dst )
static int zigzag_sub_8x8_field( dctcoef level[64], const pixel *p_src, pixel *p_dst )
{
int nz = 0;
ZIGZAG8_FIELD
......@@ -686,7 +686,7 @@ static int zigzag_sub_8x8_field( int16_t level[64], const pixel *p_src, pixel *p
#undef ZIG
#undef COPY4x4
static void zigzag_interleave_8x8_cavlc( int16_t *dst, int16_t *src, uint8_t *nnz )
static void zigzag_interleave_8x8_cavlc( dctcoef *dst, dctcoef *src, uint8_t *nnz )
{
for( int i = 0; i < 4; i++ )
{
......
......@@ -91,37 +91,37 @@ typedef struct
// pix1 stride = FENC_STRIDE
// pix2 stride = FDEC_STRIDE
// p_dst stride = FDEC_STRIDE
void (*sub4x4_dct) ( int16_t dct[16], pixel *pix1, pixel *pix2 );
void (*add4x4_idct) ( pixel *p_dst, int16_t dct[16] );
void (*sub4x4_dct) ( dctcoef dct[16], pixel *pix1, pixel *pix2 );
void (*add4x4_idct) ( pixel *p_dst, dctcoef dct[16] );
void (*sub8x8_dct) ( int16_t dct[4][16], pixel *pix1, pixel *pix2 );
void (*sub8x8_dct_dc)( int16_t dct[4], pixel *pix1, pixel *pix2 );
void (*add8x8_idct) ( pixel *p_dst, int16_t dct[4][16] );
void (*add8x8_idct_dc) ( pixel *p_dst, int16_t dct[4] );
void (*sub8x8_dct) ( dctcoef dct[4][16], pixel *pix1, pixel *pix2 );
void (*sub8x8_dct_dc)( dctcoef dct[4], pixel *pix1, pixel *pix2 );
void (*add8x8_idct) ( pixel *p_dst, dctcoef dct[4][16] );
void (*add8x8_idct_dc) ( pixel *p_dst, dctcoef dct[4] );
void (*sub16x16_dct) ( int16_t dct[16][16], pixel *pix1, pixel *pix2 );
void (*add16x16_idct)( pixel *p_dst, int16_t dct[16][16] );
void (*add16x16_idct_dc) ( pixel *p_dst, int16_t dct[16] );
void (*sub16x16_dct) ( dctcoef dct[16][16], pixel *pix1, pixel *pix2 );
void (*add16x16_idct)( pixel *p_dst, dctcoef dct[16][16] );
void (*add16x16_idct_dc) ( pixel *p_dst, dctcoef dct[16] );
void (*sub8x8_dct8) ( int16_t dct[64], pixel *pix1, pixel *pix2 );
void (*add8x8_idct8) ( pixel *p_dst, int16_t dct[64] );
void (*sub8x8_dct8) ( dctcoef dct[64], pixel *pix1, pixel *pix2 );
void (*add8x8_idct8) ( pixel *p_dst, dctcoef dct[64] );
void (*sub16x16_dct8) ( int16_t dct[4][64], pixel *pix1, pixel *pix2 );
void (*add16x16_idct8)( pixel *p_dst, int16_t dct[4][64] );
void (*sub16x16_dct8) ( dctcoef dct[4][64], pixel *pix1, pixel *pix2 );
void (*add16x16_idct8)( pixel *p_dst, dctcoef dct[4][64] );
void (*dct4x4dc) ( int16_t d[16] );
void (*idct4x4dc)( int16_t d[16] );
void (*dct4x4dc) ( dctcoef d[16] );
void (*idct4x4dc)( dctcoef d[16] );
} x264_dct_function_t;
typedef struct
{
void (*scan_8x8)( int16_t level[64], int16_t dct[64] );
void (*scan_4x4)( int16_t level[16], int16_t dct[16] );
int (*sub_8x8) ( int16_t level[64], const pixel *p_src, pixel *p_dst );
int (*sub_4x4) ( int16_t level[16], const pixel *p_src, pixel *p_dst );
int (*sub_4x4ac)( int16_t level[16], const pixel *p_src, pixel *p_dst, int16_t *dc );
void (*interleave_8x8_cavlc)( int16_t *dst, int16_t *src, uint8_t *nnz );
void (*scan_8x8)( dctcoef level[64], dctcoef dct[64] );
void (*scan_4x4)( dctcoef level[16], dctcoef dct[16] );
int (*sub_8x8) ( dctcoef level[64], const pixel *p_src, pixel *p_dst );
int (*sub_4x4) ( dctcoef level[16], const pixel *p_src, pixel *p_dst );
int (*sub_4x4ac)( dctcoef level[16], const pixel *p_src, pixel *p_dst, dctcoef *dc );
void (*interleave_8x8_cavlc)( dctcoef *dst, dctcoef *src, uint8_t *nnz );
} x264_zigzag_function_t;
......
......@@ -346,7 +346,7 @@ static ALWAYS_INLINE uint32_t pack16to32_mask( int a, int b )
#define array_non_zero(a) array_non_zero_int(a, sizeof(a))
#define array_non_zero_int array_non_zero_int
static ALWAYS_INLINE int array_non_zero_int( int16_t *v, int i_count )
static ALWAYS_INLINE int array_non_zero_int( dctcoef *v, int i_count )
{
if(i_count == 8)
return !!M64( &v[0] );
......
......@@ -42,7 +42,7 @@
nz |= (coef); \
}
static int quant_8x8( int16_t dct[64], uint16_t mf[64], uint16_t bias[64] )
static int quant_8x8( dctcoef dct[64], uint16_t mf[64], uint16_t bias[64] )
{
int nz = 0;
for( int i = 0; i < 64; i++ )
......@@ -50,7 +50,7 @@ static int quant_8x8( int16_t dct[64], uint16_t mf[64], uint16_t bias[64] )
return !!nz;
}
static int quant_4x4( int16_t dct[16], uint16_t mf[16], uint16_t bias[16] )
static int quant_4x4( dctcoef dct[16], uint16_t mf[16], uint16_t bias[16] )
{
int nz = 0;
for( int i = 0; i < 16; i++ )
......@@ -58,7 +58,7 @@ static int quant_4x4( int16_t dct[16], uint16_t mf[16], uint16_t bias[16] )
return !!nz;
}
static int quant_4x4_dc( int16_t dct[16], int mf, int bias )
static int quant_4x4_dc( dctcoef dct[16], int mf, int bias )
{
int nz = 0;
for( int i = 0; i < 16; i++ )
......@@ -66,7 +66,7 @@ static int quant_4x4_dc( int16_t dct[16], int mf, int bias )
return !!nz;
}
static int quant_2x2_dc( int16_t dct[4], int mf, int bias )
static int quant_2x2_dc( dctcoef dct[4], int mf, int bias )
{
int nz = 0;
QUANT_ONE( dct[0], mf, bias );
......@@ -82,7 +82,7 @@ static int quant_2x2_dc( int16_t dct[4], int mf, int bias )
#define DEQUANT_SHR( x ) \
dct[x] = ( dct[x] * dequant_mf[i_mf][x] + f ) >> (-i_qbits)
static void dequant_4x4( int16_t dct[16], int dequant_mf[6][16], int i_qp )
static void dequant_4x4( dctcoef dct[16], int dequant_mf[6][16], int i_qp )
{
const int i_mf = i_qp%6;
const int i_qbits = i_qp/6 - 4;
......@@ -100,7 +100,7 @@ static void dequant_4x4( int16_t dct[16], int dequant_mf[6][16], int i_qp )
}
}
static void dequant_8x8( int16_t dct[64], int dequant_mf[6][64], int i_qp )
static void dequant_8x8( dctcoef dct[64], int dequant_mf[6][64], int i_qp )
{
const int i_mf = i_qp%6;
const int i_qbits = i_qp/6 - 6;
......@@ -118,7 +118,7 @@ static void dequant_8x8( int16_t dct[64], int dequant_mf[6][64], int i_qp )
}
}
static void dequant_4x4_dc( int16_t dct[16], int dequant_mf[6][16], int i_qp )
static void dequant_4x4_dc( dctcoef dct[16], int dequant_mf[6][16], int i_qp )
{
const int i_qbits = i_qp/6 - 6;
......@@ -137,7 +137,7 @@ static void dequant_4x4_dc( int16_t dct[16], int dequant_mf[6][16], int i_qp )
}
}
static void x264_denoise_dct( int16_t *dct, uint32_t *sum, uint16_t *offset, int size )
static void x264_denoise_dct( dctcoef *dct, uint32_t *sum, uint16_t *offset, int size )
{
for( int i = 1; i < size; i++ )
{
......@@ -171,14 +171,14 @@ const uint8_t x264_decimate_table8[64] =
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};
static int ALWAYS_INLINE x264_decimate_score_internal( int16_t *dct, int i_max )
static int ALWAYS_INLINE x264_decimate_score_internal( dctcoef *dct, int i_max )
{
const uint8_t *ds_table = (i_max == 64) ? x264_decimate_table8 : x264_decimate_table4;
int i_score = 0;
int idx = i_max - 1;
/* Yes, dct[idx-1] is guaranteed to be 32-bit aligned. idx>=0 instead of 1 works correctly for the same reason */
while( idx >= 0 && M32( &dct[idx-1] ) == 0 )
while( idx >= 0 && MDCT_X2( &dct[idx-1] ) == 0 )
idx -= 2;
if( idx >= 0 && dct[idx] == 0 )
idx--;
......@@ -201,20 +201,20 @@ static int ALWAYS_INLINE x264_decimate_score_internal( int16_t *dct, int i_max )
return i_score;
}
static int x264_decimate_score15( int16_t *dct )
static int x264_decimate_score15( dctcoef *dct )
{
return x264_decimate_score_internal( dct+1, 15 );
}
static int x264_decimate_score16( int16_t *dct )
static int x264_decimate_score16( dctcoef *dct )
{
return x264_decimate_score_internal( dct, 16 );
}
static int x264_decimate_score64( int16_t *dct )
static int x264_decimate_score64( dctcoef *dct )
{
return x264_decimate_score_internal( dct, 64 );
}
static int ALWAYS_INLINE x264_coeff_last_internal( int16_t *l, int i_count )
static int ALWAYS_INLINE x264_coeff_last_internal( dctcoef *l, int i_count )
{
int i_last;
for( i_last = i_count-1; i_last >= 3; i_last -= 4 )
......@@ -225,25 +225,25 @@ static int ALWAYS_INLINE x264_coeff_last_internal( int16_t *l, int i_count )
return i_last;
}
static int x264_coeff_last4( int16_t *l )
static int x264_coeff_last4( dctcoef *l )
{
return x264_coeff_last_internal( l, 4 );
}
static int x264_coeff_last15( int16_t *l )
static int x264_coeff_last15( dctcoef *l )
{
return x264_coeff_last_internal( l, 15 );
}
static int x264_coeff_last16( int16_t *l )
static int x264_coeff_last16( dctcoef *l )
{
return x264_coeff_last_internal( l, 16 );
}
static int x264_coeff_last64( int16_t *l )
static int x264_coeff_last64( dctcoef *l )
{
return x264_coeff_last_internal( l, 64 );
}
#define level_run(num)\
static int x264_coeff_level_run##num( int16_t *dct, x264_run_level_t *runlevel )\
static int x264_coeff_level_run##num( dctcoef *dct, x264_run_level_t *runlevel )\
{\
int i_last = runlevel->last = x264_coeff_last##num(dct);\
int i_total = 0;\
......
......@@ -25,22 +25,22 @@
typedef struct
{
int (*quant_8x8)( int16_t dct[64], uint16_t mf[64], uint16_t bias[64] );
int (*quant_4x4)( int16_t dct[16], uint16_t mf[16], uint16_t bias[16] );
int (*quant_4x4_dc)( int16_t dct[16], int mf, int bias );
int (*quant_2x2_dc)( int16_t dct[4], int mf, int bias );
int (*quant_8x8)( dctcoef dct[64], uint16_t mf[64], uint16_t bias[64] );
int (*quant_4x4)( dctcoef dct[16], uint16_t mf[16], uint16_t bias[16] );
int (*quant_4x4_dc)( dctcoef dct[16], int mf, int bias );
int (*quant_2x2_dc)( dctcoef dct[4], int mf, int bias );
void (*dequant_8x8)( int16_t dct[64], int dequant_mf[6][64], int i_qp );
void (*dequant_4x4)( int16_t dct[16], int dequant_mf[6][16], int i_qp );
void (*dequant_4x4_dc)( int16_t dct[16], int dequant_mf[6][16], int i_qp );
void (*dequant_8x8)( dctcoef dct[64], int dequant_mf[6][64], int i_qp );
void (*dequant_4x4)( dctcoef dct[16], int dequant_mf[6][16], int i_qp );
void (*dequant_4x4_dc)( dctcoef dct[16], int dequant_mf[6][16], int i_qp );
void (*denoise_dct)( int16_t *dct, uint32_t *sum, uint16_t *offset, int size );
void (*denoise_dct)( dctcoef *dct, uint32_t *sum, uint16_t *offset, int size );
int (*decimate_score15)( int16_t *dct );
int (*decimate_score16)( int16_t *dct );
int (*decimate_score64)( int16_t *dct );
int (*coeff_last[6])( int16_t *dct );
int (*coeff_level_run[5])( int16_t *dct, x264_run_level_t *runlevel );
int (*decimate_score15)( dctcoef *dct );
int (*decimate_score16)( dctcoef *dct );
int (*decimate_score64)( dctcoef *dct );
int (*coeff_last[6])( dctcoef *dct );
int (*coeff_level_run[5])( dctcoef *dct, x264_run_level_t *runlevel );
} x264_quant_function_t;
void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf );
......
......@@ -551,7 +551,7 @@ static const uint8_t coeff_abs_level_transition[2][8] = {
static const uint8_t count_cat_m1[5] = {15, 14, 15, 3, 14};
#if !RDO_SKIP_BS
static void block_residual_write_cabac( x264_t *h, x264_cabac_t *cb, int i_ctxBlockCat, int16_t *l )
static void block_residual_write_cabac( x264_t *h, x264_cabac_t *cb, int i_ctxBlockCat, dctcoef *l )
{
const int i_ctx_sig = significant_coeff_flag_offset[h->mb.b_interlaced][i_ctxBlockCat];
const int i_ctx_last = last_coeff_flag_offset[h->mb.b_interlaced][i_ctxBlockCat];
......@@ -645,7 +645,7 @@ static void block_residual_write_cabac( x264_t *h, x264_cabac_t *cb, int i_ctxBl
* this is slightly incorrect because the sigmap is not reversible
* (contexts are repeated). However, there is nearly no quality penalty
* for this (~0.001db) and the speed boost (~30%) is worth it. */
static void ALWAYS_INLINE block_residual_write_cabac_internal( x264_t *h, x264_cabac_t *cb, int i_ctxBlockCat, int16_t *l, int b_8x8 )
static void ALWAYS_INLINE block_residual_write_cabac_internal( x264_t *h, x264_cabac_t *cb, int i_ctxBlockCat, dctcoef *l, int b_8x8 )
{
const int i_ctx_sig = significant_coeff_flag_offset[h->mb.b_interlaced][i_ctxBlockCat];
const int i_ctx_last = last_coeff_flag_offset[h->mb.b_interlaced][i_ctxBlockCat];
......@@ -726,11 +726,11 @@ static void ALWAYS_INLINE block_residual_write_cabac_internal( x264_t *h, x264_c
}
}
static void block_residual_write_cabac_8x8( x264_t *h, x264_cabac_t *cb, int16_t *l )
static void block_residual_write_cabac_8x8( x264_t *h, x264_cabac_t *cb, dctcoef *l )
{
block_residual_write_cabac_internal( h, cb, DCT_LUMA_8x8, l, 1 );
}
static void block_residual_write_cabac( x264_t *h, x264_cabac_t *cb, int i_ctxBlockCat, int16_t *l )
static void block_residual_write_cabac( x264_t *h, x264_cabac_t *cb, int i_ctxBlockCat, dctcoef *l )
{
block_residual_write_cabac_internal( h, cb, i_ctxBlockCat, l, 0 );
}
......
......@@ -113,7 +113,7 @@ static inline int block_residual_write_cavlc_escape( x264_t *h, int i_suffix_len
return i_suffix_length;
}
static int block_residual_write_cavlc( x264_t *h, int i_ctxBlockCat, int16_t *l, int nC )
static int block_residual_write_cavlc( x264_t *h, int i_ctxBlockCat, dctcoef *l, int nC )
{
bs_t *s = &h->out.bs;
static const uint8_t ctz_index[8] = {3,0,1,0,2,0,1,0};
......
......@@ -28,7 +28,7 @@
/* These chroma DC functions don't have assembly versions and are only used here. */
#define ZIG(i,y,x) level[i] = dct[x*2+y];
static inline void zigzag_scan_2x2_dc( int16_t level[4], int16_t dct[4] )
static inline void zigzag_scan_2x2_dc( dctcoef level[4], dctcoef dct[4] )
{
ZIG(0,0,0)
ZIG(1,0,1)
......@@ -44,7 +44,7 @@ static inline void zigzag_scan_2x2_dc( int16_t level[4], int16_t dct[4] )
int d3 = dct[2] - dct[3]; \
int dmf = dequant_mf[i_qp%6][0] << i_qp/6;
static inline void idct_dequant_2x2_dc( int16_t dct[4], int16_t dct4x4[4][16], int dequant_mf[6][16], int i_qp )
static inline void idct_dequant_2x2_dc( dctcoef dct[4], dctcoef dct4x4[4][16], int dequant_mf[6][16], int i_qp )
{
IDCT_DEQUANT_START
dct4x4[0][0] = (d0 + d1) * dmf >> 5;
......@@ -53,7 +53,7 @@ static inline void idct_dequant_2x2_dc( int16_t dct[4], int16_t dct4x4[4][16], i
dct4x4[3][0] = (d2 - d3) * dmf >> 5;
}
static inline void idct_dequant_2x2_dconly( int16_t out[4], int16_t dct[4], int dequant_mf[6][16], int i_qp )
static inline void idct_dequant_2x2_dconly( dctcoef out[4], dctcoef dct[4], int dequant_mf[6][16], int i_qp )
{
IDCT_DEQUANT_START
out[0] = (d0 + d1) * dmf >> 5;
......@@ -62,7 +62,7 @@ static inline void idct_dequant_2x2_dconly( int16_t out[4], int16_t dct[4], int
out[3] = (d2 - d3) * dmf >> 5;
}
static inline void dct2x2dc( int16_t d[4], int16_t dct4x4[4][16] )
static inline void dct2x2dc( dctcoef d[4], dctcoef dct4x4[4][16] )
{
int d0 = dct4x4[0][0] + dct4x4[1][0];
int d1 = dct4x4[2][0] + dct4x4[3][0];
......@@ -78,7 +78,7 @@ static inline void dct2x2dc( int16_t d[4], int16_t dct4x4[4][16] )
dct4x4[3][0] = 0;
}
static ALWAYS_INLINE int x264_quant_4x4( x264_t *h, int16_t dct[16], int i_qp, int i_ctxBlockCat, int b_intra, int idx )
static ALWAYS_INLINE int x264_quant_4x4( x264_t *h, dctcoef dct[16], int i_qp, int i_ctxBlockCat, int b_intra, int idx )
{
int i_quant_cat = b_intra ? CQM_4IY : CQM_4PY;
if( h->mb.b_trellis )
......@@ -87,7 +87,7 @@ static ALWAYS_INLINE int x264_quant_4x4( x264_t *h, int16_t dct[16], int i_qp, i
return h->quantf.quant_4x4( dct, h->quant4_mf[i_quant_cat][i_qp], h->quant4_bias[i_quant_cat][i_qp] );
}
static ALWAYS_INLINE int x264_quant_8x8( x264_t *h, int16_t dct[64], int i_qp, int b_intra, int idx )
static ALWAYS_INLINE int x264_quant_8x8( x264_t *h, dctcoef dct[64], int i_qp, int b_intra, int idx )
{
int i_quant_cat = b_intra ? CQM_8IY : CQM_8PY;
if( h->mb.b_trellis )
......@@ -112,7 +112,7 @@ void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qp )
int nz;
pixel *p_src = &h->mb.pic.p_fenc[0][block_idx_xy_fenc[idx]];
pixel *p_dst = &h->mb.pic.p_fdec[0][block_idx_xy_fdec[idx]];
ALIGNED_ARRAY_16( int16_t, dct4x4,[16] );
ALIGNED_ARRAY_16( dctcoef, dct4x4,[16] );
if( h->mb.b_lossless )
{
......@@ -158,7 +158,7 @@ void x264_mb_encode_i8x8( x264_t *h, int idx, int i_qp )
int nz;
pixel *p_src = &h->mb.pic.p_fenc[0][8*x + 8*y*FENC_STRIDE];
pixel *p_dst = &h->mb.pic.p_fdec[0][8*x + 8*y*FDEC_STRIDE];
ALIGNED_ARRAY_16( int16_t, dct8x8,[64] );
ALIGNED_ARRAY_16( dctcoef, dct8x8,[64] );
if( h->mb.b_lossless )
{
......@@ -188,8 +188,8 @@ static void x264_mb_encode_i16x16( x264_t *h, int i_qp )
pixel *p_src = h->mb.pic.p_fenc[0];
pixel *p_dst = h->mb.pic.p_fdec[0];
ALIGNED_ARRAY_16( int16_t, dct4x4,[16],[16] );
ALIGNED_ARRAY_16( int16_t, dct_dc4x4,[16] );
ALIGNED_ARRAY_16( dctcoef, dct4x4,[16],[16] );
ALIGNED_ARRAY_16( dctcoef, dct_dc4x4,[16] );
int nz;
int decimate_score = h->mb.b_dct_decimate ? 0 : 9;
......@@ -264,9 +264,9 @@ static void x264_mb_encode_i16x16( x264_t *h, int i_qp )
h->dctf.add16x16_idct_dc( p_dst, dct_dc4x4 );
}
static inline int idct_dequant_round_2x2_dc( int16_t ref[4], int16_t dct[4], int dequant_mf[6][16], int i_qp )
static inline int idct_dequant_round_2x2_dc( dctcoef ref[4], dctcoef dct[4], int dequant_mf[6][16], int i_qp )
{
int16_t out[4];
dctcoef out[4];
idct_dequant_2x2_dconly( out, dct, dequant_mf, i_qp );
return ((ref[0] ^ (out[0]+32))
| (ref[1] ^ (out[1]+32))
......@@ -278,9 +278,9 @@ static inline int idct_dequant_round_2x2_dc( int16_t ref[4], int16_t dct[4], int
* Unlike luma blocks, this can't be done with a lookup table or
* other shortcut technique because of the interdependencies
* between the coefficients due to the chroma DC transform. */
static inline int x264_mb_optimize_chroma_dc( x264_t *h, int b_inter, int i_qp, int16_t dct2x2[4] )
static inline int x264_mb_optimize_chroma_dc( x264_t *h, int b_inter, int i_qp, dctcoef dct2x2[4] )