Commit 5b0cb86f authored by Henrik Gramner, committed by Fiona Glaser

4:2:2 encoding support

parent 3d82e875
......@@ -42,6 +42,11 @@ E: gpoirier CHEZ mplayerhq POINT hu
D: Altivec optimizations
S: Brittany, France
N: Henrik Gramner
E: hengar-6 AT student DOT ltu DOT se
D: 4:2:2 chroma subsampling, x86 asm
S: Sweden
N: Fiona Glaser
E: fiona AT x264 DOT com
D: x86 asm, 1pass VBV, adaptive quantization, inline asm
......
......@@ -60,10 +60,11 @@ typedef struct
uint8_t run[16];
} x264_run_level_t;
extern const vlc_t x264_coeff0_token[5];
extern const vlc_t x264_coeff_token[5][16][4];
extern const vlc_t x264_coeff0_token[6];
extern const vlc_t x264_coeff_token[6][16][4];
extern const vlc_t x264_total_zeros[15][16];
extern const vlc_t x264_total_zeros_dc[3][4];
extern const vlc_t x264_total_zeros_2x2_dc[3][4];
extern const vlc_t x264_total_zeros_2x4_dc[7][8];
extern const vlc_t x264_run_before[7][16];
typedef struct
......
......@@ -426,21 +426,57 @@ void x264_param_apply_fastfirstpass( x264_param_t *param )
}
}
/* Translate a profile name into its PROFILE_* constant.
 *
 * str: NUL-terminated profile name; compared case-insensitively
 *      (strcasecmp), so "High10" and "high10" are equivalent.
 * Returns the matching PROFILE_* value, or -1 if the name is not one of
 * the profiles listed in the table below. */
static int profile_string_to_int( const char *str )
{
    /* Name -> profile lookup table, checked in order. */
    const struct
    {
        const char *name;
        int         value;
    } known_profiles[] =
    {
        { "baseline", PROFILE_BASELINE },
        { "main",     PROFILE_MAIN },
        { "high",     PROFILE_HIGH },
        { "high10",   PROFILE_HIGH10 },
        { "high422",  PROFILE_HIGH422 },
        { "high444",  PROFILE_HIGH444_PREDICTIVE },
    };
    const int count = (int)(sizeof(known_profiles) / sizeof(known_profiles[0]));

    for( int idx = 0; idx < count; idx++ )
    {
        if( strcasecmp( str, known_profiles[idx].name ) == 0 )
            return known_profiles[idx].value;
    }
    return -1;
}
int x264_param_apply_profile( x264_param_t *param, const char *profile )
{
if( !profile )
return 0;
#if BIT_DEPTH > 8
if( !strcasecmp( profile, "baseline" ) || !strcasecmp( profile, "main" ) ||
!strcasecmp( profile, "high" ) )
int p = profile_string_to_int( profile );
if( p < 0 )
{
x264_log( NULL, X264_LOG_ERROR, "%s profile doesn't support a bit depth of %d.\n", profile, BIT_DEPTH );
x264_log( NULL, X264_LOG_ERROR, "invalid profile: %s\n", profile );
return -1;
}
if( p < PROFILE_HIGH444_PREDICTIVE && ((param->rc.i_rc_method == X264_RC_CQP && param->rc.i_qp_constant <= 0) ||
(param->rc.i_rc_method == X264_RC_CRF && (int)(param->rc.f_rf_constant + QP_BD_OFFSET) <= 0)) )
{
x264_log( NULL, X264_LOG_ERROR, "%s profile doesn't support lossless\n", profile );
return -1;
}
if( p < PROFILE_HIGH444_PREDICTIVE && (param->i_csp & X264_CSP_MASK) >= X264_CSP_I444 )
{
x264_log( NULL, X264_LOG_ERROR, "%s profile doesn't support 4:4:4\n", profile );
return -1;
}
if( p < PROFILE_HIGH422 && (param->i_csp & X264_CSP_MASK) >= X264_CSP_I422 )
{
x264_log( NULL, X264_LOG_ERROR, "%s profile doesn't support 4:2:2\n", profile );
return -1;
}
if( p < PROFILE_HIGH10 && BIT_DEPTH > 8 )
{
x264_log( NULL, X264_LOG_ERROR, "%s profile doesn't support a bit depth of %d\n", profile, BIT_DEPTH );
return -1;
}
#endif
if( !strcasecmp( profile, "baseline" ) )
if( p == PROFILE_BASELINE )
{
param->analyse.b_transform_8x8 = 0;
param->b_cabac = 0;
......@@ -459,27 +495,12 @@ int x264_param_apply_profile( x264_param_t *param, const char *profile )
return -1;
}
}
else if( !strcasecmp( profile, "main" ) )
else if( p == PROFILE_MAIN )
{
param->analyse.b_transform_8x8 = 0;
param->i_cqm_preset = X264_CQM_FLAT;
param->psz_cqm_file = NULL;
}
else if( !strcasecmp( profile, "high" ) || !strcasecmp( profile, "high10" ) )
{
/* Default */
}
else
{
x264_log( NULL, X264_LOG_ERROR, "invalid profile: %s\n", profile );
return -1;
}
if( (param->rc.i_rc_method == X264_RC_CQP && param->rc.i_qp_constant <= 0) ||
(param->rc.i_rc_method == X264_RC_CRF && (int)(param->rc.f_rf_constant + QP_BD_OFFSET) <= 0) )
{
x264_log( NULL, X264_LOG_ERROR, "%s profile doesn't support lossless\n", profile );
return -1;
}
return 0;
}
......@@ -1075,6 +1096,9 @@ int x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_heigh
[X264_CSP_I420] = { 3, { 256*1, 256/2, 256/2 }, { 256*1, 256/2, 256/2 } },
[X264_CSP_YV12] = { 3, { 256*1, 256/2, 256/2 }, { 256*1, 256/2, 256/2 } },
[X264_CSP_NV12] = { 2, { 256*1, 256*1 }, { 256*1, 256/2 }, },
[X264_CSP_I422] = { 3, { 256*1, 256/2, 256/2 }, { 256*1, 256*1, 256*1 } },
[X264_CSP_YV16] = { 3, { 256*1, 256/2, 256/2 }, { 256*1, 256*1, 256*1 } },
[X264_CSP_NV16] = { 2, { 256*1, 256*1 }, { 256*1, 256*1 }, },
[X264_CSP_I444] = { 3, { 256*1, 256*1, 256*1 }, { 256*1, 256*1, 256*1 } },
[X264_CSP_YV24] = { 3, { 256*1, 256*1, 256*1 }, { 256*1, 256*1, 256*1 } },
[X264_CSP_BGR] = { 1, { 256*3 }, { 256*1 }, },
......
......@@ -40,6 +40,9 @@
#define IS_DISPOSABLE(type) ( type == X264_TYPE_B )
#define FIX8(f) ((int)(f*(1<<8)+.5))
#define ALIGN(x,a) (((x)+((a)-1))&~((a)-1))
#define CHROMA_FORMAT h->sps->i_chroma_format_idc
#define CHROMA_SIZE(s) ((s)>>(h->mb.chroma_h_shift+h->mb.chroma_v_shift))
#define FRAME_SIZE(s) ((s)+2*CHROMA_SIZE(s))
#define CHECKED_MALLOC( var, size )\
do {\
......@@ -56,7 +59,7 @@ do {\
#define X264_BFRAME_MAX 16
#define X264_REF_MAX 16
#define X264_THREAD_MAX 128
#define X264_PCM_COST ((384<<CHROMA444)*BIT_DEPTH+16)
#define X264_PCM_COST (FRAME_SIZE(256*BIT_DEPTH)+16)
#define X264_LOOKAHEAD_MAX 250
#define QP_BD_OFFSET (6*(BIT_DEPTH-8))
#define QP_MAX_SPEC (51+QP_BD_OFFSET)
......@@ -102,7 +105,7 @@ do {\
# define PARAM_INTERLACED 0
#endif
#define CHROMA444 (h->sps->i_chroma_format_idc == 3)
#define CHROMA444 (CHROMA_FORMAT == CHROMA_444)
/* Unions for type-punning.
* Mn: load or store n bits, aligned, native-endian
......@@ -565,7 +568,7 @@ struct x264_t
struct
{
ALIGNED_16( dctcoef luma16x16_dc[3][16] );
ALIGNED_16( dctcoef chroma_dc[2][4] );
ALIGNED_16( dctcoef chroma_dc[2][8] );
// FIXME share memory?
ALIGNED_16( dctcoef luma8x8[12][64] );
ALIGNED_16( dctcoef luma4x4[16*3][16] );
......@@ -578,6 +581,10 @@ struct x264_t
int i_mb_height;
int i_mb_count; /* number of mbs in a frame */
/* Chroma subsampling */
int chroma_h_shift;
int chroma_v_shift;
/* Strides */
int i_mb_stride;
int i_b8_stride;
......@@ -882,6 +889,8 @@ struct x264_t
ALIGNED_16( uint32_t nr_residual_sum_buf[2][4][64] );
uint32_t nr_count_buf[2][4];
uint8_t luma2chroma_pixel[7]; /* Subsampled pixel size */
/* Buffers that are allocated per-thread even in sliced threads. */
void *scratch_buffer; /* for any temporary storage that doesn't want repeated malloc */
pixel *intra_border_backup[5][3]; /* bottom pixels of the previous mb row, used for intra prediction after the framebuffer has been deblocked */
......@@ -891,9 +900,11 @@ struct x264_t
/* CPU functions dependents */
x264_predict_t predict_16x16[4+3];
x264_predict_t predict_8x8c[4+3];
x264_predict8x8_t predict_8x8[9+3];
x264_predict_t predict_4x4[9+3];
x264_predict_t predict_chroma[4+3];
x264_predict_t predict_8x8c[4+3];
x264_predict_t predict_8x16c[4+3];
x264_predict_8x8_filter_t predict_8x8_filter;
x264_pixel_function_t pixf;
......
......@@ -5,6 +5,7 @@
*
* Authors: Loren Merritt <lorenm@u.washington.edu>
* Laurent Aimar <fenrir@via.ecp.fr>
* Henrik Gramner <hengar-6@student.ltu.se>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
......@@ -100,6 +101,42 @@ static void idct4x4dc( dctcoef d[16] )
}
}
/* 2x4 DC transform over the DC terms of eight 4x4 coefficient blocks.
 *
 * dct:    receives the eight transformed DC coefficients.
 * dct4x4: eight 4x4 blocks; element [k][0] of each is read as that
 *         block's DC term and is reset to 0 once it has been consumed.
 *
 * The transform is three add/sub butterfly stages: first across each
 * (2k, 2k+1) block pair, then across neighbouring pairs, then across the
 * two halves. Outputs 4..7 use the swapped (-,+) ordering in the last stage. */
static void dct2x4dc( dctcoef dct[8], dctcoef dct4x4[8][16] )
{
    int pair_sum[4];
    int pair_diff[4];

    /* Stage 1: combine the DC terms of blocks 2k and 2k+1. */
    for( int k = 0; k < 4; k++ )
    {
        int even = dct4x4[2*k][0];
        int odd  = dct4x4[2*k+1][0];
        pair_sum[k]  = even + odd;
        pair_diff[k] = even - odd;
    }

    /* Stage 2: combine neighbouring pairs (0,1) and (2,3). */
    const int ss01 = pair_sum[0]  + pair_sum[1];
    const int ss23 = pair_sum[2]  + pair_sum[3];
    const int ds01 = pair_diff[0] + pair_diff[1];
    const int ds23 = pair_diff[2] + pair_diff[3];
    const int sd01 = pair_sum[0]  - pair_sum[1];
    const int sd23 = pair_sum[2]  - pair_sum[3];
    const int dd01 = pair_diff[0] - pair_diff[1];
    const int dd23 = pair_diff[2] - pair_diff[3];

    /* Stage 3: final butterflies, written in the output order expected by callers. */
    dct[0] = ss01 + ss23;
    dct[1] = ds01 + ds23;
    dct[2] = ss01 - ss23;
    dct[3] = ds01 - ds23;
    dct[4] = sd01 - sd23;
    dct[5] = dd01 - dd23;
    dct[6] = sd01 + sd23;
    dct[7] = dd01 + dd23;

    /* The DC terms now live in dct[]; clear them in the source blocks. */
    for( int k = 0; k < 8; k++ )
        dct4x4[k][0] = 0;
}
static inline void pixel_sub_wxh( dctcoef *diff, int i_size,
pixel *pix1, int i_pix1, pixel *pix2, int i_pix2 )
{
......@@ -164,14 +201,10 @@ static void sub16x16_dct( dctcoef dct[16][16], pixel *pix1, pixel *pix2 )
/* Sum of the 4x4 residual pix1 - pix2 (the unnormalised DC term).
 *
 * pix1: top-left of the 4x4 source block; rows are FENC_STRIDE apart.
 * pix2: top-left of the 4x4 reference block; rows are FDEC_STRIDE apart.
 * Returns the sum over all 16 positions of (pix1 - pix2).
 *
 * Each pixel difference is accumulated exactly once, directly from the
 * two blocks, so no temporary coefficient buffer is needed. */
static int sub4x4_dct_dc( pixel *pix1, pixel *pix2 )
{
    int sum = 0;
    /* One iteration per row; both pointers advance by their own stride. */
    for( int i=0; i<4; i++, pix1 += FENC_STRIDE, pix2 += FDEC_STRIDE )
        sum += pix1[0] + pix1[1] + pix1[2] + pix1[3]
             - pix2[0] - pix2[1] - pix2[2] - pix2[3];
    return sum;
}
......@@ -188,11 +221,49 @@ static void sub8x8_dct_dc( dctcoef dct[4], pixel *pix1, pixel *pix2 )
int d2 = dct[0] - dct[1];
int d3 = dct[2] - dct[3];
dct[0] = d0 + d1;
dct[2] = d2 + d3;
dct[1] = d0 - d1;
dct[2] = d2 + d3;
dct[3] = d2 - d3;
}
/* DC-only transform of an 8x16 residual (pix1 - pix2).
 *
 * dct:  receives the eight 2x4-transformed DC coefficients.
 * pix1: source block, rows FENC_STRIDE apart.
 * pix2: reference block, rows FDEC_STRIDE apart.
 *
 * The 8x16 area is split into four rows of two 4x4 blocks (columns 0 and
 * 4). sub4x4_dct_dc() gives each block's residual sum, and those eight
 * sums are then run through the 2x4 DC butterflies. */
static void sub8x16_dct_dc( dctcoef dct[8], pixel *pix1, pixel *pix2 )
{
    int row_sum[4];
    int row_diff[4];

    /* Per-row: DC of the left and right 4x4 block, combined horizontally. */
    for( int row = 0; row < 4; row++ )
    {
        int left  = sub4x4_dct_dc( &pix1[4*row*FENC_STRIDE+0], &pix2[4*row*FDEC_STRIDE+0] );
        int right = sub4x4_dct_dc( &pix1[4*row*FENC_STRIDE+4], &pix2[4*row*FDEC_STRIDE+4] );
        row_sum[row]  = left + right;
        row_diff[row] = left - right;
    }

    /* 2x4 DC transform: combine rows (0,1) and (2,3)... */
    const int ss01 = row_sum[0]  + row_sum[1];
    const int ss23 = row_sum[2]  + row_sum[3];
    const int ds01 = row_diff[0] + row_diff[1];
    const int ds23 = row_diff[2] + row_diff[3];
    const int sd01 = row_sum[0]  - row_sum[1];
    const int sd23 = row_sum[2]  - row_sum[3];
    const int dd01 = row_diff[0] - row_diff[1];
    const int dd23 = row_diff[2] - row_diff[3];

    /* ...then the two halves; outputs 4..7 use the swapped (-,+) ordering. */
    dct[0] = ss01 + ss23;
    dct[1] = ds01 + ds23;
    dct[2] = ss01 - ss23;
    dct[3] = ds01 - ds23;
    dct[4] = sd01 - sd23;
    dct[5] = dd01 - dd23;
    dct[6] = sd01 + sd23;
    dct[7] = dd01 + dd23;
}
static void add4x4_idct( pixel *p_dst, dctcoef dct[16] )
{
dctcoef d[16];
......@@ -408,6 +479,8 @@ void x264_dct_init( int cpu, x264_dct_function_t *dctf )
dctf->add8x8_idct = add8x8_idct;
dctf->add8x8_idct_dc = add8x8_idct_dc;
dctf->sub8x16_dct_dc = sub8x16_dct_dc;
dctf->sub16x16_dct = sub16x16_dct;
dctf->add16x16_idct = add16x16_idct;
dctf->add16x16_idct_dc = add16x16_idct_dc;
......@@ -421,6 +494,8 @@ void x264_dct_init( int cpu, x264_dct_function_t *dctf )
dctf->dct4x4dc = dct4x4dc;
dctf->idct4x4dc = idct4x4dc;
dctf->dct2x4dc = dct2x4dc;
#if HIGH_BIT_DEPTH
#if HAVE_MMX
if( cpu&X264_CPU_MMX )
......
......@@ -104,6 +104,8 @@ typedef struct
void (*add8x8_idct) ( pixel *p_dst, dctcoef dct[4][16] );
void (*add8x8_idct_dc) ( pixel *p_dst, dctcoef dct[4] );
void (*sub8x16_dct_dc)( dctcoef dct[8], pixel *pix1, pixel *pix2 );
void (*sub16x16_dct) ( dctcoef dct[16][16], pixel *pix1, pixel *pix2 );
void (*add16x16_idct)( pixel *p_dst, dctcoef dct[16][16] );
void (*add16x16_idct_dc) ( pixel *p_dst, dctcoef dct[16] );
......@@ -117,6 +119,8 @@ typedef struct
void (*dct4x4dc) ( dctcoef d[16] );
void (*idct4x4dc)( dctcoef d[16] );
void (*dct2x4dc)( dctcoef dct[8], dctcoef dct4x4[8][16] );
} x264_dct_function_t;
typedef struct
......
......@@ -6,6 +6,7 @@
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
* Loren Merritt <lorenm@u.washington.edu>
* Fiona Glaser <fiona@x264.com>
* Henrik Gramner <hengar-6@student.ltu.se>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
......@@ -119,7 +120,7 @@ static inline void deblock_luma_c( pixel *pix, int xstride, int ystride, int alp
deblock_edge_luma_c( pix, xstride, alpha, beta, tc0[i] );
}
}
static void deblock_v_luma_mbaff_c( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 )
static void deblock_h_luma_mbaff_c( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 )
{
for( int d = 0; d < 8; d++, pix += stride )
deblock_edge_luma_c( pix, 1, alpha, beta, tc0[d>>1] );
......@@ -147,33 +148,42 @@ static ALWAYS_INLINE void deblock_edge_chroma_c( pixel *pix, int xstride, int al
pix[ 0*xstride] = x264_clip_pixel( q0 - delta ); /* q0' */
}
}
static inline void deblock_chroma_c( pixel *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0 )
static ALWAYS_INLINE void deblock_chroma_c( pixel *pix, int height, int xstride, int ystride, int alpha, int beta, int8_t *tc0 )
{
for( int i = 0; i < 4; i++ )
{
int tc = tc0[i];
if( tc <= 0 )
{
pix += 2*ystride;
pix += height*ystride;
continue;
}
for( int d = 0; d < 2; d++, pix += ystride-2 )
for( int e = 0; e < 2; e++, pix++ )
deblock_edge_chroma_c( pix, xstride, alpha, beta, tc0[i] );
for( int d = 0; d < height; d++, pix += ystride-2 )
for( int e = 0; e < 2; e++, pix++ )
deblock_edge_chroma_c( pix, xstride, alpha, beta, tc0[i] );
}
}
static void deblock_v_chroma_mbaff_c( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 )
static void deblock_h_chroma_mbaff_c( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 )
{
for( int i = 0; i < 4; i++, pix += stride )
deblock_edge_chroma_c( pix, 2, alpha, beta, tc0[i] );
}
/* MBAFF chroma edge filter, 4:2:2 variant: filters 8 consecutive rows.
 *
 * pix:    first sample to filter; advanced by `stride` after every row.
 * stride: distance between successive rows.
 * tc0:    four strength entries, each applied to two consecutive rows.
 *
 * Samples across the edge are addressed with a step of 2
 * (presumably because U and V are interleaved -- confirm against the frame layout). */
static void deblock_h_chroma_422_mbaff_c( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 )
{
    for( int seg = 0; seg < 4; seg++ )
    {
        /* Two rows share each tc0 entry. */
        for( int line = 0; line < 2; line++ )
        {
            deblock_edge_chroma_c( pix, 2, alpha, beta, tc0[seg] );
            pix += stride;
        }
    }
}
static void deblock_v_chroma_c( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 )
{
deblock_chroma_c( pix, stride, 2, alpha, beta, tc0 );
deblock_chroma_c( pix, 2, stride, 2, alpha, beta, tc0 );
}
static void deblock_h_chroma_c( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 )
{
deblock_chroma_c( pix, 2, stride, alpha, beta, tc0 );
deblock_chroma_c( pix, 2, 2, stride, alpha, beta, tc0 );
}
/* 4:2:2 chroma edge filter in the "h" direction.
 *
 * Thin wrapper around deblock_chroma_c(): four rows per tc0 entry,
 * a step of 2 between samples across the edge, and `stride` between rows. */
static void deblock_h_chroma_422_c( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 )
{
    const int rows_per_tc = 4; /* height covered by each tc0 entry */
    const int sample_step = 2; /* xstride: step across the edge */

    deblock_chroma_c( pix, rows_per_tc, sample_step, stride, alpha, beta, tc0 );
}
static ALWAYS_INLINE void deblock_edge_luma_intra_c( pixel *pix, int xstride, int alpha, int beta )
......@@ -220,7 +230,7 @@ static inline void deblock_luma_intra_c( pixel *pix, int xstride, int ystride, i
for( int d = 0; d < 16; d++, pix += ystride )
deblock_edge_luma_intra_c( pix, xstride, alpha, beta );
}
static void deblock_v_luma_intra_mbaff_c( pixel *pix, int ystride, int alpha, int beta )
static void deblock_h_luma_intra_mbaff_c( pixel *pix, int ystride, int alpha, int beta )
{
for( int d = 0; d < 8; d++, pix += ystride )
deblock_edge_luma_intra_c( pix, 1, alpha, beta );
......@@ -247,24 +257,33 @@ static ALWAYS_INLINE void deblock_edge_chroma_intra_c( pixel *pix, int xstride,
pix[ 0*xstride] = (2*q1 + q0 + p1 + 2) >> 2; /* q0' */
}
}
static inline void deblock_chroma_intra_c( pixel *pix, int xstride, int ystride, int alpha, int beta, int dir )
static ALWAYS_INLINE void deblock_chroma_intra_c( pixel *pix, int width, int height, int xstride, int ystride, int alpha, int beta )
{
for( int d = 0; d < (dir?16:8); d++, pix += ystride-2 )
for( int e = 0; e < (dir?1:2); e++, pix++ )
deblock_edge_chroma_intra_c( pix, xstride, alpha, beta );
for( int d = 0; d < height; d++, pix += ystride-2 )
for( int e = 0; e < width; e++, pix++ )
deblock_edge_chroma_intra_c( pix, xstride, alpha, beta );
}
static void deblock_v_chroma_intra_mbaff_c( pixel *pix, int stride, int alpha, int beta )
static void deblock_h_chroma_intra_mbaff_c( pixel *pix, int stride, int alpha, int beta )
{
for( int i = 0; i < 4; i++, pix += stride )
deblock_edge_chroma_intra_c( pix, 2, alpha, beta );
}
/* MBAFF intra chroma edge filter, 4:2:2 variant: filters 8 consecutive rows.
 *
 * pix:    first sample to filter; advanced by `stride` after every row.
 * stride: distance between successive rows.
 *
 * Samples across the edge are addressed with a step of 2. */
static void deblock_h_chroma_422_intra_mbaff_c( pixel *pix, int stride, int alpha, int beta )
{
    int rows_left = 8;

    while( rows_left-- > 0 )
    {
        deblock_edge_chroma_intra_c( pix, 2, alpha, beta );
        pix += stride;
    }
}
static void deblock_v_chroma_intra_c( pixel *pix, int stride, int alpha, int beta )
{
deblock_chroma_intra_c( pix, stride, 2, alpha, beta, 1 );
deblock_chroma_intra_c( pix, 1, 16, stride, 2, alpha, beta );
}
static void deblock_h_chroma_intra_c( pixel *pix, int stride, int alpha, int beta )
{
deblock_chroma_intra_c( pix, 2, stride, alpha, beta, 0 );
deblock_chroma_intra_c( pix, 2, 8, 2, stride, alpha, beta );
}
/* 4:2:2 intra chroma edge filter in the "h" direction.
 *
 * Thin wrapper around deblock_chroma_intra_c(): 2 samples per row over
 * 16 rows, a step of 2 between samples across the edge, and `stride`
 * between rows. */
static void deblock_h_chroma_422_intra_c( pixel *pix, int stride, int alpha, int beta )
{
    const int samples_per_row = 2;  /* width */
    const int row_count       = 16; /* height */
    const int sample_step     = 2;  /* xstride: step across the edge */

    deblock_chroma_intra_c( pix, samples_per_row, row_count, sample_step, stride, alpha, beta );
}
static void deblock_strength_c( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
......@@ -375,6 +394,7 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
int stridey = h->fdec->i_stride[0];
int strideuv = h->fdec->i_stride[1];
int chroma444 = CHROMA444;
int chroma_height = 16 >> h->mb.chroma_v_shift;
intptr_t uvdiff = chroma444 ? h->fdec->plane[2] - h->fdec->plane[1] : 1;
for( int mb_x = 0; mb_x < h->mb.i_mb_width; mb_x += (~b_interlaced | mb_y)&1, mb_y ^= b_interlaced )
......@@ -388,12 +408,12 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
uint8_t (*bs)[8][4] = h->deblock_strength[mb_y&1][mb_x];
pixel *pixy = h->fdec->plane[0] + 16*mb_y*stridey + 16*mb_x;
pixel *pixuv = h->fdec->plane[1] + (8<<chroma444)*mb_y*strideuv + 16*mb_x;
pixel *pixuv = h->fdec->plane[1] + chroma_height*mb_y*strideuv + 16*mb_x;
if( mb_y & MB_INTERLACED )
{
pixy -= 15*stridey;
pixuv -= ((8<<chroma444)-1)*strideuv;
pixuv -= (chroma_height-1)*strideuv;
}
int stride2y = stridey << MB_INTERLACED;
......@@ -405,22 +425,33 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
#define FILTER( intra, dir, edge, qp, chroma_qp )\
do\
{\
deblock_edge##intra( h, pixy + 4*edge*(dir?stride2y:1),\
stride2y, bs[dir][edge], qp, a, b, 0,\
h->loopf.deblock_luma##intra[dir] );\
if( chroma444 )\
if( !(edge & 1) || !transform_8x8 )\
{\
deblock_edge##intra( h, pixuv + 4*edge*(dir?stride2uv:1),\
stride2uv, bs[dir][edge], chroma_qp, a, b, 0,\
h->loopf.deblock_luma##intra[dir] );\
deblock_edge##intra( h, pixuv + uvdiff + 4*edge*(dir?stride2uv:1),\
stride2uv, bs[dir][edge], chroma_qp, a, b, 0,\
deblock_edge##intra( h, pixy + 4*edge*(dir?stride2y:1),\
stride2y, bs[dir][edge], qp, a, b, 0,\
h->loopf.deblock_luma##intra[dir] );\
if( CHROMA_FORMAT == CHROMA_444 )\
{\
deblock_edge##intra( h, pixuv + 4*edge*(dir?stride2uv:1),\
stride2uv, bs[dir][edge], chroma_qp, a, b, 0,\
h->loopf.deblock_luma##intra[dir] );\
deblock_edge##intra( h, pixuv + uvdiff + 4*edge*(dir?stride2uv:1),\
stride2uv, bs[dir][edge], chroma_qp, a, b, 0,\
h->loopf.deblock_luma##intra[dir] );\
}\
else if( CHROMA_FORMAT == CHROMA_420 && !(edge & 1) )\
{\
deblock_edge##intra( h, pixuv + edge*(dir?2*stride2uv:4),\
stride2uv, bs[dir][edge], chroma_qp, a, b, 1,\
h->loopf.deblock_chroma##intra[dir] );\
}\
}\
else if( !(edge & 1) )\
deblock_edge##intra( h, pixuv + 2*edge*(dir?stride2uv:2),\
if( CHROMA_FORMAT == CHROMA_422 && (dir || !(edge & 1)) )\
{\
deblock_edge##intra( h, pixuv + edge*(dir?4*stride2uv:4),\
stride2uv, bs[dir][edge], chroma_qp, a, b, 1,\
h->loopf.deblock_chroma##intra[dir] );\
}\
} while(0)
if( h->mb.i_neighbour & MB_LEFT )
......@@ -431,9 +462,9 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
int chroma_qp[2];
int left_qp[2];
x264_deblock_inter_t luma_deblock = h->loopf.deblock_luma_mbaff;
x264_deblock_inter_t chroma_deblock = chroma444 ? h->loopf.deblock_luma_mbaff : h->loopf.deblock_chroma_mbaff;
x264_deblock_inter_t chroma_deblock = h->loopf.deblock_chroma_mbaff;
x264_deblock_intra_t luma_intra_deblock = h->loopf.deblock_luma_intra_mbaff;
x264_deblock_intra_t chroma_intra_deblock = chroma444 ? h->loopf.deblock_luma_intra_mbaff : h->loopf.deblock_chroma_intra_mbaff;
x264_deblock_intra_t chroma_intra_deblock = h->loopf.deblock_chroma_intra_mbaff;
int c = chroma444 ? 0 : 1;
left_qp[0] = h->mb.qp[h->mb.i_mb_left_xy[0]];
......@@ -453,8 +484,7 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
}
int offy = MB_INTERLACED ? 4 : 0;
int offuv = MB_INTERLACED ? 3 : 0;
if( chroma444 ) offuv = offy;
int offuv = MB_INTERLACED ? 4-h->mb.chroma_v_shift : 0;
left_qp[1] = h->mb.qp[h->mb.i_mb_left_xy[1]];
luma_qp[1] = (qp + left_qp[1] + 1) >> 1;
chroma_qp[1] = (qpc + h->chroma_qp_table[left_qp[1]] + 1) >> 1;
......@@ -486,9 +516,9 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
}
if( !first_edge_only )
{
if( !transform_8x8 ) FILTER( , 0, 1, qp, qpc );
FILTER( , 0, 2, qp, qpc );
if( !transform_8x8 ) FILTER( , 0, 3, qp, qpc );
FILTER( , 0, 1, qp, qpc );
FILTER( , 0, 2, qp, qpc );
FILTER( , 0, 3, qp, qpc );
}
if( h->mb.i_neighbour & MB_TOP )
......@@ -540,9 +570,9 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
if( !first_edge_only )
{
if( !transform_8x8 ) FILTER( , 1, 1, qp, qpc );
FILTER( , 1, 2, qp, qpc );
if( !transform_8x8 ) FILTER( , 1, 3, qp, qpc );
FILTER( , 1, 1, qp, qpc );
FILTER( , 1, 2, qp, qpc );
FILTER( , 1, 3, qp, qpc );
}
#undef FILTER
......@@ -553,7 +583,7 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
* TODO:
* deblock macroblock edges
* support analysis partitions smaller than 16x16
* deblock chroma for 4:2:0
* deblock chroma for 4:2:0/4:2:2
* handle duplicate refs correctly
* handle cavlc+8x8dct correctly
*/
......@@ -683,15 +713,19 @@ void x264_deblock_init( int cpu, x264_deblock_function_t *pf, int b_mbaff )
pf->deblock_luma[1] = deblock_v_luma_c;
pf->deblock_luma[0] = deblock_h_luma_c;
pf->deblock_chroma[1] = deblock_v_chroma_c;
pf->deblock_chroma[0] = deblock_h_chroma_c;
pf->deblock_h_chroma_420 = deblock_h_chroma_c;
pf->deblock_h_chroma_422 = deblock_h_chroma_422_c;
pf->deblock_luma_intra[1] = deblock_v_luma_intra_c;
pf->deblock_luma_intra[0] = deblock_h_luma_intra_c;
pf->deblock_chroma_intra[1] = deblock_v_chroma_intra_c;
pf->deblock_chroma_intra[0] = deblock_h_chroma_intra_c;
pf->deblock_luma_mbaff = deblock_v_luma_mbaff_c;
pf->deblock_chroma_mbaff = deblock_v_chroma_mbaff_c;
pf->deblock_luma_intra_mbaff = deblock_v_luma_intra_mbaff_c;
pf->deblock_chroma_intra_mbaff = deblock_v_chroma_intra_mbaff_c;
pf->deblock_h_chroma_420_intra = deblock_h_chroma_intra_c;
pf->deblock_h_chroma_422_intra = deblock_h_chroma_422_intra_c;
pf->deblock_luma_mbaff = deblock_h_luma_mbaff_c;
pf->deblock_chroma_420_mbaff = deblock_h_chroma_mbaff_c;
pf->deblock_chroma_422_mbaff = deblock_h_chroma_422_mbaff_c;
pf->deblock_luma_intra_mbaff = deblock_h_luma_intra_mbaff_c;
pf->deblock_chroma_420_intra_mbaff = deblock_h_chroma_intra_mbaff_c;
pf->deblock_chroma_422_intra_mbaff = deblock_h_chroma_422_intra_mbaff_c;
pf->deblock_strength = deblock_strength_c;
#if HAVE_MMX
......@@ -701,11 +735,11 @@ void x264_deblock_init( int cpu, x264_deblock_function_t *pf, int b_mbaff )
pf->deblock_luma[1] = x264_deblock_v_luma_mmx2;
pf->deblock_luma[0] = x264_deblock_h_luma_mmx2;
pf->deblock_chroma[1] = x264_deblock_v_chroma_mmx2;
pf->deblock_chroma[0] = x264_deblock_h_chroma_mmx2;
pf->deblock_h_chroma_420 = x264_deblock_h_chroma_mmx2;
pf->deblock_luma_intra[1] = x264_deblock_v_luma_intra_mmx2;
pf->deblock_luma_intra[0] = x264_deblock_h_luma_intra_mmx2;
pf->deblock_chroma_intra[1] = x264_deblock_v_chroma_intra_mmx2;
pf->deblock_chroma_intra[0] = x264_deblock_h_chroma_intra_mmx2;
pf->deblock_h_chroma_420_intra = x264_deblock_h_chroma_intra_mmx2;
#endif
pf->deblock_strength = x264_deblock_strength_mmx2;
if( cpu&X264_CPU_SSE2 )
......@@ -716,11 +750,11 @@ void x264_deblock_init( int cpu, x264_deblock_function_t *pf, int b_mbaff )
pf->deblock_luma[1] = x264_deblock_v_luma_sse2;
pf->deblock_luma[0] = x264_deblock_h_luma_sse2;
pf->deblock_chroma[1] = x264_deblock_v_chroma_sse2;
pf->deblock_chroma[0] = x264_deblock_h_chroma_sse2;
pf->deblock_h_chroma_420 = x264_deblock_h_chroma_sse2;
pf->deblock_luma_intra[1] = x264_deblock_v_luma_intra_sse2;
pf->deblock_luma_intra[0] = x264_deblock_h_luma_intra_sse2;
pf->deblock_chroma_intra[1] = x264_deblock_v_chroma_intra_sse2;
pf->deblock_chroma_intra[0] = x264_deblock_h_chroma_intra_sse2;
pf->deblock_h_chroma_420_intra = x264_deblock_h_chroma_intra_sse2;
}
}
if( cpu&X264_CPU_SSSE3 )
......@@ -733,11 +767,11 @@ void x264_deblock_init( int cpu, x264_deblock_function_t *pf, int b_mbaff )
pf->deblock_luma[1] = x264_deblock_v_luma_avx;
pf->deblock_luma[0] = x264_deblock_h_luma_avx;
pf->deblock_chroma[1] = x264_deblock_v_chroma_avx;
pf->deblock_chroma[0] = x264_deblock_h_chroma_avx;
pf->deblock_h_chroma_420 = x264_deblock_h_chroma_avx;
pf->deblock_luma_intra[1] = x264_deblock_v_luma_intra_avx;
pf->deblock_luma_intra[0] = x264_deblock_h_luma_intra_avx;
pf->deblock_chroma_intra[1] = x264_deblock_v_chroma_intra_avx;
pf->deblock_chroma_intra[0] = x264_deblock_h_chroma_intra_avx;
pf->deblock_h_chroma_420_intra = x264_deblock_h_chroma_intra_avx;
}
}
}
......@@ -758,7 +792,7 @@ void x264_deblock_init( int cpu, x264_deblock_function_t *pf, int b_mbaff )
pf->deblock_luma[1] = x264_deblock_v_luma_neon;
pf->deblock_luma[0] = x264_deblock_h_luma_neon;
// pf->deblock_chroma[1] = x264_deblock_v_chroma_neon;
// pf->deblock_chroma[0] = x264_deblock_h_chroma_neon;
// pf->deblock_h_chroma_420 = x264_deblock_h_chroma_neon;
}
#endif
#endif // !HIGH_BIT_DEPTH
......
......@@ -50,6 +50,10 @@ static int x264_frame_internal_csp( int external_csp )
case X264_CSP_I420:
case X264_CSP_YV12:
return X264_CSP_NV12;
case X264_CSP_NV16:
case X264_CSP_I422:
case X264_CSP_YV16:
return X264_CSP_NV16;
case X264_CSP_I444:
case X264_CSP_YV24:
case X264_CSP_BGR:
......@@ -66,11 +70,10 @@ static x264_frame_t *x264_frame_new( x264_t *h, int b_fdec )
x264_frame_t *frame;
int i_csp = x264_frame_internal_csp( h->param.i_csp );
int i_mb_count = h->mb.i_mb_count;
int i_stride, i_width, i_lines;
int i_stride, i_width, i_lines, luma_plane_count;
int i_padv = PADV << PARAM_INTERLACED;
int align = h->param.cpu&X264_CPU_CACHELINE_64 ? 64 : h->param.cpu&X264_CPU_CACHELINE_32 ? 32 : 16;
int disalign = h->param.cpu&X264_CPU_ALTIVEC ? 1<<9 : 1<<10;
int luma_plane_count = i_csp == X264_CSP_NV12 ? 1 : 3;
CHECKED_MALLOCZERO( frame, sizeof(x264_frame_t) );
......@@ -79,18 +82,20 @@ static x264_frame_t *x264_frame_new( x264_t *h, int b_fdec )
i_lines = h->mb.i_mb_height*16;
i_stride = align_stride( i_width + 2*PADH, align, disalign );
if( i_csp == X264_CSP_NV12 )
if( i_csp == X264_CSP_NV12 || i_csp == X264_CSP_NV16 )
{