diff --git a/common/set.c b/common/set.c index 6f4f471a2c51505384287fc0e4061ecb47c5d5b2..859a249d94d06486a2634ce4bf4739545788dfa2 100644 --- a/common/set.c +++ b/common/set.c @@ -158,6 +158,9 @@ int x264_cqm_init( x264_t *h ) quant8_mf[i_list][q][i] = DIV(def_quant8[q][i] * 16, h->sps->scaling_list[4+i_list][i]); } } + +#define MAX_MF X264_MIN( 0xffff, (1 << (25 - BIT_DEPTH)) - 1 ) + for( int q = 0; q <= QP_MAX_SPEC; q++ ) { int j; @@ -174,9 +177,9 @@ int x264_cqm_init( x264_t *h ) // round to nearest, unless that would cause the deadzone to be negative h->quant4_bias[i_list][q][i] = X264_MIN( DIV(deadzone[i_list]<<10, j), (1<<15)/j ); h->quant4_bias0[i_list][q][i] = (1<<15)/j; - if( j > 0xffff && q > max_qp_err && (i_list == CQM_4IY || i_list == CQM_4PY) ) + if( j > MAX_MF && q > max_qp_err && (i_list == CQM_4IY || i_list == CQM_4PY) ) max_qp_err = q; - if( j > 0xffff && q > max_chroma_qp_err && (i_list == CQM_4IC || i_list == CQM_4PC) ) + if( j > MAX_MF && q > max_chroma_qp_err && (i_list == CQM_4IC || i_list == CQM_4PC) ) max_chroma_qp_err = q; } if( h->param.analyse.b_transform_8x8 ) @@ -194,9 +197,9 @@ int x264_cqm_init( x264_t *h ) } h->quant8_bias[i_list][q][i] = X264_MIN( DIV(deadzone[i_list]<<10, j), (1<<15)/j ); h->quant8_bias0[i_list][q][i] = (1<<15)/j; - if( j > 0xffff && q > max_qp_err && (i_list == CQM_8IY || i_list == CQM_8PY) ) + if( j > MAX_MF && q > max_qp_err && (i_list == CQM_8IY || i_list == CQM_8PY) ) max_qp_err = q; - if( j > 0xffff && q > max_chroma_qp_err && (i_list == CQM_8IC || i_list == CQM_8PC) ) + if( j > MAX_MF && q > max_chroma_qp_err && (i_list == CQM_8IC || i_list == CQM_8PC) ) max_chroma_qp_err = q; } } diff --git a/common/x86/quant-a.asm b/common/x86/quant-a.asm index 7a0dcd0a4c58fca2a02f2d7712de3631ccde1fca..af9c7236f6d1083de639ac0572de86d25d7b67bb 100644 --- a/common/x86/quant-a.asm +++ b/common/x86/quant-a.asm @@ -167,7 +167,7 @@ cextern popcnt_table ABSD m1, m0 paddd m1, %3 pmulld m1, %2 - psrad m1, 16 + psrld m1, 16 %else ; !sse4 mova m0, [%1] ABSD m1, m0 @@ -195,8 +195,8 @@ cextern popcnt_table paddd m3, %3 pmulld m2, %2 pmulld m3, %2 - psrad m2, 16 - psrad m3, 16 + psrld m2, 16 + psrld m3, 16 PSIGND m2, m0 PSIGND m3, m1 mova [%1 ], m2 @@ -222,7 +222,7 @@ cextern popcnt_table pmuludq m3, m4 psllq m3, 32 paddd m1, m3 - psrad m1, 16 + psrld m1, 16 PSIGND m1, m0 mova [%1], m1 ACCUM por, %5, 1, %4 @@ -238,8 +238,8 @@ cextern popcnt_table paddd m3, [%3+mmsize] pmulld m2, [%2 ] pmulld m3, [%2+mmsize] - psrad m2, 16 - psrad m3, 16 + psrld m2, 16 + psrld m3, 16 PSIGND m2, m0 PSIGND m3, m1 mova [%1 ], m2 diff --git a/tools/checkasm.c b/tools/checkasm.c index 392d8696639c775bb680d0984285738b9e432e68..3a9c812f1775af75895c66670c46abf043c7b4e5 100644 --- a/tools/checkasm.c +++ b/tools/checkasm.c @@ -881,7 +881,7 @@ static int check_dct( uint32_t cpu_ref, uint32_t cpu_new ) h->param.analyse.i_luma_deadzone[0] = 0; h->param.analyse.i_luma_deadzone[1] = 0; h->param.analyse.b_transform_8x8 = 1; - for( int i = 0; i < 6; i++ ) + for( int i = 0; i < 8; i++ ) h->sps->scaling_list[i] = x264_cqm_flat16; x264_cqm_init( h ); x264_quant_init( h, 0, &qf ); @@ -2054,30 +2054,65 @@ static int check_quant( uint32_t cpu_ref, uint32_t cpu_new ) h->chroma_qp_table = i_chroma_qp_table + 12; h->param.analyse.b_transform_8x8 = 1; - for( int i_cqm = 0; i_cqm < 4; i_cqm++ ) + static const uint8_t cqm_test4[16] = + { + 6,4,6,4, + 4,3,4,3, + 6,4,6,4, + 4,3,4,3 + }; + static const uint8_t cqm_test8[64] = + { + 3,3,4,3,3,3,4,3, + 3,3,4,3,3,3,4,3, + 4,4,5,4,4,4,5,4, + 3,3,4,3,3,3,4,3, + 3,3,4,3,3,3,4,3, + 3,3,4,3,3,3,4,3, + 4,4,5,4,4,4,5,4, + 3,3,4,3,3,3,4,3 + }; + + for( int i_cqm = 0; i_cqm < 6; i_cqm++ ) { if( i_cqm == 0 ) { - for( int i = 0; i < 6; i++ ) + for( int i = 0; i < 8; i++ ) h->sps->scaling_list[i] = x264_cqm_flat16; h->param.i_cqm_preset = h->sps->i_cqm_preset = X264_CQM_FLAT; } else if( i_cqm == 1 ) { - for( int i = 0; i < 6; i++ ) + for( int i = 0; i < 8; i++ ) h->sps->scaling_list[i] = x264_cqm_jvt[i]; h->param.i_cqm_preset = h->sps->i_cqm_preset = X264_CQM_JVT; } + else if( i_cqm == 2 ) + { + for( int i = 0; i < 4; i++ ) + h->sps->scaling_list[i] = cqm_test4; + for( int i = 4; i < 8; i++ ) + h->sps->scaling_list[i] = x264_cqm_flat16; + h->param.i_cqm_preset = h->sps->i_cqm_preset = X264_CQM_CUSTOM; + } + else if( i_cqm == 3 ) + { + for( int i = 0; i < 4; i++ ) + h->sps->scaling_list[i] = x264_cqm_flat16; + for( int i = 4; i < 8; i++ ) + h->sps->scaling_list[i] = cqm_test8; + h->param.i_cqm_preset = h->sps->i_cqm_preset = X264_CQM_CUSTOM; + } else { int max_scale = BIT_DEPTH < 10 ? 255 : 228; - if( i_cqm == 2 ) + if( i_cqm == 4 ) for( int i = 0; i < 64; i++ ) cqm_buf[i] = 10 + rand() % (max_scale - 9); else for( int i = 0; i < 64; i++ ) cqm_buf[i] = 1; - for( int i = 0; i < 6; i++ ) + for( int i = 0; i < 8; i++ ) h->sps->scaling_list[i] = cqm_buf; h->param.i_cqm_preset = h->sps->i_cqm_preset = X264_CQM_CUSTOM; } @@ -2094,8 +2129,8 @@ static int check_quant( uint32_t cpu_ref, uint32_t cpu_new ) static const int scale1d[8] = {32,31,24,31,32,31,24,31}; \ for( int i = 0; i < max; i++ ) \ { \ - unsigned int scale = (255*scale1d[(i>>3)&7]*scale1d[i&7])/16; \ - dct1[i] = dct2[i] = (j>>(i>>6))&1 ? (rand()%(2*scale+1))-scale : 0; \ + int scale = (PIXEL_MAX*scale1d[(i>>3)&7]*scale1d[i&7])/16; \ + dct1[i] = dct2[i] = (j>>(i>>6))&1 ? (rand30()%(2*scale+1))-scale : 0; \ } \ } @@ -2104,8 +2139,8 @@ static int check_quant( uint32_t cpu_ref, uint32_t cpu_new ) static const int scale1d[4] = {4,6,4,6}; \ for( int i = 0; i < max; i++ ) \ { \ - unsigned int scale = 255*scale1d[(i>>2)&3]*scale1d[i&3]; \ - dct1[i] = dct2[i] = (j>>(i>>4))&1 ? (rand()%(2*scale+1))-scale : 0; \ + int scale = PIXEL_MAX*scale1d[(i>>2)&3]*scale1d[i&3]; \ + dct1[i] = dct2[i] = (j>>(i>>4))&1 ? (rand30()%(2*scale+1))-scale : 0; \ } \ }