Commit af617efc authored by Daniel Kang, committed by Fiona Glaser

SSE2 high bit depth zigzag_interleave_cavlc

Patch from Google Code-In.
parent 648147bb
......@@ -802,12 +802,15 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf, int b_interlaced )
}
pf->interleave_8x8_cavlc = zigzag_interleave_8x8_cavlc;
#if !HIGH_BIT_DEPTH
#if HAVE_MMX
#if HIGH_BIT_DEPTH
if( cpu&X264_CPU_SSE2 )
pf->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_sse2;
#else
if( cpu&X264_CPU_MMX )
pf->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_mmx;
if( cpu&X264_CPU_SHUFFLE_IS_FAST )
pf->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_sse2;
#endif // HIGH_BIT_DEPTH
#endif
#endif // !HIGH_BIT_DEPTH
}
......@@ -1267,47 +1267,57 @@ ZIGZAG_SUB_4x4 ac, field
;-----------------------------------------------------------------------------
; void zigzag_interleave_8x8_cavlc( int16_t *dst, int16_t *src, uint8_t *nnz )
;-----------------------------------------------------------------------------
%macro INTERLEAVE 1
movq m0, [r1+%1*4+ 0]
movq m1, [r1+%1*4+ 8]
movq m2, [r1+%1*4+16]
movq m3, [r1+%1*4+24]
TRANSPOSE4x4W 0,1,2,3,4
movq [r0+%1+ 0], m0
movq [r0+%1+32], m1
movq [r0+%1+64], m2
movq [r0+%1+96], m3
%if %1
%macro INTERLEAVE 2
mova m0, [r1+(%1*4+ 0)*SIZEOF_PIXEL]
mova m1, [r1+(%1*4+ 8)*SIZEOF_PIXEL]
mova m2, [r1+(%1*4+16)*SIZEOF_PIXEL]
mova m3, [r1+(%1*4+24)*SIZEOF_PIXEL]
TRANSPOSE4x4%2 0,1,2,3,4
mova [r0+(%1+ 0)*SIZEOF_PIXEL], m0
mova [r0+(%1+32)*SIZEOF_PIXEL], m1
mova [r0+(%1+64)*SIZEOF_PIXEL], m2
mova [r0+(%1+96)*SIZEOF_PIXEL], m3
packsswb m0, m1
por m6, m2
por m7, m3
por m5, m0
%if %1
por m6, m2
por m7, m3
por m5, m0
%else
packsswb m0, m1
SWAP m5, m0
SWAP m6, m2
SWAP m7, m3
SWAP m5, m0
SWAP m6, m2
SWAP m7, m3
%endif
%endmacro
INIT_MMX
cglobal zigzag_interleave_8x8_cavlc_mmx, 3,3
INTERLEAVE 0
INTERLEAVE 8
INTERLEAVE 16
INTERLEAVE 24
packsswb m6, m7
packsswb m5, m6
packsswb m5, m5
pxor m0, m0
pcmpeqb m5, m0
paddb m5, [pb_1]
movd r0d, m5
mov [r2+0], r0w
shr r0d, 16
mov [r2+8], r0w
; ZIGZAG_8x8_CAVLC <suffix>, <transpose-kind>
;   %1 - suffix appended to the emitted symbol (zigzag_interleave_8x8_cavlc_%1)
;   %2 - forwarded unchanged as the second argument of every INTERLEAVE expansion
; Emits one function that runs INTERLEAVE at offsets 0/8/16/24, then reduces the
; accumulated registers m5/m6/m7 to per-byte "non-zero" flags and stores them
; through r2 (the nnz pointer in the C prototype, assuming x86inc's rN argument
; mapping - TODO confirm).
%macro ZIGZAG_8x8_CAVLC 2
; The last cglobal count evaluates to 8 when mmsize is 16 and to 0 when mmsize
; is 8; presumably this is x86inc's vector-register count - TODO confirm.
cglobal zigzag_interleave_8x8_cavlc_%1, 3,3,8*(mmsize/16)
INTERLEAVE 0, %2
INTERLEAVE 8, %2
INTERLEAVE 16, %2
INTERLEAVE 24, %2
; m5/m6/m7 are consumed below without being written in this macro, so they are
; presumably the OR-accumulators left behind by the INTERLEAVE expansions.
; packsswb saturates instead of truncating, so a non-zero element can never
; collapse to a zero byte while the data is narrowed.
packsswb m6, m7
packsswb m5, m6
packsswb m5, m5
; m0 = 0, the comparison operand for pcmpeqb below.
pxor m0, m0
%ifdef HIGH_BIT_DEPTH
; One additional narrowing step in the high-bit-depth build.
packsswb m5, m5
%endif
; Bytes equal to zero become 0xFF, every other byte becomes 0x00.
pcmpeqb m5, m0
; Assuming pb_1 is a constant of bytes all equal to 1 (defined outside this
; view): 0xFF + 1 wraps to 0 and 0x00 + 1 gives 1, i.e. 1 marks "non-zero".
paddb m5, [pb_1]
; r0 is reused as scratch from here on; it receives the low 32 bits of m5.
movd r0d, m5
; Low 16 bits of the flags go to nnz+0, the next 16 bits to nnz+8.
mov [r2+0], r0w
shr r0d, 16
mov [r2+8], r0w
RET
%endmacro
; Instantiate exactly one variant of zigzag_interleave_8x8_cavlc per build:
;   HIGH_BIT_DEPTH defined -> "_sse2" symbol, expanded after INIT_XMM, with D
;                             passed through as the INTERLEAVE second argument
;   otherwise              -> "_mmx" symbol, expanded after INIT_MMX, with W
%ifdef HIGH_BIT_DEPTH
INIT_XMM
ZIGZAG_8x8_CAVLC sse2, D
%else
INIT_MMX
ZIGZAG_8x8_CAVLC mmx , W
%endif
%macro INTERLEAVE_XMM 1
mova m0, [r1+%1*4+ 0]
......
......@@ -86,6 +86,6 @@ int x264_zigzag_sub_4x4ac_frame_ssse3( int16_t level[16], const uint8_t *src, u
int x264_zigzag_sub_4x4_field_ssse3 ( int16_t level[16], const uint8_t *src, uint8_t *dst );
int x264_zigzag_sub_4x4ac_field_ssse3( int16_t level[16], const uint8_t *src, uint8_t *dst, int16_t *dc );
void x264_zigzag_interleave_8x8_cavlc_mmx( int16_t *dst, int16_t *src, uint8_t *nnz );
void x264_zigzag_interleave_8x8_cavlc_sse2( int16_t *dst, int16_t *src, uint8_t *nnz );
void x264_zigzag_interleave_8x8_cavlc_sse2( dctcoef *dst, dctcoef *src, uint8_t *nnz );
#endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment