Commit 9bbfc302 authored by Fiona Glaser

Split prefetch_fenc between colorspaces

Add 4:2:2 version.
parent b63a73da
......@@ -210,7 +210,8 @@ void x264_mc_init_arm( int cpu, x264_mc_functions_t *pf )
return;
#if !HIGH_BIT_DEPTH
pf->prefetch_fenc = x264_prefetch_fenc_arm;
pf->prefetch_fenc_420 = x264_prefetch_fenc_arm;
pf->prefetch_fenc_422 = x264_prefetch_fenc_arm; /* FIXME */
pf->prefetch_ref = x264_prefetch_ref_arm;
#endif // !HIGH_BIT_DEPTH
......
......@@ -506,7 +506,8 @@ void x264_mc_init( int cpu, x264_mc_functions_t *pf )
pf->hpel_filter = hpel_filter;
pf->prefetch_fenc = prefetch_fenc_null;
pf->prefetch_fenc_420 = prefetch_fenc_null;
pf->prefetch_fenc_422 = prefetch_fenc_null;
pf->prefetch_ref = prefetch_ref_null;
pf->memcpy_aligned = memcpy;
pf->memzero_aligned = memzero_aligned;
......
......@@ -103,6 +103,10 @@ typedef struct
/* prefetch the next few macroblocks of fenc or fdec */
/* chroma_dsp_init() overwrites prefetch_fenc with one of the per-colorspace
 * variants below, chosen by chroma format. All three share one signature:
 * luma pointer and stride, chroma pointer and stride, and the macroblock
 * x index. */
void (*prefetch_fenc)( pixel *pix_y, int stride_y,
pixel *pix_uv, int stride_uv, int mb_x );
/* Variant selected for CHROMA_420. */
void (*prefetch_fenc_420)( pixel *pix_y, int stride_y,
pixel *pix_uv, int stride_uv, int mb_x );
/* Variant selected for CHROMA_422. chroma_dsp_init() also selects it for
 * CHROMA_444, where it is marked FIXME because it does not cover the V plane. */
void (*prefetch_fenc_422)( pixel *pix_y, int stride_y,
pixel *pix_uv, int stride_uv, int mb_x );
/* prefetch the next few macroblocks of a hpel reference frame */
void (*prefetch_ref)( pixel *pix, int stride, int parity );
......
......@@ -1302,9 +1302,10 @@ MC_COPY 16
; void prefetch_fenc_{420,422}( pixel *pix_y, int stride_y,
; pixel *pix_uv, int stride_uv, int mb_x )
;-----------------------------------------------------------------------------
INIT_MMX
%macro PREFETCH_FENC 1
%ifdef ARCH_X86_64
cglobal prefetch_fenc_mmx2, 5,5
cglobal prefetch_fenc_%1, 5,5
FIX_STRIDES r1d, r3d
and r4d, 3
mov eax, r4d
......@@ -1320,10 +1321,15 @@ cglobal prefetch_fenc_mmx2, 5,5
lea r2, [r2+rax*2+64*SIZEOF_PIXEL]
prefetcht0 [r2]
prefetcht0 [r2+r3]
%ifidn %1, 422
lea r2, [r2+r3*2]
prefetcht0 [r2]
prefetcht0 [r2+r3]
%endif
RET
%else
cglobal prefetch_fenc_mmx2, 0,3
cglobal prefetch_fenc_%1, 0,3
mov r2, r4m
mov r1, r1m
mov r0, r0m
......@@ -1346,13 +1352,24 @@ cglobal prefetch_fenc_mmx2, 0,3
lea r0, [r0+r2*2+64*SIZEOF_PIXEL]
prefetcht0 [r0]
prefetcht0 [r0+r1]
%ifidn %1, 422
lea r0, [r0+r1*2]
prefetcht0 [r0]
prefetcht0 [r0+r1]
%endif
ret
%endif ; ARCH_X86_64
%endmacro
INIT_MMX mmx2
PREFETCH_FENC 420
PREFETCH_FENC 422
;-----------------------------------------------------------------------------
; void prefetch_ref( pixel *pix, int stride, int parity )
;-----------------------------------------------------------------------------
cglobal prefetch_ref_mmx2, 3,3
INIT_MMX mmx2
cglobal prefetch_ref, 3,3
FIX_STRIDES r1d
dec r2d
and r2d, r1d
......
......@@ -86,7 +86,8 @@ void x264_mc_copy_w8_sse2( pixel *, int, pixel *, int, int );
void x264_mc_copy_w16_mmx( pixel *, int, pixel *, int, int );
void x264_mc_copy_w16_sse2( pixel *, int, pixel *, int, int );
void x264_mc_copy_w16_aligned_sse2( pixel *, int, pixel *, int, int );
void x264_prefetch_fenc_mmx2( pixel *, int, pixel *, int, int );
void x264_prefetch_fenc_420_mmx2( pixel *, int, pixel *, int, int );
void x264_prefetch_fenc_422_mmx2( pixel *, int, pixel *, int, int );
void x264_prefetch_ref_mmx2( pixel *, int, int );
void x264_plane_copy_core_mmx2( pixel *, int, pixel *, int, int w, int h);
void x264_plane_copy_c( pixel *, int, pixel *, int, int w, int h );
......@@ -517,7 +518,8 @@ void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf )
if( !(cpu&X264_CPU_MMX2) )
return;
pf->prefetch_fenc = x264_prefetch_fenc_mmx2;
pf->prefetch_fenc_420 = x264_prefetch_fenc_420_mmx2;
pf->prefetch_fenc_422 = x264_prefetch_fenc_422_mmx2;
pf->prefetch_ref = x264_prefetch_ref_mmx2;
pf->plane_copy = x264_plane_copy_mmx2;
......
......@@ -980,6 +980,7 @@ static void chroma_dsp_init( x264_t *h )
{
case CHROMA_420:
memcpy( h->predict_chroma, h->predict_8x8c, sizeof(h->predict_chroma) );
h->mc.prefetch_fenc = h->mc.prefetch_fenc_420;
h->loopf.deblock_chroma[0] = h->loopf.deblock_h_chroma_420;
h->loopf.deblock_chroma_intra[0] = h->loopf.deblock_h_chroma_420_intra;
h->loopf.deblock_chroma_mbaff = h->loopf.deblock_chroma_420_mbaff;
......@@ -990,6 +991,7 @@ static void chroma_dsp_init( x264_t *h )
break;
case CHROMA_422:
memcpy( h->predict_chroma, h->predict_8x16c, sizeof(h->predict_chroma) );
h->mc.prefetch_fenc = h->mc.prefetch_fenc_422;
h->loopf.deblock_chroma[0] = h->loopf.deblock_h_chroma_422;
h->loopf.deblock_chroma_intra[0] = h->loopf.deblock_h_chroma_422_intra;
h->loopf.deblock_chroma_mbaff = h->loopf.deblock_chroma_422_mbaff;
......@@ -999,6 +1001,7 @@ static void chroma_dsp_init( x264_t *h )
h->quantf.coeff_level_run[DCT_CHROMA_DC] = h->quantf.coeff_level_run8;
break;
case CHROMA_444:
h->mc.prefetch_fenc = h->mc.prefetch_fenc_422; /* FIXME: doesn't cover V plane */
h->loopf.deblock_chroma_mbaff = h->loopf.deblock_luma_mbaff;
h->loopf.deblock_chroma_intra_mbaff = h->loopf.deblock_luma_intra_mbaff;
break;
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment