Commit ead697ca authored by Loren Merritt

Use the SSE2 16-wide copy in mc.copy (via the new x264_mc_copy_w16_aligned_sse2 variant); previously the SSE2 copy was used only in mc_luma.

parent 14b45a81
......@@ -341,14 +341,15 @@ cglobal x264_mc_copy_w16_mmx, 5,7
jg .height_loop
REP_RET
cglobal x264_mc_copy_w16_sse2, 5,7
%macro COPY_W16_SSE2 2
cglobal %1, 5,7
lea r6, [r3*3]
lea r5, [r1*3]
.height_loop:
movdqu xmm0, [r2]
movdqu xmm1, [r2+r3]
movdqu xmm2, [r2+r3*2]
movdqu xmm3, [r2+r6]
%2 xmm0, [r2]
%2 xmm1, [r2+r3]
%2 xmm2, [r2+r3*2]
%2 xmm3, [r2+r6]
movdqa [r0], xmm0
movdqa [r0+r1], xmm1
movdqa [r0+r1*2], xmm2
......@@ -358,6 +359,10 @@ cglobal x264_mc_copy_w16_sse2, 5,7
sub r4d, 4
jg .height_loop
REP_RET
%endmacro
COPY_W16_SSE2 x264_mc_copy_w16_sse2, movdqu
COPY_W16_SSE2 x264_mc_copy_w16_aligned_sse2, movdqa
......
......@@ -49,6 +49,7 @@ extern void x264_mc_copy_w4_mmx( uint8_t *, int, uint8_t *, int, int );
extern void x264_mc_copy_w8_mmx( uint8_t *, int, uint8_t *, int, int );
extern void x264_mc_copy_w16_mmx( uint8_t *, int, uint8_t *, int, int );
/* SSE2 16-byte-wide copy routines, both instantiated in the assembly from the
 * COPY_W16_SSE2 macro. They differ only in the load instruction passed to the
 * macro: movdqu for x264_mc_copy_w16_sse2, movdqa for
 * x264_mc_copy_w16_aligned_sse2; the visible row stores use movdqa in both.
 * The loop counts the last argument down by 4 per iteration.
 * NOTE(review): arguments look like (dst, dst stride, src, src stride, height),
 * assuming cglobal maps them to r0..r4 in order -- confirm against x86inc. */
extern void x264_mc_copy_w16_sse2( uint8_t *, int, uint8_t *, int, int );
extern void x264_mc_copy_w16_aligned_sse2( uint8_t *, int, uint8_t *, int, int );
extern void x264_pixel_avg_weight_4x4_mmxext( uint8_t *, int, uint8_t *, int, int );
extern void x264_pixel_avg_weight_w8_mmxext( uint8_t *, int, uint8_t *, int, int, int );
extern void x264_pixel_avg_weight_w16_mmxext( uint8_t *, int, uint8_t *, int, int, int );
......@@ -215,6 +216,7 @@ void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf )
pf->mc_luma = mc_luma_sse2;
pf->get_ref = get_ref_sse2;
pf->copy[PIXEL_16x16] = x264_mc_copy_w16_aligned_sse2;
pf->avg[PIXEL_16x16] = x264_pixel_avg_16x16_sse2;
pf->avg[PIXEL_16x8] = x264_pixel_avg_16x8_sse2;
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment