Commit 627f891c authored by Xiaolei Yu's avatar Xiaolei Yu Committed by Henrik Gramner

NV21 input support

Eliminates an extra copy when encoding Android camera preview images.

Checkasm test by Janne Grunau.
ARM assembly with improvements from Janne Grunau.
parent 6ee94dc8
......@@ -1566,6 +1566,30 @@ blocki:
pop {r4-r7, pc}
endfunc
// void x264_plane_copy_swap_neon( pixel *dst, intptr_t i_dst,
//                                 pixel *src, intptr_t i_src, int w, int h )
//
// Copies h rows from src to dst while exchanging the two bytes of every
// 16-bit pair. Each inner iteration handles 32 bytes (16 pairs), so a row is
// processed in whole multiples of 16 pairs: when w is not a multiple of 16,
// up to 15 extra pairs past w are read from src and written to dst.
//
// Register use (presumably standard AAPCS argument passing; w and h come
// from the stack -- confirm against the build ABI):
//   r0 = dst    r1 = i_dst    r2 = src    r3 = i_src
//   r4 = w      r5 = h        lr = pairs left in the current row
// NOTE(review): strides are adjusted in bytes as 2 * pairs, which assumes
// 1-byte pixels -- confirm this is only built for 8-bit depth.
function x264_plane_copy_swap_neon
push {r4-r5, lr}
// Three registers were pushed (12 bytes), so the first stack argument now
// sits at sp+12: load w into r4 and h into r5.
ldrd r4, r5, [sp, #12]
// lr = w rounded up to a multiple of 16 pairs.
add lr, r4, #15
bic lr, lr, #15
// The post-incremented pointers advance by 2*lr bytes per row, so take that
// off both strides; adding the remainder later lands on the next row start.
sub r1, r1, lr, lsl #1
sub r3, r3, lr, lsl #1
1:
// Inner loop: load 32 bytes, swap the bytes inside every halfword, store.
vld1.8 {q0, q1}, [r2]!
// Count down 16 pairs; the flags set here are the ones tested by the bgt
// below (the instructions in between are not flag-setting forms).
subs lr, lr, #16
vrev16.8 q0, q0
vrev16.8 q1, q1
vst1.8 {q0, q1}, [r0]!
bgt 1b
// End of row: one row fewer to go, then step both pointers to the next row.
subs r5, r5, #1
add r0, r0, r1
add r2, r2, r3
// Reload the counter with the unrounded w; counting down in steps of 16
// until <= 0 still yields ceil(w/16) iterations, matching the rounded
// amount used for the stride correction above.
mov lr, r4
// Still using the flags from "subs r5": loop while rows remain.
bgt 1b
pop {r4-r5, pc}
endfunc
function x264_store_interleave_chroma_neon
push {lr}
ldr lr, [sp, #4]
......
......@@ -57,6 +57,8 @@ void x264_plane_copy_deinterleave_rgb_neon( pixel *dsta, intptr_t i_dsta,
void x264_plane_copy_interleave_neon( pixel *dst, intptr_t i_dst,
pixel *srcu, intptr_t i_srcu,
pixel *srcv, intptr_t i_srcv, int w, int h );
void x264_plane_copy_swap_neon( pixel *dst, intptr_t i_dst,
pixel *src, intptr_t i_src, int w, int h );
void x264_store_interleave_chroma_neon( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height );
void x264_load_deinterleave_chroma_fdec_neon( pixel *dst, pixel *src, intptr_t i_src, int height );
......@@ -240,6 +242,7 @@ void x264_mc_init_arm( int cpu, x264_mc_functions_t *pf )
pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_neon;
pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_neon;
pf->plane_copy_interleave = x264_plane_copy_interleave_neon;
pf->plane_copy_swap = x264_plane_copy_swap_neon;
pf->store_interleave_chroma = x264_store_interleave_chroma_neon;
pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_neon;
......
......@@ -1142,6 +1142,7 @@ int x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_heigh
[X264_CSP_I420] = { 3, { 256*1, 256/2, 256/2 }, { 256*1, 256/2, 256/2 } },
[X264_CSP_YV12] = { 3, { 256*1, 256/2, 256/2 }, { 256*1, 256/2, 256/2 } },
[X264_CSP_NV12] = { 2, { 256*1, 256*1 }, { 256*1, 256/2 }, },
[X264_CSP_NV21] = { 2, { 256*1, 256*1 }, { 256*1, 256/2 }, },
[X264_CSP_I422] = { 3, { 256*1, 256/2, 256/2 }, { 256*1, 256*1, 256*1 } },
[X264_CSP_YV16] = { 3, { 256*1, 256/2, 256/2 }, { 256*1, 256*1, 256*1 } },
[X264_CSP_NV16] = { 2, { 256*1, 256*1 }, { 256*1, 256*1 }, },
......
......@@ -47,6 +47,7 @@ static int x264_frame_internal_csp( int external_csp )
switch( external_csp & X264_CSP_MASK )
{
case X264_CSP_NV12:
case X264_CSP_NV21:
case X264_CSP_I420:
case X264_CSP_YV12:
return X264_CSP_NV12;
......@@ -435,6 +436,12 @@ int x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src )
h->mc.plane_copy( dst->plane[1], dst->i_stride[1], (pixel*)pix[1],
stride[1]/sizeof(pixel), h->param.i_width, h->param.i_height>>v_shift );
}
else if( i_csp == X264_CSP_NV21 )
{
get_plane_ptr( h, src, &pix[1], &stride[1], 1, 0, v_shift );
h->mc.plane_copy_swap( dst->plane[1], dst->i_stride[1], (pixel*)pix[1],
stride[1]/sizeof(pixel), h->param.i_width>>1, h->param.i_height>>v_shift );
}
else if( i_csp == X264_CSP_I420 || i_csp == X264_CSP_I422 || i_csp == X264_CSP_YV12 || i_csp == X264_CSP_YV16 )
{
int uv_swap = i_csp == X264_CSP_YV12 || i_csp == X264_CSP_YV16;
......
......@@ -299,6 +299,17 @@ void x264_plane_copy_c( pixel *dst, intptr_t i_dst,
}
}
/* Copy a plane of interleaved sample pairs while exchanging the two members
 * of each pair (e.g. turning a packed v+u row into a packed u+v row).
 *
 *   dst, i_dst  destination plane and its row stride, in pixels
 *   src, i_src  source plane and its row stride, in pixels
 *   w           number of PAIRS per row (2*w pixels are read and written)
 *   h           number of rows
 */
void x264_plane_copy_swap_c( pixel *dst, intptr_t i_dst,
                             pixel *src, intptr_t i_src, int w, int h )
{
    for( int row = 0; row < h; row++ )
    {
        for( int pair = 0; pair < w; pair++ )
        {
            /* Write the first element before reading the second one, in the
             * same order as the reference loop. */
            dst[2*pair]   = src[2*pair+1];
            dst[2*pair+1] = src[2*pair];
        }
        dst += i_dst;
        src += i_src;
    }
}
void x264_plane_copy_interleave_c( pixel *dst, intptr_t i_dst,
pixel *srcu, intptr_t i_srcu,
pixel *srcv, intptr_t i_srcv, int w, int h )
......@@ -612,6 +623,7 @@ void x264_mc_init( int cpu, x264_mc_functions_t *pf, int cpu_independent )
pf->load_deinterleave_chroma_fdec = load_deinterleave_chroma_fdec;
pf->plane_copy = x264_plane_copy_c;
pf->plane_copy_swap = x264_plane_copy_swap_c;
pf->plane_copy_interleave = x264_plane_copy_interleave_c;
pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_c;
pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_c;
......
......@@ -88,6 +88,7 @@ typedef struct
void (*load_deinterleave_chroma_fdec)( pixel *dst, pixel *src, intptr_t i_src, int height );
void (*plane_copy)( pixel *dst, intptr_t i_dst, pixel *src, intptr_t i_src, int w, int h );
void (*plane_copy_swap)( pixel *dst, intptr_t i_dst, pixel *src, intptr_t i_src, int w, int h );
void (*plane_copy_interleave)( pixel *dst, intptr_t i_dst, pixel *srcu, intptr_t i_srcu,
pixel *srcv, intptr_t i_srcv, int w, int h );
/* may write up to 15 pixels off the end of each plane */
......
......@@ -492,7 +492,7 @@ static int x264_validate_parameters( x264_t *h, int b_open )
#endif
if( i_csp <= X264_CSP_NONE || i_csp >= X264_CSP_MAX )
{
x264_log( h, X264_LOG_ERROR, "invalid CSP (only I420/YV12/NV12/I422/YV16/NV16/I444/YV24/BGR/BGRA/RGB supported)\n" );
x264_log( h, X264_LOG_ERROR, "invalid CSP (only I420/YV12/NV12/NV21/I422/YV16/NV16/I444/YV24/BGR/BGRA/RGB supported)\n" );
return -1;
}
......
......@@ -50,6 +50,7 @@ static int depth_filter_csp_is_supported( int csp )
csp_mask == X264_CSP_YV16 ||
csp_mask == X264_CSP_YV24 ||
csp_mask == X264_CSP_NV12 ||
csp_mask == X264_CSP_NV21 ||
csp_mask == X264_CSP_NV16 ||
csp_mask == X264_CSP_BGR ||
csp_mask == X264_CSP_RGB ||
......@@ -59,7 +60,7 @@ static int depth_filter_csp_is_supported( int csp )
static int csp_num_interleaved( int csp, int plane )
{
int csp_mask = csp & X264_CSP_MASK;
return (csp_mask == X264_CSP_NV12 || csp_mask == X264_CSP_NV16) && plane == 1 ? 2 :
return (csp_mask == X264_CSP_NV12 || csp_mask == X264_CSP_NV21 || csp_mask == X264_CSP_NV16) && plane == 1 ? 2 :
csp_mask == X264_CSP_BGR || csp_mask == X264_CSP_RGB ? 3 :
csp_mask == X264_CSP_BGRA ? 4 :
1;
......
......@@ -156,6 +156,7 @@ static int convert_csp_to_pix_fmt( int csp )
case X264_CSP_BGRA: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_BGRA64 : AV_PIX_FMT_BGRA;
/* the next csp has no equivalent 16bit depth in swscale */
case X264_CSP_NV12: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_NONE : AV_PIX_FMT_NV12;
case X264_CSP_NV21: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_NONE : AV_PIX_FMT_NV21;
/* the next csp is not supported by swscale at all */
case X264_CSP_NV16:
default: return AV_PIX_FMT_NONE;
......
......@@ -33,6 +33,7 @@ const x264_cli_csp_t x264_cli_csps[] = {
[X264_CSP_YV16] = { "yv16", 3, { 1, .5, .5 }, { 1, 1, 1 }, 2, 1 },
[X264_CSP_YV24] = { "yv24", 3, { 1, 1, 1 }, { 1, 1, 1 }, 1, 1 },
[X264_CSP_NV12] = { "nv12", 2, { 1, 1 }, { 1, .5 }, 2, 2 },
[X264_CSP_NV21] = { "nv21", 2, { 1, 1 }, { 1, .5 }, 2, 2 },
[X264_CSP_NV16] = { "nv16", 2, { 1, 1 }, { 1, 1 }, 2, 1 },
[X264_CSP_BGR] = { "bgr", 1, { 3 }, { 1 }, 1, 1 },
[X264_CSP_BGRA] = { "bgra", 1, { 4 }, { 1 }, 1, 1 },
......
......@@ -1413,6 +1413,32 @@ static int check_mc( int cpu_ref, int cpu_new )
}
}
/* Compare the plane_copy_swap implementation under test against the C
 * version, but only when it differs from the reference implementation. */
if( mc_a.plane_copy_swap != mc_ref.plane_copy_swap )
{
set_func_name( "plane_copy_swap" );
used_asm = 1;
for( int i = 0; i < sizeof(plane_specs)/sizeof(*plane_specs); i++ )
{
/* plane_copy_swap takes its width in pairs, so halve the spec width
 * (rounding up). */
int w = (plane_specs[i].w + 1) >> 1;
int h = plane_specs[i].h;
intptr_t src_stride = plane_specs[i].src_stride;
/* Multiple of 64 that leaves at least 64 elements of slack after the
 * 2*w elements written per row. */
intptr_t dst_stride = (2*w + 127) & ~63;
/* The output must fit in the 0x1000-element area cleared below. */
assert( dst_stride * h <= 0x1000 );
/* With a negative source stride, start (h-1) rows into pbuf1 so that
 * stepping backwards stays inside the buffer (presumably X264_MAX is a
 * plain max macro -- confirm). */
pixel *src1 = pbuf1 + X264_MAX(0, -src_stride) * (h-1);
memset( pbuf3, 0, 0x1000*sizeof(pixel) );
memset( pbuf4, 0, 0x1000*sizeof(pixel) );
call_c( mc_c.plane_copy_swap, pbuf3, dst_stride, src1, src_stride, w, h );
call_a( mc_a.plane_copy_swap, pbuf4, dst_stride, src1, src_stride, w, h );
/* Only the 2*w elements of each row are compared; anything written into
 * the padding between rows is ignored. */
for( int y = 0; y < h; y++ )
if( memcmp( pbuf3+y*dst_stride, pbuf4+y*dst_stride, 2*w*sizeof(pixel) ) )
{
ok = 0;
fprintf( stderr, "plane_copy_swap FAILED: w=%d h=%d stride=%d\n", w, h, (int)src_stride );
/* Leaves the row loop only; the remaining plane specs are still run. */
break;
}
}
}
if( mc_a.plane_copy_interleave != mc_ref.plane_copy_interleave )
{
set_func_name( "plane_copy_interleave" );
......
......@@ -41,7 +41,7 @@
#include "x264_config.h"
#define X264_BUILD 146
#define X264_BUILD 147
/* Application developers planning to link against a shared library version of
* libx264 from a Microsoft Visual Studio or similar development environment
......@@ -214,16 +214,17 @@ static const char * const x264_nal_hrd_names[] = { "none", "vbr", "cbr", 0 };
#define X264_CSP_I420 0x0001 /* yuv 4:2:0 planar */
#define X264_CSP_YV12 0x0002 /* yvu 4:2:0 planar */
#define X264_CSP_NV12 0x0003 /* yuv 4:2:0, with one y plane and one packed u+v */
#define X264_CSP_I422 0x0004 /* yuv 4:2:2 planar */
#define X264_CSP_YV16 0x0005 /* yvu 4:2:2 planar */
#define X264_CSP_NV16 0x0006 /* yuv 4:2:2, with one y plane and one packed u+v */
#define X264_CSP_V210 0x0007 /* 10-bit yuv 4:2:2 packed in 32 */
#define X264_CSP_I444 0x0008 /* yuv 4:4:4 planar */
#define X264_CSP_YV24 0x0009 /* yvu 4:4:4 planar */
#define X264_CSP_BGR 0x000a /* packed bgr 24bits */
#define X264_CSP_BGRA 0x000b /* packed bgr 32bits */
#define X264_CSP_RGB 0x000c /* packed rgb 24bits */
#define X264_CSP_MAX 0x000d /* end of list */
#define X264_CSP_NV21 0x0004 /* yuv 4:2:0, with one y plane and one packed v+u */
#define X264_CSP_I422 0x0005 /* yuv 4:2:2 planar */
#define X264_CSP_YV16 0x0006 /* yvu 4:2:2 planar */
#define X264_CSP_NV16 0x0007 /* yuv 4:2:2, with one y plane and one packed u+v */
#define X264_CSP_V210 0x0008 /* 10-bit yuv 4:2:2 packed in 32 */
#define X264_CSP_I444 0x0009 /* yuv 4:4:4 planar */
#define X264_CSP_YV24 0x000a /* yvu 4:4:4 planar */
#define X264_CSP_BGR 0x000b /* packed bgr 24bits */
#define X264_CSP_BGRA 0x000c /* packed bgr 32bits */
#define X264_CSP_RGB 0x000d /* packed rgb 24bits */
#define X264_CSP_MAX 0x000e /* end of list */
#define X264_CSP_VFLIP 0x1000 /* the csp is vertically flipped */
#define X264_CSP_HIGH_DEPTH 0x2000 /* the csp has a depth of 16 bits per pixel component */
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment