Commit dcf40697 authored by Henrik Gramner's avatar Henrik Gramner
Browse files

Support YUYV and UYVY packed 4:2:2 raw input

Packed YUV is arguably more common than planar YUV when dealing with raw
4:2:2 content.

We can utilize the existing plane_copy_deinterleave() functions with some
additional minor constraints (we cannot assume any particular alignment
or overread the input buffer).

Enables assembly optimizations on x86.
parent aaa9aa83
......@@ -1143,6 +1143,8 @@ int x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_heigh
[X264_CSP_I422] = { 3, { 256*1, 256/2, 256/2 }, { 256*1, 256*1, 256*1 } },
[X264_CSP_YV16] = { 3, { 256*1, 256/2, 256/2 }, { 256*1, 256*1, 256*1 } },
[X264_CSP_NV16] = { 2, { 256*1, 256*1 }, { 256*1, 256*1 }, },
[X264_CSP_YUYV] = { 1, { 256*2 }, { 256*1 }, },
[X264_CSP_UYVY] = { 1, { 256*2 }, { 256*1 }, },
[X264_CSP_I444] = { 3, { 256*1, 256*1, 256*1 }, { 256*1, 256*1, 256*1 } },
[X264_CSP_YV24] = { 3, { 256*1, 256*1, 256*1 }, { 256*1, 256*1, 256*1 } },
[X264_CSP_BGR] = { 1, { 256*3 }, { 256*1 }, },
......
......@@ -54,6 +54,8 @@ static int x264_frame_internal_csp( int external_csp )
case X264_CSP_NV16:
case X264_CSP_I422:
case X264_CSP_YV16:
case X264_CSP_YUYV:
case X264_CSP_UYVY:
case X264_CSP_V210:
return X264_CSP_NV16;
case X264_CSP_I444:
......@@ -408,7 +410,13 @@ int x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src )
uint8_t *pix[3];
int stride[3];
if( i_csp == X264_CSP_V210 )
if( i_csp == X264_CSP_YUYV || i_csp == X264_CSP_UYVY )
{
int p = i_csp == X264_CSP_UYVY;
h->mc.plane_copy_deinterleave_yuyv( dst->plane[p], dst->i_stride[p], dst->plane[p^1], dst->i_stride[p^1],
(pixel*)src->img.plane[0], src->img.i_stride[0], h->param.i_width, h->param.i_height );
}
else if( i_csp == X264_CSP_V210 )
{
stride[0] = src->img.i_stride[0];
pix[0] = src->img.plane[0];
......
......@@ -325,15 +325,14 @@ void x264_plane_copy_interleave_c( pixel *dst, intptr_t i_dst,
}
}
static void x264_plane_copy_deinterleave_c( pixel *dstu, intptr_t i_dstu,
pixel *dstv, intptr_t i_dstv,
pixel *src, intptr_t i_src, int w, int h )
void x264_plane_copy_deinterleave_c( pixel *dsta, intptr_t i_dsta, pixel *dstb, intptr_t i_dstb,
pixel *src, intptr_t i_src, int w, int h )
{
for( int y=0; y<h; y++, dstu+=i_dstu, dstv+=i_dstv, src+=i_src )
for( int y=0; y<h; y++, dsta+=i_dsta, dstb+=i_dstb, src+=i_src )
for( int x=0; x<w; x++ )
{
dstu[x] = src[2*x];
dstv[x] = src[2*x+1];
dsta[x] = src[2*x];
dstb[x] = src[2*x+1];
}
}
......@@ -649,6 +648,7 @@ void x264_mc_init( int cpu, x264_mc_functions_t *pf, int cpu_independent )
pf->plane_copy_swap = x264_plane_copy_swap_c;
pf->plane_copy_interleave = x264_plane_copy_interleave_c;
pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_c;
pf->plane_copy_deinterleave_yuyv = x264_plane_copy_deinterleave_c;
pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_c;
pf->plane_copy_deinterleave_v210 = x264_plane_copy_deinterleave_v210_c;
......
......@@ -160,6 +160,39 @@ static void x264_plane_copy_swap_##cpu( pixel *dst, intptr_t i_dst, pixel *src,
x264_plane_copy_swap_c( dst, i_dst, src, i_src, w, h );\
}
void x264_plane_copy_deinterleave_c( pixel *dsta, intptr_t i_dsta, pixel *dstb, intptr_t i_dstb,
pixel *src, intptr_t i_src, int w, int h );
/* We can utilize existing plane_copy_deinterleave() functions for YUYV/UYUV
* input with the additional constraint that we cannot overread src. */
#define PLANE_COPY_YUYV(align, cpu)\
static void x264_plane_copy_deinterleave_yuyv_##cpu( pixel *dsta, intptr_t i_dsta, pixel *dstb, intptr_t i_dstb,\
pixel *src, intptr_t i_src, int w, int h )\
{\
int c_w = (align>>1) / sizeof(pixel) - 1;\
if( !(w&c_w) )\
x264_plane_copy_deinterleave_##cpu( dsta, i_dsta, dstb, i_dstb, src, i_src, w, h );\
else if( w > c_w )\
{\
if( --h > 0 )\
{\
if( i_src > 0 )\
{\
x264_plane_copy_deinterleave_##cpu( dsta, i_dsta, dstb, i_dstb, src, i_src, w, h );\
dsta += i_dsta * h;\
dstb += i_dstb * h;\
src += i_src * h;\
}\
else\
x264_plane_copy_deinterleave_##cpu( dsta+i_dsta, i_dsta, dstb+i_dstb, i_dstb,\
src+i_src, i_src, w, h );\
}\
x264_plane_copy_deinterleave_c( dsta, 0, dstb, 0, src, 0, w, 1 );\
}\
else\
x264_plane_copy_deinterleave_c( dsta, i_dsta, dstb, i_dstb, src, i_src, w, h );\
}
void x264_plane_copy_interleave_c( pixel *dst, intptr_t i_dst,
pixel *srcu, intptr_t i_srcu,
pixel *srcv, intptr_t i_srcv, int w, int h );
......@@ -260,6 +293,8 @@ typedef struct
/* may write up to 15 pixels off the end of each plane */
void (*plane_copy_deinterleave)( pixel *dstu, intptr_t i_dstu, pixel *dstv, intptr_t i_dstv,
pixel *src, intptr_t i_src, int w, int h );
void (*plane_copy_deinterleave_yuyv)( pixel *dsta, intptr_t i_dsta, pixel *dstb, intptr_t i_dstb,
pixel *src, intptr_t i_src, int w, int h );
void (*plane_copy_deinterleave_rgb)( pixel *dsta, intptr_t i_dsta, pixel *dstb, intptr_t i_dstb,
pixel *dstc, intptr_t i_dstc, pixel *src, intptr_t i_src, int pw, int w, int h );
void (*plane_copy_deinterleave_v210)( pixel *dsty, intptr_t i_dsty,
......
......@@ -1044,8 +1044,8 @@ PLANE_COPY_CORE 1
%endif ; HIGH_BIT_DEPTH
%endmacro
%macro DEINTERLEAVE 6 ; dstu, dstv, src, dstv==dstu+8, shuffle constant, is aligned
mova m0, [%3]
%macro DEINTERLEAVE 6 ; dsta, dstb, src, dsta==dstb+8, shuffle constant, is aligned
mov%6 m0, [%3]
%if mmsize == 32
pshufb m0, %5
vpermq m0, m0, q3120
......@@ -1056,7 +1056,7 @@ PLANE_COPY_CORE 1
vextracti128 [%2], m0, 1
%endif
%elif HIGH_BIT_DEPTH
mova m1, [%3+mmsize]
mov%6 m1, [%3+mmsize]
psrld m2, m0, 16
psrld m3, m1, 16
pand m0, %5
......@@ -1181,8 +1181,8 @@ cglobal store_interleave_chroma, 5,5
%macro PLANE_DEINTERLEAVE 0
;-----------------------------------------------------------------------------
; void plane_copy_deinterleave( pixel *dstu, intptr_t i_dstu,
; pixel *dstv, intptr_t i_dstv,
; void plane_copy_deinterleave( pixel *dsta, intptr_t i_dsta,
; pixel *dstb, intptr_t i_dstb,
; pixel *src, intptr_t i_src, int w, int h )
;-----------------------------------------------------------------------------
%if ARCH_X86_64
......
......@@ -99,17 +99,17 @@ void x264_plane_copy_interleave_core_sse2( pixel *dst, intptr_t i_dst,
void x264_plane_copy_interleave_core_avx( pixel *dst, intptr_t i_dst,
pixel *srcu, intptr_t i_srcu,
pixel *srcv, intptr_t i_srcv, int w, int h );
void x264_plane_copy_deinterleave_sse2( pixel *dstu, intptr_t i_dstu,
pixel *dstv, intptr_t i_dstv,
void x264_plane_copy_deinterleave_sse2( pixel *dsta, intptr_t i_dsta,
pixel *dstb, intptr_t i_dstb,
pixel *src, intptr_t i_src, int w, int h );
void x264_plane_copy_deinterleave_ssse3( uint8_t *dstu, intptr_t i_dstu,
uint8_t *dstv, intptr_t i_dstv,
void x264_plane_copy_deinterleave_ssse3( uint8_t *dsta, intptr_t i_dsta,
uint8_t *dstb, intptr_t i_dstb,
uint8_t *src, intptr_t i_src, int w, int h );
void x264_plane_copy_deinterleave_avx( uint16_t *dstu, intptr_t i_dstu,
uint16_t *dstv, intptr_t i_dstv,
void x264_plane_copy_deinterleave_avx( uint16_t *dsta, intptr_t i_dsta,
uint16_t *dstb, intptr_t i_dstb,
uint16_t *src, intptr_t i_src, int w, int h );
void x264_plane_copy_deinterleave_avx2( pixel *dstu, intptr_t i_dstu,
pixel *dstv, intptr_t i_dstv,
void x264_plane_copy_deinterleave_avx2( pixel *dsta, intptr_t i_dsta,
pixel *dstb, intptr_t i_dstb,
pixel *src, intptr_t i_src, int w, int h );
void x264_plane_copy_deinterleave_rgb_sse2 ( pixel *dsta, intptr_t i_dsta,
pixel *dstb, intptr_t i_dstb,
......@@ -498,6 +498,15 @@ PLANE_COPY(32, avx)
PLANE_COPY_SWAP(16, ssse3)
PLANE_COPY_SWAP(32, avx2)
#if HIGH_BIT_DEPTH
PLANE_COPY_YUYV(64, sse2)
PLANE_COPY_YUYV(64, avx)
#else
PLANE_COPY_YUYV(32, sse2)
PLANE_COPY_YUYV(32, ssse3)
#endif
PLANE_COPY_YUYV(64, avx2)
PLANE_INTERLEAVE(mmx2)
PLANE_INTERLEAVE(sse2)
#if HIGH_BIT_DEPTH
......@@ -606,6 +615,7 @@ void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf )
pf->plane_copy_interleave = x264_plane_copy_interleave_sse2;
pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_sse2;
pf->plane_copy_deinterleave_yuyv = x264_plane_copy_deinterleave_yuyv_sse2;
if( cpu&X264_CPU_SSE2_IS_FAST )
{
......@@ -661,6 +671,7 @@ void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf )
pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_avx;
pf->plane_copy_interleave = x264_plane_copy_interleave_avx;
pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_avx;
pf->plane_copy_deinterleave_yuyv = x264_plane_copy_deinterleave_yuyv_avx;
pf->plane_copy_deinterleave_v210 = x264_plane_copy_deinterleave_v210_avx;
pf->store_interleave_chroma = x264_store_interleave_chroma_avx;
pf->copy[PIXEL_16x16] = x264_mc_copy_w16_aligned_avx;
......@@ -702,6 +713,7 @@ void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf )
pf->hpel_filter = x264_hpel_filter_sse2_amd;
pf->mbtree_propagate_cost = x264_mbtree_propagate_cost_sse2;
pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_sse2;
pf->plane_copy_deinterleave_yuyv = x264_plane_copy_deinterleave_yuyv_sse2;
pf->load_deinterleave_chroma_fenc = x264_load_deinterleave_chroma_fenc_sse2;
pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_sse2;
pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_sse2;
......@@ -763,6 +775,7 @@ void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf )
pf->load_deinterleave_chroma_fenc = x264_load_deinterleave_chroma_fenc_ssse3;
pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_ssse3;
pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_ssse3;
pf->plane_copy_deinterleave_yuyv = x264_plane_copy_deinterleave_yuyv_ssse3;
}
if( !(cpu&X264_CPU_SLOW_PALIGNR) )
......@@ -844,6 +857,7 @@ void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf )
return;
pf->plane_copy_swap = x264_plane_copy_swap_avx2;
pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_avx2;
pf->plane_copy_deinterleave_yuyv = x264_plane_copy_deinterleave_yuyv_avx2;
pf->load_deinterleave_chroma_fenc = x264_load_deinterleave_chroma_fenc_avx2;
pf->get_ref = get_ref_avx2;
pf->mbtree_propagate_cost = x264_mbtree_propagate_cost_avx2;
......
......@@ -494,7 +494,8 @@ static int x264_validate_parameters( x264_t *h, int b_open )
#endif
if( i_csp <= X264_CSP_NONE || i_csp >= X264_CSP_MAX )
{
x264_log( h, X264_LOG_ERROR, "invalid CSP (only I420/YV12/NV12/NV21/I422/YV16/NV16/I444/YV24/BGR/BGRA/RGB supported)\n" );
x264_log( h, X264_LOG_ERROR, "invalid CSP (only I420/YV12/NV12/NV21/I422/YV16/NV16/YUYV/UYVY/"
"I444/YV24/BGR/BGRA/RGB supported)\n" );
return -1;
}
......
......@@ -154,10 +154,12 @@ static int convert_csp_to_pix_fmt( int csp )
case X264_CSP_RGB: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_RGB48 : AV_PIX_FMT_RGB24;
case X264_CSP_BGR: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_BGR48 : AV_PIX_FMT_BGR24;
case X264_CSP_BGRA: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_BGRA64 : AV_PIX_FMT_BGRA;
/* the next csp has no equivalent 16bit depth in swscale */
/* the following has no equivalent 16-bit depth in swscale */
case X264_CSP_NV12: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_NONE : AV_PIX_FMT_NV12;
case X264_CSP_NV21: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_NONE : AV_PIX_FMT_NV21;
/* the next csp is no supported by swscale at all */
case X264_CSP_YUYV: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_NONE : AV_PIX_FMT_YUYV422;
case X264_CSP_UYVY: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_NONE : AV_PIX_FMT_UYVY422;
/* the following is not supported by swscale at all */
case X264_CSP_NV16:
default: return AV_PIX_FMT_NONE;
}
......
......@@ -43,6 +43,8 @@ const x264_cli_csp_t x264_cli_csps[] = {
[X264_CSP_NV12] = { "nv12", 2, { 1, 1 }, { 1, .5 }, 2, 2 },
[X264_CSP_NV21] = { "nv21", 2, { 1, 1 }, { 1, .5 }, 2, 2 },
[X264_CSP_NV16] = { "nv16", 2, { 1, 1 }, { 1, 1 }, 2, 1 },
[X264_CSP_YUYV] = { "yuyv", 1, { 2 }, { 1 }, 2, 1 },
[X264_CSP_UYVY] = { "uyvy", 1, { 2 }, { 1 }, 2, 1 },
[X264_CSP_BGR] = { "bgr", 1, { 3 }, { 1 }, 1, 1 },
[X264_CSP_BGRA] = { "bgra", 1, { 4 }, { 1 }, 1, 1 },
[X264_CSP_RGB] = { "rgb", 1, { 3 }, { 1 }, 1, 1 },
......
......@@ -1526,6 +1526,33 @@ static int check_mc( int cpu_ref, int cpu_new )
}
}
if( mc_a.plane_copy_deinterleave_yuyv != mc_ref.plane_copy_deinterleave_yuyv )
{
set_func_name( "plane_copy_deinterleave_yuyv" );
used_asm = 1;
for( int i = 0; i < sizeof(plane_specs)/sizeof(*plane_specs); i++ )
{
int w = (plane_specs[i].w + 1) >> 1;
int h = plane_specs[i].h;
intptr_t dst_stride = ALIGN( w, 32/sizeof(pixel) );
intptr_t src_stride = (plane_specs[i].src_stride + 1) >> 1;
intptr_t offv = dst_stride*h;
pixel *src1 = pbuf1 + X264_MAX(0, -src_stride) * (h-1);
memset( pbuf3, 0, 0x1000 );
memset( pbuf4, 0, 0x1000 );
/* Skip benchmarking since it's the same as plane_copy_deinterleave(), just verify correctness. */
call_c1( mc_c.plane_copy_deinterleave_yuyv, pbuf3, dst_stride, pbuf3+offv, dst_stride, src1, src_stride, w, h );
call_a1( mc_a.plane_copy_deinterleave_yuyv, pbuf4, dst_stride, pbuf4+offv, dst_stride, src1, src_stride, w, h );
for( int y = 0; y < h; y++ )
if( memcmp( pbuf3+y*dst_stride, pbuf4+y*dst_stride, w*sizeof(pixel) ) ||
memcmp( pbuf3+y*dst_stride+offv, pbuf4+y*dst_stride+offv, w*sizeof(pixel) ) )
{
fprintf( stderr, "plane_copy_deinterleave_yuyv FAILED: w=%d h=%d stride=%d\n", w, h, (int)src_stride );
break;
}
}
}
if( mc_a.plane_copy_deinterleave_rgb != mc_ref.plane_copy_deinterleave_rgb )
{
set_func_name( "plane_copy_deinterleave_rgb" );
......
......@@ -45,7 +45,7 @@ extern "C" {
#include "x264_config.h"
#define X264_BUILD 148
#define X264_BUILD 149
/* Application developers planning to link against a shared library version of
* libx264 from a Microsoft Visual Studio or similar development environment
......@@ -227,13 +227,15 @@ static const char * const x264_nal_hrd_names[] = { "none", "vbr", "cbr", 0 };
#define X264_CSP_I422 0x0005 /* yuv 4:2:2 planar */
#define X264_CSP_YV16 0x0006 /* yvu 4:2:2 planar */
#define X264_CSP_NV16 0x0007 /* yuv 4:2:2, with one y plane and one packed u+v */
#define X264_CSP_V210 0x0008 /* 10-bit yuv 4:2:2 packed in 32 */
#define X264_CSP_I444 0x0009 /* yuv 4:4:4 planar */
#define X264_CSP_YV24 0x000a /* yvu 4:4:4 planar */
#define X264_CSP_BGR 0x000b /* packed bgr 24bits */
#define X264_CSP_BGRA 0x000c /* packed bgr 32bits */
#define X264_CSP_RGB 0x000d /* packed rgb 24bits */
#define X264_CSP_MAX 0x000e /* end of list */
#define X264_CSP_YUYV 0x0008 /* yuyv 4:2:2 packed */
#define X264_CSP_UYVY 0x0009 /* uyvy 4:2:2 packed */
#define X264_CSP_V210 0x000a /* 10-bit yuv 4:2:2 packed in 32 */
#define X264_CSP_I444 0x000b /* yuv 4:4:4 planar */
#define X264_CSP_YV24 0x000c /* yvu 4:4:4 planar */
#define X264_CSP_BGR 0x000d /* packed bgr 24bits */
#define X264_CSP_BGRA 0x000e /* packed bgr 32bits */
#define X264_CSP_RGB 0x000f /* packed rgb 24bits */
#define X264_CSP_MAX 0x0010 /* end of list */
#define X264_CSP_VFLIP 0x1000 /* the csp is vertically flipped */
#define X264_CSP_HIGH_DEPTH 0x2000 /* the csp has a depth of 16 bits per pixel component */
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment