Commit ba0aca83 authored by damienf

- added altivec acceleration for i420 -> YVYU, i420 -> UYVY (probably of no use to anyone)

parent 9e17f766
...@@ -43,7 +43,7 @@ ...@@ -43,7 +43,7 @@
#elif defined (MODULE_NAME_IS_i420_yuy2_mmx) #elif defined (MODULE_NAME_IS_i420_yuy2_mmx)
# define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422,IUYV,cyuv" # define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422,IUYV,cyuv"
#elif defined (MODULE_NAME_IS_i420_yuy2_altivec) #elif defined (MODULE_NAME_IS_i420_yuy2_altivec)
# define DEST_FOURCC "YUY2,YUNV" # define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422"
#endif #endif
/***************************************************************************** /*****************************************************************************
...@@ -52,9 +52,9 @@ ...@@ -52,9 +52,9 @@
static int Activate ( vlc_object_t * ); static int Activate ( vlc_object_t * );
static void I420_YUY2 ( vout_thread_t *, picture_t *, picture_t * ); static void I420_YUY2 ( vout_thread_t *, picture_t *, picture_t * );
#if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
static void I420_YVYU ( vout_thread_t *, picture_t *, picture_t * ); static void I420_YVYU ( vout_thread_t *, picture_t *, picture_t * );
static void I420_UYVY ( vout_thread_t *, picture_t *, picture_t * ); static void I420_UYVY ( vout_thread_t *, picture_t *, picture_t * );
#if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
static void I420_IUYV ( vout_thread_t *, picture_t *, picture_t * ); static void I420_IUYV ( vout_thread_t *, picture_t *, picture_t * );
static void I420_cyuv ( vout_thread_t *, picture_t *, picture_t * ); static void I420_cyuv ( vout_thread_t *, picture_t *, picture_t * );
#endif #endif
...@@ -116,7 +116,6 @@ static int Activate( vlc_object_t *p_this ) ...@@ -116,7 +116,6 @@ static int Activate( vlc_object_t *p_this )
p_vout->chroma.pf_convert = I420_YUY2; p_vout->chroma.pf_convert = I420_YUY2;
break; break;
#if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
case VLC_FOURCC('Y','V','Y','U'): case VLC_FOURCC('Y','V','Y','U'):
p_vout->chroma.pf_convert = I420_YVYU; p_vout->chroma.pf_convert = I420_YVYU;
break; break;
...@@ -127,6 +126,7 @@ static int Activate( vlc_object_t *p_this ) ...@@ -127,6 +126,7 @@ static int Activate( vlc_object_t *p_this )
p_vout->chroma.pf_convert = I420_UYVY; p_vout->chroma.pf_convert = I420_UYVY;
break; break;
#if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
case VLC_FOURCC('I','U','Y','V'): case VLC_FOURCC('I','U','Y','V'):
p_vout->chroma.pf_convert = I420_IUYV; p_vout->chroma.pf_convert = I420_IUYV;
break; break;
...@@ -296,7 +296,6 @@ static void I420_YUY2( vout_thread_t *p_vout, picture_t *p_source, ...@@ -296,7 +296,6 @@ static void I420_YUY2( vout_thread_t *p_vout, picture_t *p_source,
/***************************************************************************** /*****************************************************************************
* I420_YVYU: planar YUV 4:2:0 to packed YVYU 4:2:2 * I420_YVYU: planar YUV 4:2:0 to packed YVYU 4:2:2
*****************************************************************************/ *****************************************************************************/
#if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
static void I420_YVYU( vout_thread_t *p_vout, picture_t *p_source, static void I420_YVYU( vout_thread_t *p_vout, picture_t *p_source,
picture_t *p_dest ) picture_t *p_dest )
{ {
...@@ -307,6 +306,86 @@ static void I420_YVYU( vout_thread_t *p_vout, picture_t *p_source, ...@@ -307,6 +306,86 @@ static void I420_YVYU( vout_thread_t *p_vout, picture_t *p_source,
int i_x, i_y; int i_x, i_y;
#if defined (MODULE_NAME_IS_i420_yuy2_altivec)
#define VEC_NEXT_LINES( ) \
p_line1 = p_line2; \
p_line2 += p_dest->p->i_pitch; \
p_y1 = p_y2; \
p_y2 += p_source->p[Y_PLANE].i_pitch;
#define VEC_LOAD_UV( ) \
u_vec = vec_ld( 0, p_u ); p_u += 16; \
v_vec = vec_ld( 0, p_v ); p_v += 16;
#define VEC_MERGE( a ) \
vu_vec = a( v_vec, u_vec ); \
y_vec = vec_ld( 0, p_y1 ); p_y1 += 16; \
vec_st( vec_mergeh( y_vec, vu_vec ), 0, p_line1 ); p_line1 += 16; \
vec_st( vec_mergel( y_vec, vu_vec ), 0, p_line1 ); p_line1 += 16; \
y_vec = vec_ld( 0, p_y2 ); p_y2 += 16; \
vec_st( vec_mergeh( y_vec, vu_vec ), 0, p_line2 ); p_line2 += 16; \
vec_st( vec_mergel( y_vec, vu_vec ), 0, p_line2 ); p_line2 += 16;
vector unsigned char u_vec;
vector unsigned char v_vec;
vector unsigned char vu_vec;
vector unsigned char y_vec;
if( !( ( p_vout->render.i_width % 32 ) |
( p_vout->render.i_height % 2 ) ) )
{
/* Width is a multiple of 32, we take 2 lines at a time */
for( i_y = p_vout->render.i_height / 2 ; i_y-- ; )
{
VEC_NEXT_LINES( );
for( i_x = p_vout->render.i_width / 32 ; i_x-- ; )
{
VEC_LOAD_UV( );
VEC_MERGE( vec_mergeh );
VEC_MERGE( vec_mergel );
}
}
}
else if( !( ( p_vout->render.i_width % 16 ) |
( p_vout->render.i_height % 4 ) ) )
{
/* Width is only a multiple of 16, we take 4 lines at a time */
for( i_y = p_vout->render.i_height / 4 ; i_y-- ; )
{
/* Line 1 and 2, pixels 0 to ( width - 16 ) */
VEC_NEXT_LINES( );
for( i_x = p_vout->render.i_width / 32 ; i_x-- ; )
{
VEC_LOAD_UV( );
VEC_MERGE( vec_mergeh );
VEC_MERGE( vec_mergel );
}
/* Line 1 and 2, pixels ( width - 16 ) to ( width ) */
VEC_LOAD_UV( );
VEC_MERGE( vec_mergeh );
/* Line 3 and 4, pixels 0 to 16 */
VEC_NEXT_LINES( );
VEC_MERGE( vec_mergel );
/* Line 3 and 4, pixels 16 to ( width ) */
for( i_x = p_vout->render.i_width / 32 ; i_x-- ; )
{
VEC_LOAD_UV( );
VEC_MERGE( vec_mergeh );
VEC_MERGE( vec_mergel );
}
}
}
else
{
/* Crap, use the C version */
#undef VEC_NEXT_LINES
#undef VEC_LOAD_UV
#undef VEC_MERGE
#endif
const int i_source_margin = p_source->p[0].i_pitch const int i_source_margin = p_source->p[0].i_pitch
- p_source->p[0].i_visible_pitch; - p_source->p[0].i_visible_pitch;
const int i_source_margin_c = p_source->p[1].i_pitch const int i_source_margin_c = p_source->p[1].i_pitch
...@@ -324,7 +403,7 @@ static void I420_YVYU( vout_thread_t *p_vout, picture_t *p_source, ...@@ -324,7 +403,7 @@ static void I420_YVYU( vout_thread_t *p_vout, picture_t *p_source,
for( i_x = p_vout->render.i_width / 8 ; i_x-- ; ) for( i_x = p_vout->render.i_width / 8 ; i_x-- ; )
{ {
#if defined (MODULE_NAME_IS_i420_yuy2) #if !defined (MODULE_NAME_IS_i420_yuy2_mmx)
C_YUV420_YVYU( ); C_YUV420_YVYU( );
C_YUV420_YVYU( ); C_YUV420_YVYU( );
C_YUV420_YVYU( ); C_YUV420_YVYU( );
...@@ -341,6 +420,9 @@ static void I420_YVYU( vout_thread_t *p_vout, picture_t *p_source, ...@@ -341,6 +420,9 @@ static void I420_YVYU( vout_thread_t *p_vout, picture_t *p_source,
p_line1 += i_dest_margin; p_line1 += i_dest_margin;
p_line2 += i_dest_margin; p_line2 += i_dest_margin;
} }
#if defined (MODULE_NAME_IS_i420_yuy2_altivec)
}
#endif
} }
/***************************************************************************** /*****************************************************************************
...@@ -356,6 +438,86 @@ static void I420_UYVY( vout_thread_t *p_vout, picture_t *p_source, ...@@ -356,6 +438,86 @@ static void I420_UYVY( vout_thread_t *p_vout, picture_t *p_source,
int i_x, i_y; int i_x, i_y;
#if defined (MODULE_NAME_IS_i420_yuy2_altivec)
#define VEC_NEXT_LINES( ) \
p_line1 = p_line2; \
p_line2 += p_dest->p->i_pitch; \
p_y1 = p_y2; \
p_y2 += p_source->p[Y_PLANE].i_pitch;
#define VEC_LOAD_UV( ) \
u_vec = vec_ld( 0, p_u ); p_u += 16; \
v_vec = vec_ld( 0, p_v ); p_v += 16;
#define VEC_MERGE( a ) \
uv_vec = a( u_vec, v_vec ); \
y_vec = vec_ld( 0, p_y1 ); p_y1 += 16; \
vec_st( vec_mergeh( uv_vec, y_vec ), 0, p_line1 ); p_line1 += 16; \
vec_st( vec_mergel( uv_vec, y_vec ), 0, p_line1 ); p_line1 += 16; \
y_vec = vec_ld( 0, p_y2 ); p_y2 += 16; \
vec_st( vec_mergeh( uv_vec, y_vec ), 0, p_line2 ); p_line2 += 16; \
vec_st( vec_mergel( uv_vec, y_vec ), 0, p_line2 ); p_line2 += 16;
vector unsigned char u_vec;
vector unsigned char v_vec;
vector unsigned char uv_vec;
vector unsigned char y_vec;
if( !( ( p_vout->render.i_width % 32 ) |
( p_vout->render.i_height % 2 ) ) )
{
/* Width is a multiple of 32, we take 2 lines at a time */
for( i_y = p_vout->render.i_height / 2 ; i_y-- ; )
{
VEC_NEXT_LINES( );
for( i_x = p_vout->render.i_width / 32 ; i_x-- ; )
{
VEC_LOAD_UV( );
VEC_MERGE( vec_mergeh );
VEC_MERGE( vec_mergel );
}
}
}
else if( !( ( p_vout->render.i_width % 16 ) |
( p_vout->render.i_height % 4 ) ) )
{
/* Width is only a multiple of 16, we take 4 lines at a time */
for( i_y = p_vout->render.i_height / 4 ; i_y-- ; )
{
/* Line 1 and 2, pixels 0 to ( width - 16 ) */
VEC_NEXT_LINES( );
for( i_x = p_vout->render.i_width / 32 ; i_x-- ; )
{
VEC_LOAD_UV( );
VEC_MERGE( vec_mergeh );
VEC_MERGE( vec_mergel );
}
/* Line 1 and 2, pixels ( width - 16 ) to ( width ) */
VEC_LOAD_UV( );
VEC_MERGE( vec_mergeh );
/* Line 3 and 4, pixels 0 to 16 */
VEC_NEXT_LINES( );
VEC_MERGE( vec_mergel );
/* Line 3 and 4, pixels 16 to ( width ) */
for( i_x = p_vout->render.i_width / 32 ; i_x-- ; )
{
VEC_LOAD_UV( );
VEC_MERGE( vec_mergeh );
VEC_MERGE( vec_mergel );
}
}
}
else
{
/* Crap, use the C version */
#undef VEC_NEXT_LINES
#undef VEC_LOAD_UV
#undef VEC_MERGE
#endif
const int i_source_margin = p_source->p[0].i_pitch const int i_source_margin = p_source->p[0].i_pitch
- p_source->p[0].i_visible_pitch; - p_source->p[0].i_visible_pitch;
const int i_source_margin_c = p_source->p[1].i_pitch const int i_source_margin_c = p_source->p[1].i_pitch
...@@ -373,7 +535,7 @@ static void I420_UYVY( vout_thread_t *p_vout, picture_t *p_source, ...@@ -373,7 +535,7 @@ static void I420_UYVY( vout_thread_t *p_vout, picture_t *p_source,
for( i_x = p_vout->render.i_width / 8 ; i_x-- ; ) for( i_x = p_vout->render.i_width / 8 ; i_x-- ; )
{ {
#if defined (MODULE_NAME_IS_i420_yuy2) #if !defined (MODULE_NAME_IS_i420_yuy2_mmx)
C_YUV420_UYVY( ); C_YUV420_UYVY( );
C_YUV420_UYVY( ); C_YUV420_UYVY( );
C_YUV420_UYVY( ); C_YUV420_UYVY( );
...@@ -398,8 +560,13 @@ static void I420_UYVY( vout_thread_t *p_vout, picture_t *p_source, ...@@ -398,8 +560,13 @@ static void I420_UYVY( vout_thread_t *p_vout, picture_t *p_source,
#if defined (MODULE_NAME_IS_i420_yuy2_mmx) #if defined (MODULE_NAME_IS_i420_yuy2_mmx)
__asm__ __volatile__("emms" :: ); __asm__ __volatile__("emms" :: );
#endif #endif
#if defined (MODULE_NAME_IS_i420_yuy2_altivec)
}
#endif
} }
#if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
/***************************************************************************** /*****************************************************************************
* I420_IUYV: planar YUV 4:2:0 to interleaved packed UYVY 4:2:2 * I420_IUYV: planar YUV 4:2:0 to interleaved packed UYVY 4:2:2
*****************************************************************************/ *****************************************************************************/
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment