Commit a3883709 authored by Damien Fouilleul

video_chroma: added I420_ABGR32 support (mostly for opengl), some clean up as well

parent 7b64c064
......@@ -155,6 +155,15 @@ static int Activate( vlc_object_t *p_this )
msg_Dbg(p_this, "RGB pixel format is A8R8G8B8");
p_vout->chroma.pf_convert = E_(I420_A8R8G8B8);
}
else if( p_vout->output.i_rmask == 0xff000000
&& p_vout->output.i_gmask == 0x00ff0000
&& p_vout->output.i_bmask == 0x0000ff00 )
{
/* R8G8B8A8 pixel format */
msg_Dbg(p_this, "RGB pixel format is R8G8B8A8");
//p_vout->chroma.pf_convert = E_(I420_B8G8R8A8);
return -1;
}
else if( p_vout->output.i_rmask == 0x0000ff00
&& p_vout->output.i_gmask == 0x00ff0000
&& p_vout->output.i_bmask == 0xff000000 )
......@@ -163,10 +172,18 @@ static int Activate( vlc_object_t *p_this )
msg_Dbg(p_this, "RGB pixel format is B8G8R8A8");
p_vout->chroma.pf_convert = E_(I420_B8G8R8A8);
}
else if( p_vout->output.i_rmask == 0x000000ff
&& p_vout->output.i_gmask == 0x0000ff00
&& p_vout->output.i_bmask == 0x00ff0000 )
{
/* A8B8G8R8 pixel format */
msg_Dbg(p_this, "RGB pixel format is A8B8G8R8");
p_vout->chroma.pf_convert = E_(I420_A8B8G8R8);
}
else
return -1;
#else
// generic C chroma converter */
/* generic C chroma converter */
p_vout->chroma.pf_convert = E_(I420_RGB32);
#endif
break;
......
......@@ -65,6 +65,7 @@ void E_(I420_R5G5B5) ( vout_thread_t *, picture_t *, picture_t * );
void E_(I420_R5G6B5) ( vout_thread_t *, picture_t *, picture_t * );
void E_(I420_A8R8G8B8) ( vout_thread_t *, picture_t *, picture_t * );
void E_(I420_B8G8R8A8) ( vout_thread_t *, picture_t *, picture_t * );
void E_(I420_A8B8G8R8) ( vout_thread_t *, picture_t *, picture_t * );
#endif
/*****************************************************************************
......
......@@ -35,14 +35,8 @@
#if defined (MODULE_NAME_IS_i420_rgb)
# include "i420_rgb_c.h"
#elif defined (MODULE_NAME_IS_i420_rgb_mmx)
# if defined(HAVE_MMX_INTRINSICS)
# include <mmintrin.h>
# endif
# include "i420_rgb_mmx.h"
#elif defined (MODULE_NAME_IS_i420_rgb_sse2)
# if defined(HAVE_SSE2_INTRINSICS)
# include <emmintrin.h>
# endif
# include "i420_rgb_mmx.h"
#endif
......@@ -309,7 +303,7 @@ void E_(I420_RGB16)( vout_thread_t *p_vout, picture_t *p_src,
}
}
#else // defined (MODULE_NAME_IS_i420_rgb_mmx)
#else // ! defined (MODULE_NAME_IS_i420_rgb)
void E_(I420_R5G5B5)( vout_thread_t *p_vout, picture_t *p_src,
picture_t *p_dest )
......@@ -388,20 +382,12 @@ void E_(I420_R5G5B5)( vout_thread_t *p_vout, picture_t *p_src,
for ( i_x = p_vout->render.i_width/16; i_x--; )
{
#if defined (CAN_COMPILE_SSE2)
__asm__( ".p2align 3"
SSE2_INIT_16_ALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_15_ALIGNED
: : "r" (p_y), "r" (p_u), "r" (p_v), "r" (p_buffer) : "eax" );
#else
__m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
SSE2_INTRINSICS_INIT_16_ALIGNED
SSE2_INTRINSICS_YUV_MUL
SSE2_INTRINSICS_YUV_ADD
SSE2_INTRINSICS_UNPACK_15_ALIGNED
#endif
SSE2_CALL (
SSE2_INIT_16_ALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_15_ALIGNED
);
p_y += 16;
p_u += 8;
p_v += 8;
......@@ -416,23 +402,12 @@ void E_(I420_R5G5B5)( vout_thread_t *p_vout, picture_t *p_src,
p_v -= i_rewind >> 1;
p_buffer -= i_rewind;
#if defined (CAN_COMPILE_SSE2)
__asm__( ".p2align 3"
SSE2_INIT_16_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_15_UNALIGNED
: : "r" (p_y), "r" (p_u), "r" (p_v), "r" (p_buffer) : "eax" );
#else
{
__m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
SSE2_INTRINSICS_INIT_16_UNALIGNED
SSE2_INTRINSICS_YUV_MUL
SSE2_INTRINSICS_YUV_ADD
SSE2_INTRINSICS_UNPACK_15_UNALIGNED
}
#endif
SSE2_CALL (
SSE2_INIT_16_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_15_UNALIGNED
);
p_y += 16;
p_u += 8;
p_v += 8;
......@@ -459,20 +434,12 @@ void E_(I420_R5G5B5)( vout_thread_t *p_vout, picture_t *p_src,
for ( i_x = p_vout->render.i_width/16; i_x--; )
{
#if defined (CAN_COMPILE_SSE2)
__asm__( ".p2align 3"
SSE2_INIT_16_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_15_UNALIGNED
: : "r" (p_y), "r" (p_u), "r" (p_v), "r" (p_buffer) : "eax" );
#else
__m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
SSE2_INTRINSICS_INIT_16_UNALIGNED
SSE2_INTRINSICS_YUV_MUL
SSE2_INTRINSICS_YUV_ADD
SSE2_INTRINSICS_UNPACK_15_UNALIGNED
#endif
SSE2_CALL (
SSE2_INIT_16_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_15_UNALIGNED
);
p_y += 16;
p_u += 8;
p_v += 8;
......@@ -487,23 +454,12 @@ void E_(I420_R5G5B5)( vout_thread_t *p_vout, picture_t *p_src,
p_v -= i_rewind >> 1;
p_buffer -= i_rewind;
#if defined (CAN_COMPILE_SSE2)
__asm__( ".p2align 3"
SSE2_INIT_16_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_15_UNALIGNED
: : "r" (p_y), "r" (p_u), "r" (p_v), "r" (p_buffer) : "eax" );
#else
{
__m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
SSE2_INTRINSICS_INIT_16_UNALIGNED
SSE2_INTRINSICS_YUV_MUL
SSE2_INTRINSICS_YUV_ADD
SSE2_INTRINSICS_UNPACK_15_UNALIGNED
}
#endif
SSE2_CALL (
SSE2_INIT_16_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_15_UNALIGNED
);
p_y += 16;
p_u += 8;
p_v += 8;
......@@ -522,11 +478,7 @@ void E_(I420_R5G5B5)( vout_thread_t *p_vout, picture_t *p_src,
}
/* make sure all SSE2 stores are visible thereafter */
#if defined (CAN_COMPILE_SSE2)
__asm__ __volatile__ ( "sfence" ::: "memory" );
#else
_mm_sfence();
#endif
SSE2_END;
#else // defined (MODULE_NAME_IS_i420_rgb_mmx)
......@@ -546,22 +498,12 @@ void E_(I420_R5G5B5)( vout_thread_t *p_vout, picture_t *p_src,
for ( i_x = p_vout->render.i_width / 8; i_x--; )
{
#if defined (CAN_COMPILE_MMX)
__asm__( ".p2align 3"
MMX_INIT_16
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_15
: : "r" (p_y), "r" (p_u), "r" (p_v), "r" (p_buffer) );
#else
__m64 mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7;
uint64_t tmp64;
MMX_INTRINSICS_INIT_16
MMX_INTRINSICS_YUV_MUL
MMX_INTRINSICS_YUV_ADD
MMX_INTRINSICS_UNPACK_15
#endif
MMX_CALL (
MMX_INIT_16
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_15
);
p_y += 8;
p_u += 4;
p_v += 4;
......@@ -577,24 +519,12 @@ void E_(I420_R5G5B5)( vout_thread_t *p_vout, picture_t *p_src,
p_v -= i_rewind >> 1;
p_buffer -= i_rewind;
#if defined (CAN_COMPILE_MMX)
__asm__( ".p2align 3"
MMX_INIT_16
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_15
: : "r" (p_y), "r" (p_u), "r" (p_v), "r" (p_buffer) );
#else
{
__m64 mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7;
uint64_t tmp64;
MMX_INTRINSICS_INIT_16
MMX_INTRINSICS_YUV_MUL
MMX_INTRINSICS_YUV_ADD
MMX_INTRINSICS_UNPACK_15
}
#endif
MMX_CALL (
MMX_INIT_16
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_15
);
p_y += 8;
p_u += 4;
p_v += 4;
......@@ -611,11 +541,7 @@ void E_(I420_R5G5B5)( vout_thread_t *p_vout, picture_t *p_src,
}
}
/* re-enable FPU registers */
#if defined (CAN_COMPILE_MMX)
__asm__ __volatile__ ( "emms" );
#else
_mm_empty();
#endif
MMX_END;
#endif
}
......@@ -697,20 +623,12 @@ void E_(I420_R5G6B5)( vout_thread_t *p_vout, picture_t *p_src,
for ( i_x = p_vout->render.i_width/16; i_x--; )
{
#if defined (CAN_COMPILE_SSE2)
__asm__( ".p2align 3"
SSE2_INIT_16_ALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_16_ALIGNED
: : "r" (p_y), "r" (p_u), "r" (p_v), "r" (p_buffer) : "eax" );
#else
__m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
SSE2_INTRINSICS_INIT_16_ALIGNED
SSE2_INTRINSICS_YUV_MUL
SSE2_INTRINSICS_YUV_ADD
SSE2_INTRINSICS_UNPACK_16_ALIGNED
#endif
SSE2_CALL (
SSE2_INIT_16_ALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_16_ALIGNED
);
p_y += 16;
p_u += 8;
p_v += 8;
......@@ -725,23 +643,12 @@ void E_(I420_R5G6B5)( vout_thread_t *p_vout, picture_t *p_src,
p_v -= i_rewind >> 1;
p_buffer -= i_rewind;
#if defined (CAN_COMPILE_SSE2)
__asm__( ".p2align 3"
SSE2_INIT_16_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_16_UNALIGNED
: : "r" (p_y), "r" (p_u), "r" (p_v), "r" (p_buffer) : "eax" );
#else
{
__m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
SSE2_INTRINSICS_INIT_16_UNALIGNED
SSE2_INTRINSICS_YUV_MUL
SSE2_INTRINSICS_YUV_ADD
SSE2_INTRINSICS_UNPACK_16_UNALIGNED
}
#endif
SSE2_CALL (
SSE2_INIT_16_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_16_UNALIGNED
);
p_y += 16;
p_u += 8;
p_v += 8;
......@@ -768,20 +675,12 @@ void E_(I420_R5G6B5)( vout_thread_t *p_vout, picture_t *p_src,
for ( i_x = p_vout->render.i_width/16; i_x--; )
{
#if defined (CAN_COMPILE_SSE2)
__asm__( ".p2align 3"
SSE2_INIT_16_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_16_UNALIGNED
: : "r" (p_y), "r" (p_u), "r" (p_v), "r" (p_buffer) : "eax" );
#else
__m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
SSE2_INTRINSICS_INIT_16_UNALIGNED
SSE2_INTRINSICS_YUV_MUL
SSE2_INTRINSICS_YUV_ADD
SSE2_INTRINSICS_UNPACK_16_UNALIGNED
#endif
SSE2_CALL(
SSE2_INIT_16_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_16_UNALIGNED
);
p_y += 16;
p_u += 8;
p_v += 8;
......@@ -796,23 +695,12 @@ void E_(I420_R5G6B5)( vout_thread_t *p_vout, picture_t *p_src,
p_v -= i_rewind >> 1;
p_buffer -= i_rewind;
#if defined (CAN_COMPILE_SSE2)
__asm__( ".p2align 3"
SSE2_INIT_16_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_16_UNALIGNED
: : "r" (p_y), "r" (p_u), "r" (p_v), "r" (p_buffer) : "eax" );
#else
{
__m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
SSE2_INTRINSICS_INIT_16_UNALIGNED
SSE2_INTRINSICS_YUV_MUL
SSE2_INTRINSICS_YUV_ADD
SSE2_INTRINSICS_UNPACK_16_UNALIGNED
}
#endif
SSE2_CALL(
SSE2_INIT_16_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_16_UNALIGNED
);
p_y += 16;
p_u += 8;
p_v += 8;
......@@ -831,11 +719,7 @@ void E_(I420_R5G6B5)( vout_thread_t *p_vout, picture_t *p_src,
}
/* make sure all SSE2 stores are visible thereafter */
#if defined (CAN_COMPILE_SSE2)
__asm__ __volatile__ ( "sfence" ::: "memory" );
#else
_mm_sfence();
#endif
SSE2_END;
#else // defined (MODULE_NAME_IS_i420_rgb_mmx)
......@@ -855,22 +739,12 @@ void E_(I420_R5G6B5)( vout_thread_t *p_vout, picture_t *p_src,
for ( i_x = p_vout->render.i_width / 8; i_x--; )
{
#if defined (CAN_COMPILE_MMX)
__asm__( ".p2align 3"
MMX_INIT_16
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_16
: : "r" (p_y), "r" (p_u), "r" (p_v), "r" (p_buffer) );
#else
__m64 mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7;
uint64_t tmp64;
MMX_INTRINSICS_INIT_16
MMX_INTRINSICS_YUV_MUL
MMX_INTRINSICS_YUV_ADD
MMX_INTRINSICS_UNPACK_16
#endif
MMX_CALL (
MMX_INIT_16
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_16
);
p_y += 8;
p_u += 4;
p_v += 4;
......@@ -886,24 +760,12 @@ void E_(I420_R5G6B5)( vout_thread_t *p_vout, picture_t *p_src,
p_v -= i_rewind >> 1;
p_buffer -= i_rewind;
#if defined (CAN_COMPILE_MMX)
__asm__( ".p2align 3"
MMX_INIT_16
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_16
: : "r" (p_y), "r" (p_u), "r" (p_v), "r" (p_buffer) );
#else
{
__m64 mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7;
uint64_t tmp64;
MMX_INTRINSICS_INIT_16
MMX_INTRINSICS_YUV_MUL
MMX_INTRINSICS_YUV_ADD
MMX_INTRINSICS_UNPACK_16
}
#endif
MMX_CALL (
MMX_INIT_16
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_16
);
p_y += 8;
p_u += 4;
p_v += 4;
......@@ -920,11 +782,7 @@ void E_(I420_R5G6B5)( vout_thread_t *p_vout, picture_t *p_src,
}
}
/* re-enable FPU registers */
#if defined (CAN_COMPILE_MMX)
__asm__ __volatile__ ( "emms" );
#else
_mm_empty();
#endif
MMX_END;
#endif
}
......@@ -1118,23 +976,12 @@ void E_(I420_A8R8G8B8)( vout_thread_t *p_vout, picture_t *p_src,
for ( i_x = p_vout->render.i_width / 16; i_x--; )
{
#if defined (CAN_COMPILE_SSE2)
/* use inline SSE2 assembly */
__asm__( ".p2align 3"
SSE2_INIT_32_ALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_ARGB_ALIGNED
: : "r" (p_y), "r" (p_u), "r" (p_v), "r" (p_buffer) : "eax" );
#else
/* otherwise use SSE2 C intrinsics wrappers */
__m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
SSE2_INTRINSICS_INIT_32_ALIGNED
SSE2_INTRINSICS_YUV_MUL
SSE2_INTRINSICS_YUV_ADD
SSE2_INTRINSICS_UNPACK_32_ARGB_ALIGNED
#endif
SSE2_CALL (
SSE2_INIT_32_ALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_ARGB_ALIGNED
);
p_y += 16;
p_u += 8;
p_v += 8;
......@@ -1149,25 +996,12 @@ void E_(I420_A8R8G8B8)( vout_thread_t *p_vout, picture_t *p_src,
p_u -= i_rewind >> 1;
p_v -= i_rewind >> 1;
p_buffer -= i_rewind;
#if defined (CAN_COMPILE_SSE2)
/* use inline SSE2 assembly */
__asm__( ".p2align 3"
SSE2_INIT_32_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_ARGB_UNALIGNED
: : "r" (p_y), "r" (p_u), "r" (p_v), "r" (p_buffer) : "eax" );
#else
/* otherwise use SSE2 intrinsics wrappers */
{
__m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
SSE2_INTRINSICS_INIT_32_UNALIGNED
SSE2_INTRINSICS_YUV_MUL
SSE2_INTRINSICS_YUV_ADD
SSE2_INTRINSICS_UNPACK_32_ARGB_UNALIGNED
}
#endif
SSE2_CALL (
SSE2_INIT_32_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_ARGB_UNALIGNED
);
p_y += 16;
p_u += 4;
p_v += 4;
......@@ -1194,23 +1028,12 @@ void E_(I420_A8R8G8B8)( vout_thread_t *p_vout, picture_t *p_src,
for ( i_x = p_vout->render.i_width / 16; i_x--; )
{
#if defined (CAN_COMPILE_SSE2)
/* use inline SSE2 assembly */
__asm__( ".p2align 3"
SSE2_INIT_32_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_ARGB_UNALIGNED
: : "r" (p_y), "r" (p_u), "r" (p_v), "r" (p_buffer) : "eax" );
#else
/* otherwise use SSE2 C intrinsics wrappers */
__m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
SSE2_INTRINSICS_INIT_32_UNALIGNED
SSE2_INTRINSICS_YUV_MUL
SSE2_INTRINSICS_YUV_ADD
SSE2_INTRINSICS_UNPACK_32_ARGB_UNALIGNED
#endif
SSE2_CALL (
SSE2_INIT_32_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_ARGB_UNALIGNED
);
p_y += 16;
p_u += 8;
p_v += 8;
......@@ -1225,25 +1048,12 @@ void E_(I420_A8R8G8B8)( vout_thread_t *p_vout, picture_t *p_src,
p_u -= i_rewind >> 1;
p_v -= i_rewind >> 1;
p_buffer -= i_rewind;
#if defined (CAN_COMPILE_SSE2)
/* use inline SSE2 assembly */
__asm__( ".p2align 3"
SSE2_INIT_32_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_ARGB_UNALIGNED
: : "r" (p_y), "r" (p_u), "r" (p_v), "r" (p_buffer) : "eax" );
#else
/* otherwise use SSE2 intrinsics wrappers */
{
__m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
SSE2_INTRINSICS_INIT_32_UNALIGNED
SSE2_INTRINSICS_YUV_MUL
SSE2_INTRINSICS_YUV_ADD
SSE2_INTRINSICS_UNPACK_32_ARGB_UNALIGNED
}
#endif
SSE2_CALL (
SSE2_INIT_32_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_ARGB_UNALIGNED
);
p_y += 16;
p_u += 8;
p_v += 8;
......@@ -1262,11 +1072,7 @@ void E_(I420_A8R8G8B8)( vout_thread_t *p_vout, picture_t *p_src,
}
/* make sure all SSE2 stores are visible thereafter */
#if defined (CAN_COMPILE_SSE2)
__asm__ __volatile__ ( "sfence" ::: "memory" );
#else
_mm_sfence();
#endif
SSE2_END;
#else // defined (MODULE_NAME_IS_i420_rgb_mmx)
......@@ -1286,26 +1092,12 @@ void E_(I420_A8R8G8B8)( vout_thread_t *p_vout, picture_t *p_src,
for ( i_x = p_vout->render.i_width / 8; i_x--; )
{
#if defined (CAN_COMPILE_MMX)
/* use inline MMX assembly */
__asm__( MMX_INIT_32
: : "r" (p_y), "r" (p_u), "r" (p_v), "r" (p_buffer) );
__asm__( ".p2align 3"
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_32_ARGB
: : "r" (p_y), "r" (p_u), "r" (p_v), "r" (p_buffer) );
#else
/* otherwise use MMX C intrinsics wrappers */
__m64 mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7;
uint64_t tmp64;
MMX_INTRINSICS_INIT_32
MMX_INTRINSICS_YUV_MUL
MMX_INTRINSICS_YUV_ADD
MMX_INTRINSICS_UNPACK_32_ARGB
#endif
MMX_CALL (
MMX_INIT_32
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_32_ARGB
);
p_y += 8;
p_u += 4;
p_v += 4;
......@@ -1320,26 +1112,12 @@ void E_(I420_A8R8G8B8)( vout_thread_t *p_vout, picture_t *p_src,
p_u -= i_rewind >> 1;
p_v -= i_rewind >> 1;
p_buffer -= i_rewind;
#if defined (CAN_COMPILE_MMX)
/* use inline MMX assembly */
__asm__( ".p2align 3"
MMX_INIT_32
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_32_ARGB
: : "r" (p_y), "r" (p_u), "r" (p_v), "r" (p_buffer) );
#else
/* otherwise use MMX intrinsics wrappers */
{
__m64 mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7;
uint64_t tmp64;
MMX_INTRINSICS_INIT_32
MMX_INTRINSICS_YUV_MUL
MMX_INTRINSICS_YUV_ADD
MMX_INTRINSICS_UNPACK_32_ARGB
}
#endif
MMX_CALL (
MMX_INIT_32
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_32_ARGB
);
p_y += 8;
p_u += 4;
p_v += 4;
......@@ -1355,12 +1133,9 @@ void E_(I420_A8R8G8B8)( vout_thread_t *p_vout, picture_t *p_src,
p_v += i_source_margin_c;
}
}
/* re-enable FPU registers */
#if defined (CAN_COMPILE_MMX)
__asm__ __volatile__ ( "emms" );
#else
_mm_empty();
#endif
MMX_END;
#endif
}
......@@ -1440,23 +1215,12 @@ void E_(I420_B8G8R8A8)( vout_thread_t *p_vout, picture_t *p_src,
for ( i_x = p_vout->render.i_width / 16; i_x--; )
{
#if defined (CAN_COMPILE_SSE2)
/* use inline SSE2 assembly */
__asm__( ".p2align 3"
SSE2_INIT_32_ALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_BGRA_ALIGNED
: : "r" (p_y), "r" (p_u), "r" (p_v), "r" (p_buffer) : "eax" );
#else
/* otherwise use SSE2 C intrinsics wrappers */
__m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
SSE2_INTRINSICS_INIT_32_ALIGNED
SSE2_INTRINSICS_YUV_MUL
SSE2_INTRINSICS_YUV_ADD
SSE2_INTRINSICS_UNPACK_32_BGRA_ALIGNED
#endif
SSE2_CALL (
SSE2_INIT_32_ALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_BGRA_ALIGNED
);
p_y += 16;
p_u += 8;
p_v += 8;
......@@ -1471,25 +1235,12 @@ void E_(I420_B8G8R8A8)( vout_thread_t *p_vout, picture_t *p_src,
p_u -= i_rewind >> 1;
p_v -= i_rewind >> 1;
p_buffer -= i_rewind;
#if defined (CAN_COMPILE_SSE2)
/* use inline SSE2 assembly */
__asm__( ".p2align 3"
SSE2_INIT_32_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_BGRA_UNALIGNED
: : "r" (p_y), "r" (p_u), "r" (p_v), "r" (p_buffer) : "eax" );
#else
/* otherwise use SSE2 intrinsics wrappers */
{
__m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
SSE2_INTRINSICS_INIT_32_UNALIGNED
SSE2_INTRINSICS_YUV_MUL
SSE2_INTRINSICS_YUV_ADD
SSE2_INTRINSICS_UNPACK_32_BGRA_UNALIGNED
}
#endif
SSE2_CALL (
SSE2_INIT_32_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_BGRA_UNALIGNED
);
p_y += 16;
p_u += 4;
p_v += 4;
......@@ -1516,23 +1267,12 @@ void E_(I420_B8G8R8A8)( vout_thread_t *p_vout, picture_t *p_src,
for ( i_x = p_vout->render.i_width / 16; i_x--; )
{
#if defined (CAN_COMPILE_SSE2)
/* use inline SSE2 assembly */
__asm__( ".p2align 3"
SSE2_INIT_32_UNALIGNED
SSE2_YUV_MUL