Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
Menu
Open sidebar
Steve Lhomme
VLC
Commits
a3883709
Commit
a3883709
authored
Aug 01, 2007
by
damienf
Browse files
video_chroma: added I420_ABGR32 support (mostly for opengl), some clean up as well
parent
7b64c064
Changes
8
Expand all
Hide whitespace changes
Inline
Side-by-side
modules/video_chroma/i420_rgb.c
View file @
a3883709
...
...
@@ -155,6 +155,15 @@ static int Activate( vlc_object_t *p_this )
msg_Dbg
(
p_this
,
"RGB pixel format is A8R8G8B8"
);
p_vout
->
chroma
.
pf_convert
=
E_
(
I420_A8R8G8B8
);
}
else
if
(
p_vout
->
output
.
i_rmask
==
0xff000000
&&
p_vout
->
output
.
i_gmask
==
0x00ff0000
&&
p_vout
->
output
.
i_bmask
==
0x0000ff00
)
{
/* R8G8B8A8 pixel format */
msg_Dbg
(
p_this
,
"RGB pixel format is R8G8B8A8"
);
//p_vout->chroma.pf_convert = E_(I420_B8G8R8A8);
return
-
1
;
}
else
if
(
p_vout
->
output
.
i_rmask
==
0x0000ff00
&&
p_vout
->
output
.
i_gmask
==
0x00ff0000
&&
p_vout
->
output
.
i_bmask
==
0xff000000
)
...
...
@@ -163,10 +172,18 @@ static int Activate( vlc_object_t *p_this )
msg_Dbg
(
p_this
,
"RGB pixel format is B8G8R8A8"
);
p_vout
->
chroma
.
pf_convert
=
E_
(
I420_B8G8R8A8
);
}
else
if
(
p_vout
->
output
.
i_rmask
==
0x000000ff
&&
p_vout
->
output
.
i_gmask
==
0x0000ff00
&&
p_vout
->
output
.
i_bmask
==
0x00ff0000
)
{
/* A8B8G8R8 pixel format */
msg_Dbg
(
p_this
,
"RGB pixel format is A8B8G8R8"
);
p_vout
->
chroma
.
pf_convert
=
E_
(
I420_A8B8G8R8
);
}
else
return
-
1
;
#else
/
/
generic C chroma converter */
/
*
generic C chroma converter */
p_vout
->
chroma
.
pf_convert
=
E_
(
I420_RGB32
);
#endif
break
;
...
...
modules/video_chroma/i420_rgb.h
View file @
a3883709
...
...
@@ -65,6 +65,7 @@ void E_(I420_R5G5B5) ( vout_thread_t *, picture_t *, picture_t * );
void
E_
(
I420_R5G6B5
)
(
vout_thread_t
*
,
picture_t
*
,
picture_t
*
);
void
E_
(
I420_A8R8G8B8
)
(
vout_thread_t
*
,
picture_t
*
,
picture_t
*
);
void
E_
(
I420_B8G8R8A8
)
(
vout_thread_t
*
,
picture_t
*
,
picture_t
*
);
void
E_
(
I420_A8B8G8R8
)
(
vout_thread_t
*
,
picture_t
*
,
picture_t
*
);
#endif
/*****************************************************************************
...
...
modules/video_chroma/i420_rgb16.c
View file @
a3883709
...
...
@@ -35,14 +35,8 @@
#if defined (MODULE_NAME_IS_i420_rgb)
# include "i420_rgb_c.h"
#elif defined (MODULE_NAME_IS_i420_rgb_mmx)
# if defined(HAVE_MMX_INTRINSICS)
# include <mmintrin.h>
# endif
# include "i420_rgb_mmx.h"
#elif defined (MODULE_NAME_IS_i420_rgb_sse2)
# if defined(HAVE_SSE2_INTRINSICS)
# include <emmintrin.h>
# endif
# include "i420_rgb_mmx.h"
#endif
...
...
@@ -309,7 +303,7 @@ void E_(I420_RGB16)( vout_thread_t *p_vout, picture_t *p_src,
}
}
#else // defined (MODULE_NAME_IS_i420_rgb
_mmx
)
#else //
!
defined (MODULE_NAME_IS_i420_rgb)
void
E_
(
I420_R5G5B5
)(
vout_thread_t
*
p_vout
,
picture_t
*
p_src
,
picture_t
*
p_dest
)
...
...
@@ -388,20 +382,12 @@ void E_(I420_R5G5B5)( vout_thread_t *p_vout, picture_t *p_src,
for
(
i_x
=
p_vout
->
render
.
i_width
/
16
;
i_x
--
;
)
{
#if defined (CAN_COMPILE_SSE2)
__asm__
(
".p2align 3"
SSE2_INIT_16_ALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_15_ALIGNED
:
:
"r"
(
p_y
),
"r"
(
p_u
),
"r"
(
p_v
),
"r"
(
p_buffer
)
:
"eax"
);
#else
__m128i
xmm0
,
xmm1
,
xmm2
,
xmm3
,
xmm4
,
xmm5
,
xmm6
,
xmm7
;
SSE2_INTRINSICS_INIT_16_ALIGNED
SSE2_INTRINSICS_YUV_MUL
SSE2_INTRINSICS_YUV_ADD
SSE2_INTRINSICS_UNPACK_15_ALIGNED
#endif
SSE2_CALL
(
SSE2_INIT_16_ALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_15_ALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
...
...
@@ -416,23 +402,12 @@ void E_(I420_R5G5B5)( vout_thread_t *p_vout, picture_t *p_src,
p_v
-=
i_rewind
>>
1
;
p_buffer
-=
i_rewind
;
#if defined (CAN_COMPILE_SSE2)
__asm__
(
".p2align 3"
SSE2_INIT_16_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_15_UNALIGNED
:
:
"r"
(
p_y
),
"r"
(
p_u
),
"r"
(
p_v
),
"r"
(
p_buffer
)
:
"eax"
);
#else
{
__m128i
xmm0
,
xmm1
,
xmm2
,
xmm3
,
xmm4
,
xmm5
,
xmm6
,
xmm7
;
SSE2_INTRINSICS_INIT_16_UNALIGNED
SSE2_INTRINSICS_YUV_MUL
SSE2_INTRINSICS_YUV_ADD
SSE2_INTRINSICS_UNPACK_15_UNALIGNED
}
#endif
SSE2_CALL
(
SSE2_INIT_16_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_15_UNALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
...
...
@@ -459,20 +434,12 @@ void E_(I420_R5G5B5)( vout_thread_t *p_vout, picture_t *p_src,
for
(
i_x
=
p_vout
->
render
.
i_width
/
16
;
i_x
--
;
)
{
#if defined (CAN_COMPILE_SSE2)
__asm__
(
".p2align 3"
SSE2_INIT_16_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_15_UNALIGNED
:
:
"r"
(
p_y
),
"r"
(
p_u
),
"r"
(
p_v
),
"r"
(
p_buffer
)
:
"eax"
);
#else
__m128i
xmm0
,
xmm1
,
xmm2
,
xmm3
,
xmm4
,
xmm5
,
xmm6
,
xmm7
;
SSE2_INTRINSICS_INIT_16_UNALIGNED
SSE2_INTRINSICS_YUV_MUL
SSE2_INTRINSICS_YUV_ADD
SSE2_INTRINSICS_UNPACK_15_UNALIGNED
#endif
SSE2_CALL
(
SSE2_INIT_16_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_15_UNALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
...
...
@@ -487,23 +454,12 @@ void E_(I420_R5G5B5)( vout_thread_t *p_vout, picture_t *p_src,
p_v
-=
i_rewind
>>
1
;
p_buffer
-=
i_rewind
;
#if defined (CAN_COMPILE_SSE2)
__asm__
(
".p2align 3"
SSE2_INIT_16_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_15_UNALIGNED
:
:
"r"
(
p_y
),
"r"
(
p_u
),
"r"
(
p_v
),
"r"
(
p_buffer
)
:
"eax"
);
#else
{
__m128i
xmm0
,
xmm1
,
xmm2
,
xmm3
,
xmm4
,
xmm5
,
xmm6
,
xmm7
;
SSE2_INTRINSICS_INIT_16_UNALIGNED
SSE2_INTRINSICS_YUV_MUL
SSE2_INTRINSICS_YUV_ADD
SSE2_INTRINSICS_UNPACK_15_UNALIGNED
}
#endif
SSE2_CALL
(
SSE2_INIT_16_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_15_UNALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
...
...
@@ -522,11 +478,7 @@ void E_(I420_R5G5B5)( vout_thread_t *p_vout, picture_t *p_src,
}
/* make sure all SSE2 stores are visible thereafter */
#if defined (CAN_COMPILE_SSE2)
__asm__
__volatile__
(
"sfence"
:::
"memory"
);
#else
_mm_sfence
();
#endif
SSE2_END
;
#else // defined (MODULE_NAME_IS_i420_rgb_mmx)
...
...
@@ -546,22 +498,12 @@ void E_(I420_R5G5B5)( vout_thread_t *p_vout, picture_t *p_src,
for
(
i_x
=
p_vout
->
render
.
i_width
/
8
;
i_x
--
;
)
{
#if defined (CAN_COMPILE_MMX)
__asm__
(
".p2align 3"
MMX_INIT_16
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_15
:
:
"r"
(
p_y
),
"r"
(
p_u
),
"r"
(
p_v
),
"r"
(
p_buffer
)
);
#else
__m64
mm0
,
mm1
,
mm2
,
mm3
,
mm4
,
mm5
,
mm6
,
mm7
;
uint64_t
tmp64
;
MMX_INTRINSICS_INIT_16
MMX_INTRINSICS_YUV_MUL
MMX_INTRINSICS_YUV_ADD
MMX_INTRINSICS_UNPACK_15
#endif
MMX_CALL
(
MMX_INIT_16
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_15
);
p_y
+=
8
;
p_u
+=
4
;
p_v
+=
4
;
...
...
@@ -577,24 +519,12 @@ void E_(I420_R5G5B5)( vout_thread_t *p_vout, picture_t *p_src,
p_v
-=
i_rewind
>>
1
;
p_buffer
-=
i_rewind
;
#if defined (CAN_COMPILE_MMX)
__asm__
(
".p2align 3"
MMX_INIT_16
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_15
:
:
"r"
(
p_y
),
"r"
(
p_u
),
"r"
(
p_v
),
"r"
(
p_buffer
)
);
#else
{
__m64
mm0
,
mm1
,
mm2
,
mm3
,
mm4
,
mm5
,
mm6
,
mm7
;
uint64_t
tmp64
;
MMX_INTRINSICS_INIT_16
MMX_INTRINSICS_YUV_MUL
MMX_INTRINSICS_YUV_ADD
MMX_INTRINSICS_UNPACK_15
}
#endif
MMX_CALL
(
MMX_INIT_16
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_15
);
p_y
+=
8
;
p_u
+=
4
;
p_v
+=
4
;
...
...
@@ -611,11 +541,7 @@ void E_(I420_R5G5B5)( vout_thread_t *p_vout, picture_t *p_src,
}
}
/* re-enable FPU registers */
#if defined (CAN_COMPILE_MMX)
__asm__
__volatile__
(
"emms"
);
#else
_mm_empty
();
#endif
MMX_END
;
#endif
}
...
...
@@ -697,20 +623,12 @@ void E_(I420_R5G6B5)( vout_thread_t *p_vout, picture_t *p_src,
for
(
i_x
=
p_vout
->
render
.
i_width
/
16
;
i_x
--
;
)
{
#if defined (CAN_COMPILE_SSE2)
__asm__
(
".p2align 3"
SSE2_INIT_16_ALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_16_ALIGNED
:
:
"r"
(
p_y
),
"r"
(
p_u
),
"r"
(
p_v
),
"r"
(
p_buffer
)
:
"eax"
);
#else
__m128i
xmm0
,
xmm1
,
xmm2
,
xmm3
,
xmm4
,
xmm5
,
xmm6
,
xmm7
;
SSE2_INTRINSICS_INIT_16_ALIGNED
SSE2_INTRINSICS_YUV_MUL
SSE2_INTRINSICS_YUV_ADD
SSE2_INTRINSICS_UNPACK_16_ALIGNED
#endif
SSE2_CALL
(
SSE2_INIT_16_ALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_16_ALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
...
...
@@ -725,23 +643,12 @@ void E_(I420_R5G6B5)( vout_thread_t *p_vout, picture_t *p_src,
p_v
-=
i_rewind
>>
1
;
p_buffer
-=
i_rewind
;
#if defined (CAN_COMPILE_SSE2)
__asm__
(
".p2align 3"
SSE2_INIT_16_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_16_UNALIGNED
:
:
"r"
(
p_y
),
"r"
(
p_u
),
"r"
(
p_v
),
"r"
(
p_buffer
)
:
"eax"
);
#else
{
__m128i
xmm0
,
xmm1
,
xmm2
,
xmm3
,
xmm4
,
xmm5
,
xmm6
,
xmm7
;
SSE2_INTRINSICS_INIT_16_UNALIGNED
SSE2_INTRINSICS_YUV_MUL
SSE2_INTRINSICS_YUV_ADD
SSE2_INTRINSICS_UNPACK_16_UNALIGNED
}
#endif
SSE2_CALL
(
SSE2_INIT_16_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_16_UNALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
...
...
@@ -768,20 +675,12 @@ void E_(I420_R5G6B5)( vout_thread_t *p_vout, picture_t *p_src,
for
(
i_x
=
p_vout
->
render
.
i_width
/
16
;
i_x
--
;
)
{
#if defined (CAN_COMPILE_SSE2)
__asm__
(
".p2align 3"
SSE2_INIT_16_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_16_UNALIGNED
:
:
"r"
(
p_y
),
"r"
(
p_u
),
"r"
(
p_v
),
"r"
(
p_buffer
)
:
"eax"
);
#else
__m128i
xmm0
,
xmm1
,
xmm2
,
xmm3
,
xmm4
,
xmm5
,
xmm6
,
xmm7
;
SSE2_INTRINSICS_INIT_16_UNALIGNED
SSE2_INTRINSICS_YUV_MUL
SSE2_INTRINSICS_YUV_ADD
SSE2_INTRINSICS_UNPACK_16_UNALIGNED
#endif
SSE2_CALL
(
SSE2_INIT_16_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_16_UNALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
...
...
@@ -796,23 +695,12 @@ void E_(I420_R5G6B5)( vout_thread_t *p_vout, picture_t *p_src,
p_v
-=
i_rewind
>>
1
;
p_buffer
-=
i_rewind
;
#if defined (CAN_COMPILE_SSE2)
__asm__
(
".p2align 3"
SSE2_INIT_16_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_16_UNALIGNED
:
:
"r"
(
p_y
),
"r"
(
p_u
),
"r"
(
p_v
),
"r"
(
p_buffer
)
:
"eax"
);
#else
{
__m128i
xmm0
,
xmm1
,
xmm2
,
xmm3
,
xmm4
,
xmm5
,
xmm6
,
xmm7
;
SSE2_INTRINSICS_INIT_16_UNALIGNED
SSE2_INTRINSICS_YUV_MUL
SSE2_INTRINSICS_YUV_ADD
SSE2_INTRINSICS_UNPACK_16_UNALIGNED
}
#endif
SSE2_CALL
(
SSE2_INIT_16_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_16_UNALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
...
...
@@ -831,11 +719,7 @@ void E_(I420_R5G6B5)( vout_thread_t *p_vout, picture_t *p_src,
}
/* make sure all SSE2 stores are visible thereafter */
#if defined (CAN_COMPILE_SSE2)
__asm__
__volatile__
(
"sfence"
:::
"memory"
);
#else
_mm_sfence
();
#endif
SSE2_END
;
#else // defined (MODULE_NAME_IS_i420_rgb_mmx)
...
...
@@ -855,22 +739,12 @@ void E_(I420_R5G6B5)( vout_thread_t *p_vout, picture_t *p_src,
for
(
i_x
=
p_vout
->
render
.
i_width
/
8
;
i_x
--
;
)
{
#if defined (CAN_COMPILE_MMX)
__asm__
(
".p2align 3"
MMX_INIT_16
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_16
:
:
"r"
(
p_y
),
"r"
(
p_u
),
"r"
(
p_v
),
"r"
(
p_buffer
)
);
#else
__m64
mm0
,
mm1
,
mm2
,
mm3
,
mm4
,
mm5
,
mm6
,
mm7
;
uint64_t
tmp64
;
MMX_INTRINSICS_INIT_16
MMX_INTRINSICS_YUV_MUL
MMX_INTRINSICS_YUV_ADD
MMX_INTRINSICS_UNPACK_16
#endif
MMX_CALL
(
MMX_INIT_16
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_16
);
p_y
+=
8
;
p_u
+=
4
;
p_v
+=
4
;
...
...
@@ -886,24 +760,12 @@ void E_(I420_R5G6B5)( vout_thread_t *p_vout, picture_t *p_src,
p_v
-=
i_rewind
>>
1
;
p_buffer
-=
i_rewind
;
#if defined (CAN_COMPILE_MMX)
__asm__
(
".p2align 3"
MMX_INIT_16
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_16
:
:
"r"
(
p_y
),
"r"
(
p_u
),
"r"
(
p_v
),
"r"
(
p_buffer
)
);
#else
{
__m64
mm0
,
mm1
,
mm2
,
mm3
,
mm4
,
mm5
,
mm6
,
mm7
;
uint64_t
tmp64
;
MMX_INTRINSICS_INIT_16
MMX_INTRINSICS_YUV_MUL
MMX_INTRINSICS_YUV_ADD
MMX_INTRINSICS_UNPACK_16
}
#endif
MMX_CALL
(
MMX_INIT_16
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_16
);
p_y
+=
8
;
p_u
+=
4
;
p_v
+=
4
;
...
...
@@ -920,11 +782,7 @@ void E_(I420_R5G6B5)( vout_thread_t *p_vout, picture_t *p_src,
}
}
/* re-enable FPU registers */
#if defined (CAN_COMPILE_MMX)
__asm__
__volatile__
(
"emms"
);
#else
_mm_empty
();
#endif
MMX_END
;
#endif
}
...
...
@@ -1118,23 +976,12 @@ void E_(I420_A8R8G8B8)( vout_thread_t *p_vout, picture_t *p_src,
for
(
i_x
=
p_vout
->
render
.
i_width
/
16
;
i_x
--
;
)
{
#if defined (CAN_COMPILE_SSE2)
/* use inline SSE2 assembly */
__asm__
(
".p2align 3"
SSE2_INIT_32_ALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_ARGB_ALIGNED
:
:
"r"
(
p_y
),
"r"
(
p_u
),
"r"
(
p_v
),
"r"
(
p_buffer
)
:
"eax"
);
#else
/* otherwise use SSE2 C intrinsics wrappers */
__m128i
xmm0
,
xmm1
,
xmm2
,
xmm3
,
xmm4
,
xmm5
,
xmm6
,
xmm7
;
SSE2_INTRINSICS_INIT_32_ALIGNED
SSE2_INTRINSICS_YUV_MUL
SSE2_INTRINSICS_YUV_ADD
SSE2_INTRINSICS_UNPACK_32_ARGB_ALIGNED
#endif
SSE2_CALL
(
SSE2_INIT_32_ALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_ARGB_ALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
...
...
@@ -1149,25 +996,12 @@ void E_(I420_A8R8G8B8)( vout_thread_t *p_vout, picture_t *p_src,
p_u
-=
i_rewind
>>
1
;
p_v
-=
i_rewind
>>
1
;
p_buffer
-=
i_rewind
;
#if defined (CAN_COMPILE_SSE2)
/* use inline SSE2 assembly */
__asm__
(
".p2align 3"
SSE2_INIT_32_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_ARGB_UNALIGNED
:
:
"r"
(
p_y
),
"r"
(
p_u
),
"r"
(
p_v
),
"r"
(
p_buffer
)
:
"eax"
);
#else
/* otherwise use SSE2 intrinsics wrappers */
{
__m128i
xmm0
,
xmm1
,
xmm2
,
xmm3
,
xmm4
,
xmm5
,
xmm6
,
xmm7
;
SSE2_INTRINSICS_INIT_32_UNALIGNED
SSE2_INTRINSICS_YUV_MUL
SSE2_INTRINSICS_YUV_ADD
SSE2_INTRINSICS_UNPACK_32_ARGB_UNALIGNED
}
#endif
SSE2_CALL
(
SSE2_INIT_32_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_ARGB_UNALIGNED
);
p_y
+=
16
;
p_u
+=
4
;
p_v
+=
4
;
...
...
@@ -1194,23 +1028,12 @@ void E_(I420_A8R8G8B8)( vout_thread_t *p_vout, picture_t *p_src,
for
(
i_x
=
p_vout
->
render
.
i_width
/
16
;
i_x
--
;
)
{
#if defined (CAN_COMPILE_SSE2)
/* use inline SSE2 assembly */
__asm__
(
".p2align 3"
SSE2_INIT_32_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_ARGB_UNALIGNED
:
:
"r"
(
p_y
),
"r"
(
p_u
),
"r"
(
p_v
),
"r"
(
p_buffer
)
:
"eax"
);
#else
/* otherwise use SSE2 C intrinsics wrappers */
__m128i
xmm0
,
xmm1
,
xmm2
,
xmm3
,
xmm4
,
xmm5
,
xmm6
,
xmm7
;
SSE2_INTRINSICS_INIT_32_UNALIGNED
SSE2_INTRINSICS_YUV_MUL
SSE2_INTRINSICS_YUV_ADD
SSE2_INTRINSICS_UNPACK_32_ARGB_UNALIGNED
#endif
SSE2_CALL
(
SSE2_INIT_32_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_ARGB_UNALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
...
...
@@ -1225,25 +1048,12 @@ void E_(I420_A8R8G8B8)( vout_thread_t *p_vout, picture_t *p_src,
p_u
-=
i_rewind
>>
1
;
p_v
-=
i_rewind
>>
1
;
p_buffer
-=
i_rewind
;
#if defined (CAN_COMPILE_SSE2)
/* use inline SSE2 assembly */
__asm__
(
".p2align 3"
SSE2_INIT_32_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_ARGB_UNALIGNED
:
:
"r"
(
p_y
),
"r"
(
p_u
),
"r"
(
p_v
),
"r"
(
p_buffer
)
:
"eax"
);
#else
/* otherwise use SSE2 intrinsics wrappers */
{
__m128i
xmm0
,
xmm1
,
xmm2
,
xmm3
,
xmm4
,
xmm5
,
xmm6
,
xmm7
;
SSE2_INTRINSICS_INIT_32_UNALIGNED
SSE2_INTRINSICS_YUV_MUL
SSE2_INTRINSICS_YUV_ADD
SSE2_INTRINSICS_UNPACK_32_ARGB_UNALIGNED
}
#endif
SSE2_CALL
(
SSE2_INIT_32_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_ARGB_UNALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
...
...
@@ -1262,11 +1072,7 @@ void E_(I420_A8R8G8B8)( vout_thread_t *p_vout, picture_t *p_src,
}
/* make sure all SSE2 stores are visible thereafter */
#if defined (CAN_COMPILE_SSE2)
__asm__
__volatile__
(
"sfence"
:::
"memory"
);
#else
_mm_sfence
();
#endif
SSE2_END
;
#else // defined (MODULE_NAME_IS_i420_rgb_mmx)
...
...
@@ -1286,26 +1092,12 @@ void E_(I420_A8R8G8B8)( vout_thread_t *p_vout, picture_t *p_src,
for
(
i_x
=
p_vout
->
render
.
i_width
/
8
;
i_x
--
;
)
{
#if defined (CAN_COMPILE_MMX)
/* use inline MMX assembly */
__asm__
(
MMX_INIT_32
:
:
"r"
(
p_y
),
"r"
(
p_u
),
"r"
(
p_v
),
"r"
(
p_buffer
)
);
__asm__
(
".p2align 3"
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_32_ARGB
:
:
"r"
(
p_y
),
"r"
(
p_u
),
"r"
(
p_v
),
"r"
(
p_buffer
)
);
#else
/* otherwise use MMX C intrinsics wrappers */
__m64
mm0
,
mm1
,
mm2
,
mm3
,
mm4
,
mm5
,
mm6
,
mm7
;
uint64_t
tmp64
;
MMX_INTRINSICS_INIT_32
MMX_INTRINSICS_YUV_MUL
MMX_INTRINSICS_YUV_ADD
MMX_INTRINSICS_UNPACK_32_ARGB
#endif
MMX_CALL
(
MMX_INIT_32
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_32_ARGB
);
p_y
+=
8
;
p_u
+=
4
;
p_v
+=
4
;
...
...
@@ -1320,26 +1112,12 @@ void E_(I420_A8R8G8B8)( vout_thread_t *p_vout, picture_t *p_src,