VideoLAN / x264 · commit ad6c91f0
authored Apr 22, 2008 by Loren Merritt
drop support for pre-SSE3 assemblers
parent 27ae7576
13 changed files
common/cpu.c
@@ -84,12 +84,10 @@ uint32_t x264_cpu_detect( void )
     cpu |= X264_CPU_MMXEXT|X264_CPU_SSE;
     if( edx&0x04000000 )
         cpu |= X264_CPU_SSE2;
-#ifdef HAVE_SSE3
     if( ecx&0x00000001 )
         cpu |= X264_CPU_SSE3;
     if( ecx&0x00000200 )
         cpu |= X264_CPU_SSSE3;
-#endif
 
     x264_cpu_cpuid( 0x80000000, &eax, &ebx, &ecx, &edx );
     max_extended_cap = eax;
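For context (not part of the commit): the hunk above reads the SSE3 and SSSE3 feature bits from ECX of CPUID leaf 1 (bit 0 = SSE3, bit 9 = SSSE3, hence the 0x00000001 and 0x00000200 masks). A minimal standalone sketch of the same check, using GCC/Clang's <cpuid.h> helper rather than x264's own x264_cpu_cpuid() asm wrapper, might look like this:

/* Illustrative only: same bit tests as the hunk above, via __get_cpuid(). */
#include <cpuid.h>
#include <stdio.h>

int main( void )
{
    unsigned int eax, ebx, ecx, edx;
    if( !__get_cpuid( 1, &eax, &ebx, &ecx, &edx ) )
        return 1;                             /* CPUID leaf 1 unsupported */
    int sse3  = !!( ecx & 0x00000001 );       /* ECX bit 0: SSE3 */
    int ssse3 = !!( ecx & 0x00000200 );       /* ECX bit 9: SSSE3 (pabsw etc.) */
    printf( "SSE3:%d SSSE3:%d\n", sse3, ssse3 );
    return 0;
}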
common/dct.c
@@ -580,7 +580,7 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf, int b_interlaced )
         pf->scan_8x8 = zigzag_scan_8x8_frame;
         pf->scan_4x4 = zigzag_scan_4x4_frame;
         pf->sub_4x4  = zigzag_sub_4x4_frame;
-#ifdef HAVE_SSE3
+#ifdef HAVE_MMX
         if( cpu&X264_CPU_SSSE3 )
             pf->sub_4x4 = x264_zigzag_sub_4x4_frame_ssse3;
 #endif
common/pixel.c
@@ -359,10 +359,8 @@ SATD_X_DECL7()
 #ifdef HAVE_MMX
 SATD_X_DECL7( _mmxext )
 SATD_X_DECL5( _sse2 )
-#ifdef HAVE_SSE3
 SATD_X_DECL7( _ssse3 )
-#endif
 #endif
 
 /****************************************************************************
  * structural similarity metric
@@ -623,7 +621,6 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
 #endif
     }
 
-#ifdef HAVE_SSE3
     if( (cpu&X264_CPU_SSE3) && (cpu&X264_CPU_CACHELINE_SPLIT) )
     {
         INIT2( sad, _sse3 );
@@ -652,7 +649,6 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
             INIT2( sad_x4, _cache64_ssse3 );
         }
     }
-#endif //HAVE_SSE3
 #endif //HAVE_MMX
 
 #ifdef ARCH_PPC
common/quant.c
@@ -240,16 +240,14 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
             pf->dequant_8x8 = x264_dequant_8x8_flat16_sse2;
         }
     }
 #endif
-#ifdef HAVE_SSE3
     if( cpu&X264_CPU_SSSE3 )
     {
         pf->quant_4x4_dc = x264_quant_4x4_dc_ssse3;
         pf->quant_4x4 = x264_quant_4x4_ssse3;
         pf->quant_8x8 = x264_quant_8x8_ssse3;
     }
-#endif
 #endif // HAVE_MMX
 #ifdef ARCH_PPC
     if( cpu&X264_CPU_ALTIVEC )
     {
common/x86/dct-a.asm
@@ -325,7 +325,6 @@ cglobal x264_zigzag_scan_4x4_field_mmxext, 2,3
     mov        [r0+12], r2d
     RET
 
-%ifdef HAVE_SSE3
 ;-----------------------------------------------------------------------------
 ; void x264_zigzag_sub_4x4_frame_ssse3( int16_t level[16], const uint8_t *src, uint8_t *dst )
 ;-----------------------------------------------------------------------------
@@ -364,4 +363,3 @@ cglobal x264_zigzag_sub_4x4_frame_ssse3, 3,3
     movdqa   [r0], xmm0
     movdqa   [r0+16], xmm1
     RET
-%endif
common/x86/mc-a.asm
@@ -275,11 +275,9 @@ cglobal x264_pixel_avg2_w20_%1, 6,7
 %endmacro
 
 PIXEL_AVG_SSE sse2
-%ifdef HAVE_SSE3
 %define movdqu lddqu
 PIXEL_AVG_SSE sse3
 %undef movdqu
-%endif
 
 ; Cacheline split code for processors with high latencies for loads
 ; split over cache lines.  See sad-a.asm for a more detailed explanation.
@@ -481,9 +479,7 @@ cglobal %1, 5,7
 COPY_W16_SSE2 x264_mc_copy_w16_sse2, movdqu
 ; cacheline split with mmx has too much overhead; the speed benefit is near-zero.
 ; but with SSE3 the overhead is zero, so there's no reason not to include it.
-%ifdef HAVE_SSE3
 COPY_W16_SSE2 x264_mc_copy_w16_sse3, lddqu
-%endif
 COPY_W16_SSE2 x264_mc_copy_w16_aligned_sse2, movdqa
common/x86/mc-a2.asm
@@ -309,10 +309,8 @@ cglobal x264_hpel_filter_h_sse2, 3,3,1
 %define PALIGNR PALIGNR_SSE2
 HPEL_V sse2
 HPEL_C sse2
-%ifdef HAVE_SSE3
 %define PALIGNR PALIGNR_SSSE3
 HPEL_C ssse3
-%endif
 
 cglobal x264_sfence
     sfence
common/x86/mc-c.c
@@ -102,9 +102,7 @@ PIXEL_AVG_WTAB(cache32_mmxext, mmxext, cache32_mmxext, cache32_mmxext, cache32_m
 PIXEL_AVG_WTAB(cache64_mmxext, mmxext, cache64_mmxext, cache64_mmxext, cache64_mmxext, cache64_mmxext)
 PIXEL_AVG_WTAB(sse2, mmxext, mmxext, mmxext, sse2, sse2)
 PIXEL_AVG_WTAB(cache64_sse2, mmxext, cache64_mmxext, cache64_sse2, cache64_sse2, cache64_sse2)
-#ifdef HAVE_SSE3
 PIXEL_AVG_WTAB(cache64_sse3, mmxext, cache64_mmxext, sse3, sse3, sse3)
-#endif
 
 #define MC_COPY_WTAB(instr, name1, name2, name3)\
 static void (* const x264_mc_copy_wtab_##instr[5])( uint8_t *, int, uint8_t *, int, int ) =\
@@ -118,9 +116,7 @@ static void (* const x264_mc_copy_wtab_##instr[5])( uint8_t *, int, uint8_t *, i
 MC_COPY_WTAB(mmx,mmx,mmx,mmx)
 MC_COPY_WTAB(sse2,mmx,mmx,sse2)
-#ifdef HAVE_SSE3
 MC_COPY_WTAB(sse3,mmx,mmx,sse3)
-#endif
 
 static const int hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
 static const int hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2};
@@ -155,9 +151,7 @@ MC_LUMA(cache64_mmxext,cache64_mmxext,mmx)
 #endif
 MC_LUMA(sse2,sse2,sse2)
 MC_LUMA(cache64_sse2,cache64_sse2,sse2)
-#ifdef HAVE_SSE3
 MC_LUMA(cache64_sse3,cache64_sse3,sse3)
-#endif
 
 #define GET_REF(name)\
 uint8_t *get_ref_##name( uint8_t *dst, int *i_dst_stride,\
@@ -190,9 +184,7 @@ GET_REF(cache64_mmxext)
 #endif
 GET_REF(sse2)
 GET_REF(cache64_sse2)
-#ifdef HAVE_SSE3
 GET_REF(cache64_sse3)
-#endif
 
 #define HPEL(align, cpu, cpuv, cpuc, cpuh)\
 void x264_hpel_filter_v_##cpuv( uint8_t *dst, uint8_t *src, int16_t *buf, int stride, int width);\
@@ -227,9 +219,7 @@ void x264_hpel_filter_##cpu( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_
 HPEL(8, mmxext, mmxext, mmxext, mmxext)
 HPEL(16, sse2_amd, mmxext, mmxext, sse2)
 HPEL(16, sse2, sse2, sse2, sse2)
-#ifdef HAVE_SSE3
 HPEL(16, ssse3, sse2, ssse3, sse2)
-#endif
 
 void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf )
 {
@@ -305,20 +295,16 @@ void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf )
     {
         pf->mc_luma = mc_luma_cache64_sse2;
         pf->get_ref = get_ref_cache64_sse2;
-#ifdef HAVE_SSE3
         /* lddqu doesn't work on Core2 */
         if( (cpu&X264_CPU_SSE3) && !(cpu&X264_CPU_SSSE3) )
         {
             pf->mc_luma = mc_luma_cache64_sse3;
             pf->get_ref = get_ref_cache64_sse3;
         }
-#endif
     }
 
     if( !(cpu&X264_CPU_SSSE3) )
         return;
 
-#ifdef HAVE_SSE3
     pf->hpel_filter = x264_hpel_filter_ssse3;
-#endif
 }
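Aside (not part of the commit): with the HAVE_SSE3 guards gone, every SIMD variant is compiled whenever HAVE_MMX is set, and the choice between them is made purely at runtime from the detected-CPU flag mask, e.g. the lddqu-based cache64_sse3 path only when SSE3 is reported without SSSE3 (the "lddqu doesn't work on Core2" guard above). A minimal sketch of that dispatch pattern, with hypothetical names and flag values rather than x264's, might look like this:

/* Illustrative sketch, not x264 code: runtime dispatch from a cpu flag mask. */
#include <stdint.h>
#include <string.h>

#define CPU_SSE3  (1u << 0)   /* hypothetical flags, standing in for X264_CPU_SSE3 */
#define CPU_SSSE3 (1u << 1)   /* and X264_CPU_SSSE3 */

typedef struct {
    void (*copy16)( uint8_t *dst, const uint8_t *src );
} mc_funcs;

/* stand-in bodies; real versions would be the movdqu/lddqu/ssse3 asm routines */
static void copy16_c( uint8_t *dst, const uint8_t *src )     { memcpy( dst, src, 16 ); }
static void copy16_sse3( uint8_t *dst, const uint8_t *src )  { memcpy( dst, src, 16 ); }
static void copy16_ssse3( uint8_t *dst, const uint8_t *src ) { memcpy( dst, src, 16 ); }

void mc_init( unsigned cpu, mc_funcs *pf )
{
    pf->copy16 = copy16_c;                       /* always-available fallback */
    if( (cpu & CPU_SSE3) && !(cpu & CPU_SSSE3) )
        pf->copy16 = copy16_sse3;                /* lddqu only pays off on pre-SSSE3 SSE3 parts */
    else if( cpu & CPU_SSSE3 )
        pf->copy16 = copy16_ssse3;
}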
common/x86/pixel-a.asm
@@ -1272,7 +1272,6 @@ SATDS_SSE2 sse2
 SA8D_16x16_32 sse2
 INTRA_SA8D_SSE2 sse2
 INTRA_SATDS_MMX mmxext
-%ifdef HAVE_SSE3
 %define ABS1 ABS1_SSSE3
 %define ABS2 ABS2_SSSE3
 SATDS_SSE2 ssse3
@@ -1280,7 +1279,6 @@ SA8D_16x16_32 ssse3
 INTRA_SA8D_SSE2 ssse3
 INTRA_SATDS_MMX ssse3
 SATD_W4 ssse3 ; mmx, but uses pabsw from ssse3.
-%endif
@@ -1655,10 +1653,8 @@ cglobal x264_pixel_ads1_%1, 4,7
 %endmacro
 
 ADS_SSE2 sse2
-%ifdef HAVE_SSE3
 %define ABS1 ABS1_SSSE3
 ADS_SSE2 ssse3
-%endif
 
 ; int x264_pixel_ads_mvs( int16_t *mvs, uint8_t *masks, int width )
 ; {
common/x86/predict-c.c
@@ -483,9 +483,7 @@ void x264_intra_sa8d_x3_8x8_##cpu( uint8_t *fenc, uint8_t edge[33], int res[3] )
 #ifdef ARCH_X86_64
 INTRA_SA8D_X3(sse2)
-#ifdef HAVE_SSE3
 INTRA_SA8D_X3(ssse3)
-#endif
 #else
 INTRA_SA8D_X3(mmxext)
 #endif
common/x86/quant-a.asm
@@ -145,11 +145,9 @@ QUANT_DC x264_quant_4x4_dc_sse2, QUANT_MMX, 2, 16
 QUANT_AC x264_quant_4x4_sse2, QUANT_MMX, 2, 16
 QUANT_AC x264_quant_8x8_sse2, QUANT_MMX, 8, 16
 
-%ifdef HAVE_SSE3
 QUANT_DC x264_quant_4x4_dc_ssse3, QUANT_SSSE3, 2, 16
 QUANT_AC x264_quant_4x4_ssse3, QUANT_SSSE3, 2, 16
 QUANT_AC x264_quant_8x8_ssse3, QUANT_SSSE3, 8, 16
-%endif
common/x86/sad-a.asm
@@ -25,7 +25,7 @@
 %include "x86inc.asm"
 
 SECTION_RODATA
-sw_64: dq 64
+sw_64: dd 64
 
 SECTION .text
@@ -213,11 +213,9 @@ cglobal x264_pixel_sad_16x8_%1, 4,4
 %endmacro
 
 SAD_W16 sse2
-%ifdef HAVE_SSE3
 %define movdqu lddqu
 SAD_W16 sse3
 %undef movdqu
-%endif
@@ -613,14 +611,12 @@ SAD_X_SSE2 3, 16, 8, sse2
 SAD_X_SSE2  4, 16, 16, sse2
 SAD_X_SSE2  4, 16,  8, sse2
-%ifdef HAVE_SSE3
 %define movdqu lddqu
 SAD_X_SSE2  3, 16, 16, sse3
 SAD_X_SSE2  3, 16,  8, sse3
 SAD_X_SSE2  4, 16, 16, sse3
 SAD_X_SSE2  4, 16,  8, sse3
 %undef movdqu
-%endif
@@ -961,7 +957,6 @@ SADX34_CACHELINE_FUNC 16, 16, 64, sse2, sse2
 SADX34_CACHELINE_FUNC 16,  8, 64, sse2, sse2
 %endif ; !ARCH_X86_64
 
-%ifdef HAVE_SSE3
 SAD16_CACHELINE_FUNC ssse3, 8
 SAD16_CACHELINE_FUNC ssse3, 16
 %assign i 1
@@ -971,4 +966,3 @@ SAD16_CACHELINE_LOOP_SSSE3 i
 %endrep
 SADX34_CACHELINE_FUNC 16, 16, 64, sse2, ssse3
 SADX34_CACHELINE_FUNC 16,  8, 64, sse2, ssse3
-%endif ; HAVE_SSE3
configure
@@ -321,12 +321,8 @@ if [ $ARCH = X86 -o $ARCH = X86_64 ] ; then
         echo "yasm prior to 0.6.2 miscompiles PIC. trying nasm instead..."
         AS=nasm
     fi
-    if as_check ; then
+    if as_check "pabsw xmm0, xmm0" ; then
         CFLAGS="$CFLAGS -DHAVE_MMX"
-        if as_check "pabsw xmm0, xmm0" ; then
-            ASFLAGS="$ASFLAGS -DHAVE_SSE3"
-            CFLAGS="$CFLAGS -DHAVE_SSE3"
-        fi
     else
         echo "No suitable assembler found. x264 will be several times slower."
         echo "Please install 'yasm' to get MMX/SSE optimized code."