x86: cdef_filter: use a better constant for SSE4

Port of commit dc2ae517 (the AVX2 version),
by Kyle Siefring.

---------------------
x86_64:
------------------------------------------
cdef_filter_4x4_8bpc_ssse3: 141.7
cdef_filter_4x4_8bpc_sse4: 128.3
------------------------------------------
cdef_filter_4x8_8bpc_ssse3: 253.4
cdef_filter_4x8_8bpc_sse4: 228.5
------------------------------------------
cdef_filter_8x8_8bpc_ssse3: 429.6
cdef_filter_8x8_8bpc_sse4: 379.9
------------------------------------------

---------------------
x86_32:
------------------------------------------
cdef_filter_4x4_8bpc_ssse3: 184.3
cdef_filter_4x4_8bpc_sse4: 168.9
------------------------------------------
cdef_filter_4x8_8bpc_ssse3: 335.3
cdef_filter_4x8_8bpc_sse4: 305.1
------------------------------------------
cdef_filter_8x8_8bpc_ssse3: 579.1
cdef_filter_8x8_8bpc_sse4: 517.0
------------------------------------------
parent 7d206de2
......@@ -124,7 +124,7 @@ if is_asm_enabled
if dav1d_bitdepths.contains('8')
libdav1d_sources_asm += files(
'x86/cdef.asm',
'x86/cdef_ssse3.asm',
'x86/cdef_sse.asm',
'x86/ipred.asm',
'x86/ipred_ssse3.asm',
'x86/itx.asm',
......
......@@ -29,12 +29,15 @@
#include "src/cdef.h"
decl_cdef_fn(dav1d_cdef_filter_8x8_avx2);
decl_cdef_fn(dav1d_cdef_filter_8x8_sse4);
decl_cdef_fn(dav1d_cdef_filter_8x8_ssse3);
decl_cdef_fn(dav1d_cdef_filter_4x8_avx2);
decl_cdef_fn(dav1d_cdef_filter_4x8_sse4);
decl_cdef_fn(dav1d_cdef_filter_4x8_ssse3);
decl_cdef_fn(dav1d_cdef_filter_4x4_avx2);
decl_cdef_fn(dav1d_cdef_filter_4x4_sse4);
decl_cdef_fn(dav1d_cdef_filter_4x4_ssse3);
decl_cdef_dir_fn(dav1d_cdef_dir_avx2);
......@@ -45,13 +48,21 @@ void bitfn(dav1d_cdef_dsp_init_x86)(Dav1dCdefDSPContext *const c) {
if (!(flags & DAV1D_X86_CPU_FLAG_SSSE3)) return;
#if BITDEPTH ==8
#if BITDEPTH == 8
c->dir = dav1d_cdef_dir_ssse3;
c->fb[0] = dav1d_cdef_filter_8x8_ssse3;
c->fb[1] = dav1d_cdef_filter_4x8_ssse3;
c->fb[2] = dav1d_cdef_filter_4x4_ssse3;
#endif
if (!(flags & DAV1D_X86_CPU_FLAG_SSE41)) return;
#if BITDEPTH == 8
c->fb[0] = dav1d_cdef_filter_8x8_sse4;
c->fb[1] = dav1d_cdef_filter_4x8_sse4;
c->fb[2] = dav1d_cdef_filter_4x4_sse4;
#endif
if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return;
#if BITDEPTH == 8 && ARCH_X86_64
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment