x86: add SSSE3 cdef dir implementation

---------------------
x86_64:
------------------------------------------
cdef_dir_8bpc_c: 1023.1
cdef_dir_8bpc_ssse3: 110.3
cdef_dir_8bpc_avx2: 71.1
------------------------------------------

---------------------
x86_32:
------------------------------------------
cdef_dir_8bpc_c: 1074.8
cdef_dir_8bpc_ssse3: 120.6
------------------------------------------

Thanks to Ronald for the AVX2 XMM version which was a very good starting
point.
parent 18ef9556
Pipeline #5357 passed with stages in 5 minutes and 42 seconds
......@@ -38,6 +38,7 @@ decl_cdef_fn(dav1d_cdef_filter_4x4_avx2);
decl_cdef_fn(dav1d_cdef_filter_4x4_ssse3);
decl_cdef_dir_fn(dav1d_cdef_dir_avx2);
decl_cdef_dir_fn(dav1d_cdef_dir_ssse3);
void bitfn(dav1d_cdef_dsp_init_x86)(Dav1dCdefDSPContext *const c) {
const unsigned flags = dav1d_get_cpu_flags();
......@@ -45,6 +46,7 @@ void bitfn(dav1d_cdef_dsp_init_x86)(Dav1dCdefDSPContext *const c) {
if (!(flags & DAV1D_X86_CPU_FLAG_SSSE3)) return;
#if BITDEPTH ==8
c->dir = dav1d_cdef_dir_ssse3;
c->fb[0] = dav1d_cdef_filter_8x8_ssse3;
c->fb[1] = dav1d_cdef_filter_4x8_ssse3;
c->fb[2] = dav1d_cdef_filter_4x4_ssse3;
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment