Commit 0afec6b1 authored by François Cartegnie, committed by Victorien Le Couviour--Tuffet

x86: add SSSE3 mc prep_8tap implementation

---------------------
x86_64:
------------------------------------------
mct_8tap_regular_w4_0_8bpc_c: 115.6
mct_8tap_regular_w4_0_8bpc_ssse3: 13.1
mct_8tap_regular_w4_0_8bpc_avx2: 13.3
------------------------------------------
mct_8tap_regular_w4_h_8bpc_c: 363.0
mct_8tap_regular_w4_h_8bpc_ssse3: 19.1
mct_8tap_regular_w4_h_8bpc_avx2: 16.5
------------------------------------------
mct_8tap_regular_w4_hv_8bpc_c: 832.2
mct_8tap_regular_w4_hv_8bpc_ssse3: 113.4
mct_8tap_regular_w4_hv_8bpc_avx2: 53.1
------------------------------------------
mct_8tap_regular_w4_v_8bpc_c: 488.5
mct_8tap_regular_w4_v_8bpc_ssse3: 38.9
mct_8tap_regular_w4_v_8bpc_avx2: 26.0
------------------------------------------
mct_8tap_regular_w8_0_8bpc_c: 259.3
mct_8tap_regular_w8_0_8bpc_ssse3: 20.4
mct_8tap_regular_w8_0_8bpc_avx2: 18.0
------------------------------------------
mct_8tap_regular_w8_h_8bpc_c: 1124.3
mct_8tap_regular_w8_h_8bpc_ssse3: 67.7
mct_8tap_regular_w8_h_8bpc_avx2: 43.3
------------------------------------------
mct_8tap_regular_w8_hv_8bpc_c: 2155.0
mct_8tap_regular_w8_hv_8bpc_ssse3: 340.8
mct_8tap_regular_w8_hv_8bpc_avx2: 151.3
------------------------------------------
mct_8tap_regular_w8_v_8bpc_c: 1195.4
mct_8tap_regular_w8_v_8bpc_ssse3: 72.4
mct_8tap_regular_w8_v_8bpc_avx2: 39.8
------------------------------------------
mct_8tap_regular_w16_0_8bpc_c: 158.3
mct_8tap_regular_w16_0_8bpc_ssse3: 52.9
mct_8tap_regular_w16_0_8bpc_avx2: 30.2
------------------------------------------
mct_8tap_regular_w16_h_8bpc_c: 4267.4
mct_8tap_regular_w16_h_8bpc_ssse3: 211.9
mct_8tap_regular_w16_h_8bpc_avx2: 121.4
------------------------------------------
mct_8tap_regular_w16_hv_8bpc_c: 5430.9
mct_8tap_regular_w16_hv_8bpc_ssse3: 986.8
mct_8tap_regular_w16_hv_8bpc_avx2: 428.4
------------------------------------------
mct_8tap_regular_w16_v_8bpc_c: 4604.2
mct_8tap_regular_w16_v_8bpc_ssse3: 199.1
mct_8tap_regular_w16_v_8bpc_avx2: 100.7
------------------------------------------
mct_8tap_regular_w32_0_8bpc_c: 372.9
mct_8tap_regular_w32_0_8bpc_ssse3: 231.9
mct_8tap_regular_w32_0_8bpc_avx2: 99.7
------------------------------------------
mct_8tap_regular_w32_h_8bpc_c: 15975.0
mct_8tap_regular_w32_h_8bpc_ssse3: 802.9
mct_8tap_regular_w32_h_8bpc_avx2: 468.5
------------------------------------------
mct_8tap_regular_w32_hv_8bpc_c: 18555.5
mct_8tap_regular_w32_hv_8bpc_ssse3: 3673.5
mct_8tap_regular_w32_hv_8bpc_avx2: 1587.6
------------------------------------------
mct_8tap_regular_w32_v_8bpc_c: 16632.4
mct_8tap_regular_w32_v_8bpc_ssse3: 743.5
mct_8tap_regular_w32_v_8bpc_avx2: 337.8
------------------------------------------
mct_8tap_regular_w64_0_8bpc_c: 675.9
mct_8tap_regular_w64_0_8bpc_ssse3: 513.6
mct_8tap_regular_w64_0_8bpc_avx2: 285.4
------------------------------------------
mct_8tap_regular_w64_h_8bpc_c: 37161.3
mct_8tap_regular_w64_h_8bpc_ssse3: 1929.7
mct_8tap_regular_w64_h_8bpc_avx2: 1138.1
------------------------------------------
mct_8tap_regular_w64_hv_8bpc_c: 42434.0
mct_8tap_regular_w64_hv_8bpc_ssse3: 8822.1
mct_8tap_regular_w64_hv_8bpc_avx2: 3853.5
------------------------------------------
mct_8tap_regular_w64_v_8bpc_c: 37969.1
mct_8tap_regular_w64_v_8bpc_ssse3: 1805.6
mct_8tap_regular_w64_v_8bpc_avx2: 826.1
------------------------------------------
mct_8tap_regular_w128_0_8bpc_c: 1532.7
mct_8tap_regular_w128_0_8bpc_ssse3: 1397.7
mct_8tap_regular_w128_0_8bpc_avx2: 813.8
------------------------------------------
mct_8tap_regular_w128_h_8bpc_c: 91204.3
mct_8tap_regular_w128_h_8bpc_ssse3: 4783.0
mct_8tap_regular_w128_h_8bpc_avx2: 2767.2
------------------------------------------
mct_8tap_regular_w128_hv_8bpc_c: 102396.0
mct_8tap_regular_w128_hv_8bpc_ssse3: 22202.3
mct_8tap_regular_w128_hv_8bpc_avx2: 9637.2
------------------------------------------
mct_8tap_regular_w128_v_8bpc_c: 92294.3
mct_8tap_regular_w128_v_8bpc_ssse3: 4952.8
mct_8tap_regular_w128_v_8bpc_avx2: 2370.1
------------------------------------------

---------------------
x86_32:
------------------------------------------
mct_8tap_regular_w4_0_8bpc_c: 131.3
mct_8tap_regular_w4_0_8bpc_ssse3: 18.7
------------------------------------------
mct_8tap_regular_w4_h_8bpc_c: 422.0
mct_8tap_regular_w4_h_8bpc_ssse3: 27.3
------------------------------------------
mct_8tap_regular_w4_hv_8bpc_c: 1012.6
mct_8tap_regular_w4_hv_8bpc_ssse3: 123.6
------------------------------------------
mct_8tap_regular_w4_v_8bpc_c: 589.6
mct_8tap_regular_w4_v_8bpc_ssse3: 48.9
------------------------------------------
mct_8tap_regular_w8_0_8bpc_c: 278.5
mct_8tap_regular_w8_0_8bpc_ssse3: 26.3
------------------------------------------
mct_8tap_regular_w8_h_8bpc_c: 1129.3
mct_8tap_regular_w8_h_8bpc_ssse3: 80.6
------------------------------------------
mct_8tap_regular_w8_hv_8bpc_c: 2556.4
mct_8tap_regular_w8_hv_8bpc_ssse3: 354.6
------------------------------------------
mct_8tap_regular_w8_v_8bpc_c: 1460.2
mct_8tap_regular_w8_v_8bpc_ssse3: 103.8
------------------------------------------
mct_8tap_regular_w16_0_8bpc_c: 218.9
mct_8tap_regular_w16_0_8bpc_ssse3: 58.4
------------------------------------------
mct_8tap_regular_w16_h_8bpc_c: 4471.8
mct_8tap_regular_w16_h_8bpc_ssse3: 237.2
------------------------------------------
mct_8tap_regular_w16_hv_8bpc_c: 5570.5
mct_8tap_regular_w16_hv_8bpc_ssse3: 1044.1
------------------------------------------
mct_8tap_regular_w16_v_8bpc_c: 4885.5
mct_8tap_regular_w16_v_8bpc_ssse3: 268.3
------------------------------------------
mct_8tap_regular_w32_0_8bpc_c: 495.6
mct_8tap_regular_w32_0_8bpc_ssse3: 236.6
------------------------------------------
mct_8tap_regular_w32_h_8bpc_c: 15903.5
mct_8tap_regular_w32_h_8bpc_ssse3: 872.5
------------------------------------------
mct_8tap_regular_w32_hv_8bpc_c: 19402.2
mct_8tap_regular_w32_hv_8bpc_ssse3: 3832.8
------------------------------------------
mct_8tap_regular_w32_v_8bpc_c: 17119.5
mct_8tap_regular_w32_v_8bpc_ssse3: 935.2
------------------------------------------
mct_8tap_regular_w64_0_8bpc_c: 877.0
mct_8tap_regular_w64_0_8bpc_ssse3: 515.7
------------------------------------------
mct_8tap_regular_w64_h_8bpc_c: 36832.1
mct_8tap_regular_w64_h_8bpc_ssse3: 2094.1
------------------------------------------
mct_8tap_regular_w64_hv_8bpc_c: 43965.3
mct_8tap_regular_w64_hv_8bpc_ssse3: 9423.0
------------------------------------------
mct_8tap_regular_w64_v_8bpc_c: 37041.2
mct_8tap_regular_w64_v_8bpc_ssse3: 2348.9
------------------------------------------
mct_8tap_regular_w128_0_8bpc_c: 1929.9
mct_8tap_regular_w128_0_8bpc_ssse3: 1392.3
------------------------------------------
mct_8tap_regular_w128_h_8bpc_c: 86022.5
mct_8tap_regular_w128_h_8bpc_ssse3: 5110.8
------------------------------------------
mct_8tap_regular_w128_hv_8bpc_c: 105793.5
mct_8tap_regular_w128_hv_8bpc_ssse3: 23278.8
------------------------------------------
mct_8tap_regular_w128_v_8bpc_c: 88223.5
mct_8tap_regular_w128_v_8bpc_ssse3: 7442.7
------------------------------------------
parent 65ee1233
Pipeline #5237 passed with stages
in 5 minutes and 42 seconds
......@@ -50,14 +50,23 @@ decl_mc_fn(dav1d_put_bilin_avx2);
decl_mc_fn(dav1d_put_bilin_ssse3);
decl_mct_fn(dav1d_prep_8tap_regular_avx2);
decl_mct_fn(dav1d_prep_8tap_regular_ssse3);
decl_mct_fn(dav1d_prep_8tap_regular_smooth_avx2);
decl_mct_fn(dav1d_prep_8tap_regular_smooth_ssse3);
decl_mct_fn(dav1d_prep_8tap_regular_sharp_avx2);
decl_mct_fn(dav1d_prep_8tap_regular_sharp_ssse3);
decl_mct_fn(dav1d_prep_8tap_smooth_avx2);
decl_mct_fn(dav1d_prep_8tap_smooth_ssse3);
decl_mct_fn(dav1d_prep_8tap_smooth_regular_avx2);
decl_mct_fn(dav1d_prep_8tap_smooth_regular_ssse3);
decl_mct_fn(dav1d_prep_8tap_smooth_sharp_avx2);
decl_mct_fn(dav1d_prep_8tap_smooth_sharp_ssse3);
decl_mct_fn(dav1d_prep_8tap_sharp_avx2);
decl_mct_fn(dav1d_prep_8tap_sharp_ssse3);
decl_mct_fn(dav1d_prep_8tap_sharp_regular_avx2);
decl_mct_fn(dav1d_prep_8tap_sharp_regular_ssse3);
decl_mct_fn(dav1d_prep_8tap_sharp_smooth_avx2);
decl_mct_fn(dav1d_prep_8tap_sharp_smooth_ssse3);
decl_mct_fn(dav1d_prep_bilin_avx2);
decl_mct_fn(dav1d_prep_bilin_ssse3);
......@@ -108,6 +117,15 @@ void bitfn(dav1d_mc_dsp_init_x86)(Dav1dMCDSPContext *const c) {
init_mc_fn (FILTER_2D_8TAP_SHARP, 8tap_sharp, ssse3);
init_mct_fn(FILTER_2D_BILINEAR, bilin, ssse3);
init_mct_fn(FILTER_2D_8TAP_REGULAR, 8tap_regular, ssse3);
init_mct_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth, ssse3);
init_mct_fn(FILTER_2D_8TAP_REGULAR_SHARP, 8tap_regular_sharp, ssse3);
init_mct_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular, ssse3);
init_mct_fn(FILTER_2D_8TAP_SMOOTH, 8tap_smooth, ssse3);
init_mct_fn(FILTER_2D_8TAP_SMOOTH_SHARP, 8tap_smooth_sharp, ssse3);
init_mct_fn(FILTER_2D_8TAP_SHARP_REGULAR, 8tap_sharp_regular, ssse3);
init_mct_fn(FILTER_2D_8TAP_SHARP_SMOOTH, 8tap_sharp_smooth, ssse3);
init_mct_fn(FILTER_2D_8TAP_SHARP, 8tap_sharp, ssse3);
c->avg = dav1d_avg_ssse3;
c->w_avg = dav1d_w_avg_ssse3;
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment