• Liwei Wang's avatar
    Add SSSE3 implementation for the 8x16 and 16x8 blocks in itx · a532e5ae
    Liwei Wang authored
    Cycle times:
    inv_txfm_add_8x16_adst_adst_0_8bpc_c: 5063.0
    inv_txfm_add_8x16_adst_adst_0_8bpc_ssse3: 406.8
    inv_txfm_add_8x16_adst_adst_1_8bpc_c: 5051.2
    inv_txfm_add_8x16_adst_adst_1_8bpc_ssse3: 407.3
    inv_txfm_add_8x16_adst_adst_2_8bpc_c: 5065.4
    inv_txfm_add_8x16_adst_adst_2_8bpc_ssse3: 407.9
    inv_txfm_add_8x16_adst_dct_0_8bpc_c: 5201.1
    inv_txfm_add_8x16_adst_dct_0_8bpc_ssse3: 354.8
    inv_txfm_add_8x16_adst_dct_1_8bpc_c: 5214.8
    inv_txfm_add_8x16_adst_dct_1_8bpc_ssse3: 354.8
    inv_txfm_add_8x16_adst_dct_2_8bpc_c: 5225.0
    inv_txfm_add_8x16_adst_dct_2_8bpc_ssse3: 355.1
    inv_txfm_add_8x16_adst_flipadst_0_8bpc_c: 7135.9
    inv_txfm_add_8x16_adst_flipadst_0_8bpc_ssse3: 409.7
    inv_txfm_add_8x16_adst_flipadst_1_8bpc_c: 8354.4
    inv_txfm_add_8x16_adst_flipadst_1_8bpc_ssse3: 409.2
    inv_txfm_add_8x16_adst_flipadst_2_8bpc_c: 7198.7
    inv_txfm_add_8x16_adst_flipadst_2_8bpc_ssse3: 409.7
    inv_txfm_add_8x16_adst_identity_0_8bpc_c: 3936.5
    inv_txfm_add_8x16_adst_identity_0_8bpc_ssse3: 262.0
    inv_txfm_add_8x16_adst_identity_1_8bpc_c: 4617.8
    inv_txfm_add_8x16_adst_identity_1_8bpc_ssse3: 261.4
    inv_txfm_add_8x16_adst_identity_2_8bpc_c: 3895.1
    inv_txfm_add_8x16_adst_identity_2_8bpc_ssse3: 262.1
    inv_txfm_add_8x16_dct_adst_0_8bpc_c: 5203.9
    inv_txfm_add_8x16_dct_adst_0_8bpc_ssse3: 355.1
    inv_txfm_add_8x16_dct_adst_1_8bpc_c: 5200.8
    inv_txfm_add_8x16_dct_adst_1_8bpc_ssse3: 355.4
    inv_txfm_add_8x16_dct_adst_2_8bpc_c: 5208.2
    inv_txfm_add_8x16_dct_adst_2_8bpc_ssse3: 355.1
    inv_txfm_add_8x16_dct_dct_0_8bpc_c: 5270.8
    inv_txfm_add_8x16_dct_dct_0_8bpc_ssse3: 57.0
    inv_txfm_add_8x16_dct_dct_1_8bpc_c: 5280.9
    inv_txfm_add_8x16_dct_dct_1_8bpc_ssse3: 303.2
    inv_txfm_add_8x16_dct_dct_2_8bpc_c: 5275.9
    inv_txfm_add_8x16_dct_dct_2_8bpc_ssse3: 302.4
    inv_txfm_add_8x16_dct_flipadst_0_8bpc_c: 5374.4
    inv_txfm_add_8x16_dct_flipadst_0_8bpc_ssse3: 356.5
    inv_txfm_add_8x16_dct_flipadst_1_8bpc_c: 5449.9
    inv_txfm_add_8x16_dct_flipadst_1_8bpc_ssse3: 356.8
    inv_txfm_add_8x16_dct_flipadst_2_8bpc_c: 5446.9
    inv_txfm_add_8x16_dct_flipadst_2_8bpc_ssse3: 356.7
    inv_txfm_add_8x16_dct_identity_0_8bpc_c: 3883.4
    inv_txfm_add_8x16_dct_identity_0_8bpc_ssse3: 76.1
    inv_txfm_add_8x16_dct_identity_1_8bpc_c: 3892.3
    inv_txfm_add_8x16_dct_identity_1_8bpc_ssse3: 76.1
    inv_txfm_add_8x16_dct_identity_2_8bpc_c: 4027.1
    inv_txfm_add_8x16_dct_identity_2_8bpc_ssse3: 209.9
    inv_txfm_add_8x16_flipadst_adst_0_8bpc_c: 7387.5
    inv_txfm_add_8x16_flipadst_adst_0_8bpc_ssse3: 408.9
    inv_txfm_add_8x16_flipadst_adst_1_8bpc_c: 7298.8
    inv_txfm_add_8x16_flipadst_adst_1_8bpc_ssse3: 408.8
    inv_txfm_add_8x16_flipadst_adst_2_8bpc_c: 7397.2
    inv_txfm_add_8x16_flipadst_adst_2_8bpc_ssse3: 408.9
    inv_txfm_add_8x16_flipadst_dct_0_8bpc_c: 5250.4
    inv_txfm_add_8x16_flipadst_dct_0_8bpc_ssse3: 355.3
    inv_txfm_add_8x16_flipadst_dct_1_8bpc_c: 5263.9
    inv_txfm_add_8x16_flipadst_dct_1_8bpc_ssse3: 355.4
    inv_txfm_add_8x16_flipadst_dct_2_8bpc_c: 5259.0
    inv_txfm_add_8x16_flipadst_dct_2_8bpc_ssse3: 356.3
    inv_txfm_add_8x16_flipadst_flipadst_0_8bpc_c: 5448.4
    inv_txfm_add_8x16_flipadst_flipadst_0_8bpc_ssse3: 410.2
    inv_txfm_add_8x16_flipadst_flipadst_1_8bpc_c: 5402.6
    inv_txfm_add_8x16_flipadst_flipadst_1_8bpc_ssse3: 410.8
    inv_txfm_add_8x16_flipadst_flipadst_2_8bpc_c: 6479.7
    inv_txfm_add_8x16_flipadst_flipadst_2_8bpc_ssse3: 409.8
    inv_txfm_add_8x16_flipadst_identity_0_8bpc_c: 3828.9
    inv_txfm_add_8x16_flipadst_identity_0_8bpc_ssse3: 262.7
    inv_txfm_add_8x16_flipadst_identity_1_8bpc_c: 3884.5
    inv_txfm_add_8x16_flipadst_identity_1_8bpc_ssse3: 262.0
    inv_txfm_add_8x16_flipadst_identity_2_8bpc_c: 3809.2
    inv_txfm_add_8x16_flipadst_identity_2_8bpc_ssse3: 262.9
    inv_txfm_add_8x16_identity_adst_0_8bpc_c: 4294.5
    inv_txfm_add_8x16_identity_adst_0_8bpc_ssse3: 268.8
    inv_txfm_add_8x16_identity_adst_1_8bpc_c: 4955.4
    inv_txfm_add_8x16_identity_adst_1_8bpc_ssse3: 269.1
    inv_txfm_add_8x16_identity_adst_2_8bpc_c: 4166.4
    inv_txfm_add_8x16_identity_adst_2_8bpc_ssse3: 269.9
    inv_txfm_add_8x16_identity_dct_0_8bpc_c: 4012.3
    inv_txfm_add_8x16_identity_dct_0_8bpc_ssse3: 56.7
    inv_txfm_add_8x16_identity_dct_1_8bpc_c: 4767.1
    inv_txfm_add_8x16_identity_dct_1_8bpc_ssse3: 215.1
    inv_txfm_add_8x16_identity_dct_2_8bpc_c: 4012.6
    inv_txfm_add_8x16_identity_dct_2_8bpc_ssse3: 215.9
    inv_txfm_add_8x16_identity_flipadst_0_8bpc_c: 4452.6
    inv_txfm_add_8x16_identity_flipadst_0_8bpc_ssse3: 270.5
    inv_txfm_add_8x16_identity_flipadst_1_8bpc_c: 4885.8
    inv_txfm_add_8x16_identity_flipadst_1_8bpc_ssse3: 270.3
    inv_txfm_add_8x16_identity_flipadst_2_8bpc_c: 4186.1
    inv_txfm_add_8x16_identity_flipadst_2_8bpc_ssse3: 271.5
    inv_txfm_add_8x16_identity_identity_0_8bpc_c: 2623.0
    inv_txfm_add_8x16_identity_identity_0_8bpc_ssse3: 123.1
    inv_txfm_add_8x16_identity_identity_1_8bpc_c: 2617.7
    inv_txfm_add_8x16_identity_identity_1_8bpc_ssse3: 122.9
    inv_txfm_add_8x16_identity_identity_2_8bpc_c: 2617.2
    inv_txfm_add_8x16_identity_identity_2_8bpc_ssse3: 123.1
    inv_txfm_add_16x8_adst_adst_0_8bpc_c: 5102.3
    inv_txfm_add_16x8_adst_adst_0_8bpc_ssse3: 409.0
    inv_txfm_add_16x8_adst_adst_1_8bpc_c: 5063.2
    inv_txfm_add_16x8_adst_adst_1_8bpc_ssse3: 409.5
    inv_txfm_add_16x8_adst_adst_2_8bpc_c: 5029.1
    inv_txfm_add_16x8_adst_adst_2_8bpc_ssse3: 410.1
    inv_txfm_add_16x8_adst_dct_0_8bpc_c: 5848.8
    inv_txfm_add_16x8_adst_dct_0_8bpc_ssse3: 358.8
    inv_txfm_add_16x8_adst_dct_1_8bpc_c: 5612.8
    inv_txfm_add_16x8_adst_dct_1_8bpc_ssse3: 358.8
    inv_txfm_add_16x8_adst_dct_2_8bpc_c: 5143.2
    inv_txfm_add_16x8_adst_dct_2_8bpc_ssse3: 358.5
    inv_txfm_add_16x8_adst_flipadst_0_8bpc_c: 5072.4
    inv_txfm_add_16x8_adst_flipadst_0_8bpc_ssse3: 413.3
    inv_txfm_add_16x8_adst_flipadst_1_8bpc_c: 5082.2
    inv_txfm_add_16x8_adst_flipadst_1_8bpc_ssse3: 413.6
    inv_txfm_add_16x8_adst_flipadst_2_8bpc_c: 5108.0
    inv_txfm_add_16x8_adst_flipadst_2_8bpc_ssse3: 413.8
    inv_txfm_add_16x8_adst_identity_0_8bpc_c: 3897.2
    inv_txfm_add_16x8_adst_identity_0_8bpc_ssse3: 283.6
    inv_txfm_add_16x8_adst_identity_1_8bpc_c: 3947.2
    inv_txfm_add_16x8_adst_identity_1_8bpc_ssse3: 283.1
    inv_txfm_add_16x8_adst_identity_2_8bpc_c: 3881.7
    inv_txfm_add_16x8_adst_identity_2_8bpc_ssse3: 283.6
    inv_txfm_add_16x8_dct_adst_0_8bpc_c: 5200.7
    inv_txfm_add_16x8_dct_adst_0_8bpc_ssse3: 355.0
    inv_txfm_add_16x8_dct_adst_1_8bpc_c: 5261.0
    inv_txfm_add_16x8_dct_adst_1_8bpc_ssse3: 355.1
    inv_txfm_add_16x8_dct_adst_2_8bpc_c: 5212.5
    inv_txfm_add_16x8_dct_adst_2_8bpc_ssse3: 354.5
    inv_txfm_add_16x8_dct_dct_0_8bpc_c: 5252.9
    inv_txfm_add_16x8_dct_dct_0_8bpc_ssse3: 43.6
    inv_txfm_add_16x8_dct_dct_1_8bpc_c: 5260.0
    inv_txfm_add_16x8_dct_dct_1_8bpc_ssse3: 302.1
    inv_txfm_add_16x8_dct_dct_2_8bpc_c: 5250.4
    inv_txfm_add_16x8_dct_dct_2_8bpc_ssse3: 302.0
    inv_txfm_add_16x8_dct_flipadst_0_8bpc_c: 5216.6
    inv_txfm_add_16x8_dct_flipadst_0_8bpc_ssse3: 359.3
    inv_txfm_add_16x8_dct_flipadst_1_8bpc_c: 5229.9
    inv_txfm_add_16x8_dct_flipadst_1_8bpc_ssse3: 357.6
    inv_txfm_add_16x8_dct_flipadst_2_8bpc_c: 5261.4
    inv_txfm_add_16x8_dct_flipadst_2_8bpc_ssse3: 357.4
    inv_txfm_add_16x8_dct_identity_0_8bpc_c: 3999.2
    inv_txfm_add_16x8_dct_identity_0_8bpc_ssse3: 63.8
    inv_txfm_add_16x8_dct_identity_1_8bpc_c: 4018.1
    inv_txfm_add_16x8_dct_identity_1_8bpc_ssse3: 227.1
    inv_txfm_add_16x8_dct_identity_2_8bpc_c: 3998.7
    inv_txfm_add_16x8_dct_identity_2_8bpc_ssse3: 226.2
    inv_txfm_add_16x8_flipadst_adst_0_8bpc_c: 5124.9
    inv_txfm_add_16x8_flipadst_adst_0_8bpc_ssse3: 419.7
    inv_txfm_add_16x8_flipadst_adst_1_8bpc_c: 5100.7
    inv_txfm_add_16x8_flipadst_adst_1_8bpc_ssse3: 420.5
    inv_txfm_add_16x8_flipadst_adst_2_8bpc_c: 5087.1
    inv_txfm_add_16x8_flipadst_adst_2_8bpc_ssse3: 419.9
    inv_txfm_add_16x8_flipadst_dct_0_8bpc_c: 5183.2
    inv_txfm_add_16x8_flipadst_dct_0_8bpc_ssse3: 367.1
    inv_txfm_add_16x8_flipadst_dct_1_8bpc_c: 5193.7
    inv_txfm_add_16x8_flipadst_dct_1_8bpc_ssse3: 368.6
    inv_txfm_add_16x8_flipadst_dct_2_8bpc_c: 5186.8
    inv_txfm_add_16x8_flipadst_dct_2_8bpc_ssse3: 368.4
    inv_txfm_add_16x8_flipadst_flipadst_0_8bpc_c: 5091.3
    inv_txfm_add_16x8_flipadst_flipadst_0_8bpc_ssse3: 421.2
    inv_txfm_add_16x8_flipadst_flipadst_1_8bpc_c: 5118.5
    inv_txfm_add_16x8_flipadst_flipadst_1_8bpc_ssse3: 421.4
    inv_txfm_add_16x8_flipadst_flipadst_2_8bpc_c: 5119.0
    inv_txfm_add_16x8_flipadst_flipadst_2_8bpc_ssse3: 421.2
    inv_txfm_add_16x8_flipadst_identity_0_8bpc_c: 3909.3
    inv_txfm_add_16x8_flipadst_identity_0_8bpc_ssse3: 289.9
    inv_txfm_add_16x8_flipadst_identity_1_8bpc_c: 3920.7
    inv_txfm_add_16x8_flipadst_identity_1_8bpc_ssse3: 290.4
    inv_txfm_add_16x8_flipadst_identity_2_8bpc_c: 3936.7
    inv_txfm_add_16x8_flipadst_identity_2_8bpc_ssse3: 290.6
    inv_txfm_add_16x8_identity_adst_0_8bpc_c: 3869.3
    inv_txfm_add_16x8_identity_adst_0_8bpc_ssse3: 280.0
    inv_txfm_add_16x8_identity_adst_1_8bpc_c: 3832.2
    inv_txfm_add_16x8_identity_adst_1_8bpc_ssse3: 281.4
    inv_txfm_add_16x8_identity_adst_2_8bpc_c: 3820.8
    inv_txfm_add_16x8_identity_adst_2_8bpc_ssse3: 281.5
    inv_txfm_add_16x8_identity_dct_0_8bpc_c: 3878.6
    inv_txfm_add_16x8_identity_dct_0_8bpc_ssse3: 76.7
    inv_txfm_add_16x8_identity_dct_1_8bpc_c: 3883.3
    inv_txfm_add_16x8_identity_dct_1_8bpc_ssse3: 76.3
    inv_txfm_add_16x8_identity_dct_2_8bpc_c: 3900.6
    inv_txfm_add_16x8_identity_dct_2_8bpc_ssse3: 220.1
    inv_txfm_add_16x8_identity_flipadst_0_8bpc_c: 3840.9
    inv_txfm_add_16x8_identity_flipadst_0_8bpc_ssse3: 277.1
    inv_txfm_add_16x8_identity_flipadst_1_8bpc_c: 3860.6
    inv_txfm_add_16x8_identity_flipadst_1_8bpc_ssse3: 277.0
    inv_txfm_add_16x8_identity_flipadst_2_8bpc_c: 3849.4
    inv_txfm_add_16x8_identity_flipadst_2_8bpc_ssse3: 277.2
    inv_txfm_add_16x8_identity_identity_0_8bpc_c: 2610.9
    inv_txfm_add_16x8_identity_identity_0_8bpc_ssse3: 159.8
    inv_txfm_add_16x8_identity_identity_1_8bpc_c: 2597.1
    inv_txfm_add_16x8_identity_identity_1_8bpc_ssse3: 159.8
    inv_txfm_add_16x8_identity_identity_2_8bpc_c: 2607.9
    inv_txfm_add_16x8_identity_identity_2_8bpc_ssse3: 159.9
    a532e5ae
Name
Last commit
Last update
..
arm Loading commit data...
ext/x86 Loading commit data...
win32 Loading commit data...
x86 Loading commit data...
cdef.h Loading commit data...
cdef_apply.h Loading commit data...
cdef_apply_tmpl.c Loading commit data...
cdef_tmpl.c Loading commit data...
cdf.c Loading commit data...
cdf.h Loading commit data...
cpu.c Loading commit data...
cpu.h Loading commit data...
ctx.h Loading commit data...
data.c Loading commit data...
data.h Loading commit data...
dav1d.rc.in Loading commit data...
decode.c Loading commit data...
decode.h Loading commit data...
dequant_tables.c Loading commit data...
dequant_tables.h Loading commit data...
env.h Loading commit data...
film_grain.h Loading commit data...
film_grain_tmpl.c Loading commit data...
getbits.c Loading commit data...
getbits.h Loading commit data...
internal.h Loading commit data...
intra_edge.c Loading commit data...
intra_edge.h Loading commit data...
ipred.h Loading commit data...
ipred_prepare.h Loading commit data...
ipred_prepare_tmpl.c Loading commit data...
ipred_tmpl.c Loading commit data...
itx.h Loading commit data...
itx_1d.c Loading commit data...
itx_tmpl.c Loading commit data...
levels.h Loading commit data...
lf_apply.h Loading commit data...
lf_apply_tmpl.c Loading commit data...
lf_mask.c Loading commit data...
lf_mask.h Loading commit data...
lib.c Loading commit data...
log.c Loading commit data...
log.h Loading commit data...
loopfilter.h Loading commit data...
loopfilter_tmpl.c Loading commit data...
looprestoration.h Loading commit data...
looprestoration_tmpl.c Loading commit data...
lr_apply.h Loading commit data...
lr_apply_tmpl.c Loading commit data...
mc.h Loading commit data...
mc_tmpl.c Loading commit data...
meson.build Loading commit data...
msac.c Loading commit data...
msac.h Loading commit data...
obu.c Loading commit data...
obu.h Loading commit data...
picture.c Loading commit data...
picture.h Loading commit data...
qm.c Loading commit data...
qm.h Loading commit data...
recon.h Loading commit data...
recon_tmpl.c Loading commit data...
ref.c Loading commit data...
ref.h Loading commit data...
ref_mvs.c Loading commit data...
ref_mvs.h Loading commit data...
scan.c Loading commit data...
scan.h Loading commit data...
tables.c Loading commit data...
tables.h Loading commit data...
thread.h Loading commit data...
thread_data.h Loading commit data...
thread_task.c Loading commit data...
thread_task.h Loading commit data...
warpmv.c Loading commit data...
warpmv.h Loading commit data...
wedge.c Loading commit data...
wedge.h Loading commit data...