Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
Menu
Open sidebar
VideoLAN
dav1d
Commits
0636633f
Commit
0636633f
authored
Nov 30, 2018
by
François Cartegnie
🤞
Browse files
add SSSE3 blend_h
parent
fef13fd6
Pipeline
#3459
passed with stages
in 4 minutes and 47 seconds
Changes
2
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
src/x86/mc_init_tmpl.c
View file @
0636633f
...
...
@@ -63,6 +63,7 @@ decl_blend_fn(dav1d_blend_ssse3);
/* Prototypes for the directional blend (blend_v / blend_h) and warp routines
 * that the x86 init code assigns into the DSP context.  The decl_*_fn macros
 * are defined outside this view; presumably each expands to a function
 * prototype of the matching kind -- confirm against the mc header. */
decl_blend_dir_fn
(
dav1d_blend_v_avx2
);
decl_blend_dir_fn
(
dav1d_blend_v_ssse3
);
decl_blend_dir_fn
(
dav1d_blend_h_avx2
);
/* SSSE3 blend_h, the routine introduced by this change. */
decl_blend_dir_fn
(
dav1d_blend_h_ssse3
);
decl_warp8x8_fn
(
dav1d_warp_affine_8x8_avx2
);
decl_warp8x8t_fn
(
dav1d_warp_affine_8x8t_avx2
);
...
...
@@ -87,6 +88,7 @@ void bitfn(dav1d_mc_dsp_init_x86)(Dav1dMCDSPContext *const c) {
c
->
w_mask
[
2
]
=
dav1d_w_mask_420_ssse3
;
c
->
blend
=
dav1d_blend_ssse3
;
c
->
blend_v
=
dav1d_blend_v_ssse3
;
c
->
blend_h
=
dav1d_blend_h_ssse3
;
#endif
if
(
!
(
flags
&
DAV1D_X86_CPU_FLAG_AVX2
))
...
...
src/x86/mc_ssse3.asm
View file @
0636633f
...
...
@@ -45,6 +45,7 @@ obmc_masks: db 0, 0, 0, 0
db
45
,
19
,
47
,
17
,
48
,
16
,
50
,
14
,
51
,
13
,
52
,
12
,
53
,
11
,
55
,
9
db
56
,
8
,
57
,
7
,
58
,
6
,
59
,
5
,
60
,
4
,
60
,
4
,
61
,
3
,
62
,
2
db
64
,
0
,
64
,
0
,
64
,
0
,
64
,
0
,
64
,
0
,
64
,
0
,
64
,
0
,
64
,
0
; pshufb control used by blend_h's 4-pixel-wide path: bytes 0-7 replicate
; source word 0 (byte indices 0,1) four times, bytes 8-15 replicate source
; word 1 (byte indices 2,3) four times.  It is read with an aligned 16-byte
; load, so it must stay on a 16-byte boundary.
blend_shuf:     db  0,  1,  0,  1,  0,  1,  0,  1,  2,  3,  2,  3,  2,  3,  2,  3

; Sixteen bytes, each holding the value 64.
pb_64:          times 16 db 64

; Eight words, each holding the value 8.
pw_8:           times 8 dw 8
...
...
@@ -73,6 +74,7 @@ BIDIR_JMP_TABLE mask_ssse3, 4, 8, 16, 32, 64, 128
; Width-dispatch tables.  BIDIR_JMP_TABLE is defined outside this view;
; presumably it emits one 32-bit entry per listed width, each pointing at the
; matching .w<N> label of the named function -- confirm against the macro.
; A width repeated in the list (16, 16, 16, 16) reuses the same .w16 label for
; the wider block sizes; blend_h's .w16 path is commented as handling
; w16/w32/w64/w128.
BIDIR_JMP_TABLE w_mask_420_ssse3, 4, 8, 16, 16, 16, 16
BIDIR_JMP_TABLE blend_ssse3,      4, 8, 16, 32
BIDIR_JMP_TABLE blend_v_ssse3,    2, 4, 8, 16, 32
BIDIR_JMP_TABLE blend_h_ssse3,    2, 4, 8, 16, 16, 16, 16
SECTION
.text
...
...
@@ -643,3 +645,93 @@ cglobal blend_v, 3, 6, 8, dst, ds, tmp, w, h, mask
dec
hd
jg
.w32_loop
RET
;-----------------------------------------------------------------------------
; blend_h (SSSE3): blend tmp into dst using one weight pair per row, taken
; from obmc_masks.
;
; Named arguments (x86inc cglobal; macro defined outside this view):
;   dst  - destination pixels, updated in place
;   ds   - destination stride in bytes
;   tmp  - second source; read contiguously (advanced by w bytes per row)
;   w    - block width; dispatched on tzcnt(w) through blend_h_ssse3_table
;   h    - row count; fetched from its argument slot (hm)
;   mask - not read as an argument; the register is reused as the pointer
;          into obmc_masks
;
; Register roles after setup:
;   r5    - address of blend_h_ssse3_table; anchor for the `base` define
;   r6    - copy of the original width (w itself becomes the jump target)
;   hq    - negated row count, counts up towards zero
;   maskq - obmc_masks + h*4, so [maskq+hq*2] is the 2-byte weight pair of
;           the current row
;   m5    - pw_512, the pmulhrsw multiplier (constant defined outside this
;           view; with words of 512 the multiply is a rounded >> 6, which
;           matches the visible obmc_masks pairs summing to 64 -- confirm)
;
; Per pixel: dst = pmulhrsw(dst*weight[0] + tmp*weight[1], pw_512), packed
; back to unsigned bytes.
;-----------------------------------------------------------------------------
cglobal blend_h, 4, 7, 6, dst, ds, tmp, w, h, mask
%define base r5-blend_h_ssse3_table
    lea         r5, [blend_h_ssse3_table]
    mov         r6d, wd                     ; keep the width; wd is overwritten next
    tzcnt       wd, wd                      ; log2(w) = index into the jump table
    mov         hd, hm
    movsxd      wq, dword [r5+wq*4]         ; signed 32-bit offset relative to the table
    mova        m5, [base+pw_512]
    add         wq, r5                      ; wq = absolute address of the .w<N> label
    lea         maskq, [base+obmc_masks+hq*4]
    neg         hq                          ; loops run while hq < 0
    jmp         wq

; ---- w == 2: two rows per iteration -----------------------------------------
.w2:
    movd        m0, [dstq+dsq*0]            ; row 0 pixels in word 0
    pinsrw      m0, [dstq+dsq*1], 1         ; row 1 pixels in word 1
    movd        m2, [maskq+hq*2]            ; weight pairs of rows 0 and 1
    movd        m1, [tmpq]                  ; tmp: 2 pixels x 2 rows
    punpcklwd   m2, m2                      ; one weight pair per pixel of each row
    punpcklbw   m0, m1                      ; interleave dst/tmp bytes
    pmaddubsw   m0, m2                      ; dst*weight[0] + tmp*weight[1]
    pmulhrsw    m0, m5
    packuswb    m0, m0
    movd        r3d, m0                     ; r3 (w) is free after the dispatch jump
    mov         [dstq+dsq*0], r3w
    shr         r3d, 16
    mov         [dstq+dsq*1], r3w
    lea         dstq, [dstq+dsq*2]
    add         tmpq, 2*2
    add         hq, 2
    jl          .w2
    RET

; ---- w == 4: two rows per iteration -----------------------------------------
.w4:
    ; Addressed through `base` like pw_512 and obmc_masks, so every constant
    ; in this function is reached relative to r5 rather than by a bare symbol
    ; address.
    mova        m3, [base+blend_shuf]
.w4_loop:
    movd        m0, [dstq+dsq*0]
    movd        m2, [dstq+dsq*1]
    punpckldq   m0, m2                      ; a: dst rows 0 and 1
    movq        m1, [tmpq]                  ; b: tmp rows 0 and 1
    movq        m2, [maskq+hq*2]            ; m: weight pairs (first two words used)
    pshufb      m2, m3                      ; row 0 pair x4 | row 1 pair x4
    punpcklbw   m0, m1
    pmaddubsw   m0, m2
    pmulhrsw    m0, m5
    packuswb    m0, m0
    movd        [dstq+dsq*0], m0
    psrlq       m0, 32                      ; bring row 1 down into the low dword
    movd        [dstq+dsq*1], m0
    lea         dstq, [dstq+dsq*2]
    add         tmpq, 4*2
    add         hq, 2
    jl          .w4_loop
    RET

; ---- w == 8: two rows per iteration -----------------------------------------
.w8:
    movd        m4, [maskq+hq*2]            ; weight pairs of rows 0 and 1
    punpcklwd   m4, m4
    pshufd      m3, m4, q0000               ; m3 = row 0 pair in every word
    pshufd      m4, m4, q1111               ; m4 = row 1 pair in every word
    movq        m1, [dstq+dsq*0]            ; a: dst row 0 in the low half
    movhps      m1, [dstq+dsq*1]            ;    dst row 1 in the high half
    mova        m2, [tmpq]                  ; b: tmp rows 0 and 1 (aligned load)
    BLEND_64M   m1, m2, m3, m4              ; macro defined outside this view; result is taken from m0
    movq        [dstq+dsq*0], m0
    movhps      [dstq+dsq*1], m0
    lea         dstq, [dstq+dsq*2]
    add         tmpq, 8*2
    add         hq, 2
    jl          .w8
    RET

; ---- w16/w32/w64/w128: one row per outer iteration, 16 pixels per inner -----
.w16:
    sub         dsq, r6                     ; ds -= w: the inner loop already advances dst by w
.w16_loop0:
    movd        m3, [maskq+hq*2]
    pshuflw     m3, m3, q0000
    punpcklqdq  m3, m3                      ; this row's weight pair in every word
    mov         wd, r6d                     ; pixels left in this row
.w16_loop:
    mova        m1, [dstq]                  ; a (aligned load)
    mova        m2, [tmpq]                  ; b (aligned load)
    BLEND_64M   m1, m2, m3, m3
    mova        [dstq], m0
    add         dstq, 16
    add         tmpq, 16
    sub         wd, 16
    jg          .w16_loop
    add         dstq, dsq                   ; step to the start of the next row
    inc         hq
    jl          .w16_loop0
    RET
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment