Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
dav1d
Manage
Activity
Members
Labels
Plan
Issues
26
Issue boards
Milestones
Wiki
Code
Merge requests
15
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Analyze
Contributor analytics
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
VideoLAN
dav1d
Commits
0636633f
Commit
0636633f
authored
6 years ago
by
François Cartegnie
Browse files
Options
Downloads
Patches
Plain Diff
add SSSE3 blend_h
parent
fef13fd6
No related branches found
No related tags found
1 merge request
!454
add SSSE3 w_mask_420/blend/blend_v/blend_h
Pipeline
#3459
passed with stages
Stage:
Stage:
in 4 minutes and 47 seconds
Changes
2
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
src/x86/mc_init_tmpl.c
+2
-0
2 additions, 0 deletions
src/x86/mc_init_tmpl.c
src/x86/mc_ssse3.asm
+92
-0
92 additions, 0 deletions
src/x86/mc_ssse3.asm
with
94 additions
and
0 deletions
src/x86/mc_init_tmpl.c
+
2
−
0
View file @
0636633f
...
...
@@ -63,6 +63,7 @@ decl_blend_fn(dav1d_blend_ssse3);
decl_blend_dir_fn
(
dav1d_blend_v_avx2
);
decl_blend_dir_fn
(
dav1d_blend_v_ssse3
);
decl_blend_dir_fn
(
dav1d_blend_h_avx2
);
decl_blend_dir_fn
(
dav1d_blend_h_ssse3
);
decl_warp8x8_fn
(
dav1d_warp_affine_8x8_avx2
);
decl_warp8x8t_fn
(
dav1d_warp_affine_8x8t_avx2
);
...
...
@@ -87,6 +88,7 @@ void bitfn(dav1d_mc_dsp_init_x86)(Dav1dMCDSPContext *const c) {
c
->
w_mask
[
2
]
=
dav1d_w_mask_420_ssse3
;
c
->
blend
=
dav1d_blend_ssse3
;
c
->
blend_v
=
dav1d_blend_v_ssse3
;
c
->
blend_h
=
dav1d_blend_h_ssse3
;
#endif
if
(
!
(
flags
&
DAV1D_X86_CPU_FLAG_AVX2
))
...
...
This diff is collapsed.
Click to expand it.
src/x86/mc_ssse3.asm
+
92
−
0
View file @
0636633f
...
...
@@ -45,6 +45,7 @@ obmc_masks: db 0, 0, 0, 0
db
45
,
19
,
47
,
17
,
48
,
16
,
50
,
14
,
51
,
13
,
52
,
12
,
53
,
11
,
55
,
9
db
56
,
8
,
57
,
7
,
58
,
6
,
59
,
5
,
60
,
4
,
60
,
4
,
61
,
3
,
62
,
2
db
64
,
0
,
64
,
0
,
64
,
0
,
64
,
0
,
64
,
0
,
64
,
0
,
64
,
0
,
64
,
0
blend_shuf:
db
0
,
1
,
0
,
1
,
0
,
1
,
0
,
1
,
2
,
3
,
2
,
3
,
2
,
3
,
2
,
3
pb_64:
times
16
db
64
pw_8:
times
8
dw
8
...
...
@@ -73,6 +74,7 @@ BIDIR_JMP_TABLE mask_ssse3, 4, 8, 16, 32, 64, 128
BIDIR_JMP_TABLE
w_mask_420_ssse3
,
4
,
8
,
16
,
16
,
16
,
16
BIDIR_JMP_TABLE
bl
end_ssse3
,
4
,
8
,
16
,
32
BIDIR_JMP_TABLE
bl
end_v_ssse3
,
2
,
4
,
8
,
16
,
32
BIDIR_JMP_TABLE
bl
end_h_ssse3
,
2
,
4
,
8
,
16
,
16
,
16
,
16
SECTION
.text
...
...
@@ -643,3 +645,93 @@ cglobal blend_v, 3, 6, 8, dst, ds, tmp, w, h, mask
dec
hd
jg
.w32_loop
RET
;-----------------------------------------------------------------------
; blend_h (SSSE3)
;
; Named registers (from the cglobal line): dst, ds, tmp, w, h, mask.
;   - h is fetched from hm in the prologue.
;   - maskq is not read as an incoming value; it is computed below from
;     obmc_masks and then indexed with the (negative) row counter.
;   - r6 keeps the original width; wq is turned into the jump target.
;   - m5 holds pw_512, used as the pmulhrsw multiplier (assuming pw_512 is
;     8 words of 512, this is a rounding right shift by 6 -- TODO confirm
;     against the constant's definition).
;
; Each output byte is produced by interleaving a dst byte with a tmp byte
; and multiplying that pair against a 2-byte obmc_masks entry (pmaddubsw),
; then scaling with pmulhrsw and packing back to bytes. One 2-byte mask
; entry is used per row (index maskq+hq*2).
;
; hq runs from -h up to 0. The .w2/.w4/.w8 paths handle two rows per
; iteration; the .w16 path handles one row per outer iteration and walks
; the row in 16-byte steps (it serves every jump-table entry >= 16).
;
; NOTE(review): "base" is expressed relative to r5. maskq looks like it is
; r5 under the cglobal naming (6th name), so "base" must not be used after
; maskq has been loaded -- which is why .w4 addresses blend_shuf directly.
;-----------------------------------------------------------------------
cglobal blend_h, 4, 7, 6, dst, ds, tmp, w, h, mask
%define base r5-blend_h_ssse3_table
    mov         hd, hm
    lea         r5, [blend_h_ssse3_table]
    mov         r6d, wd                         ; r6 = width, kept for the w16 path
    mova        m5, [base+pw_512]
    tzcnt       wd, wd                          ; log2(width) indexes the jump table
    movsxd      wq, dword [r5+wq*4]             ; table entries are offsets from the table
    add         wq, r5
    lea         maskq, [base+obmc_masks+hq*4]   ; last use of base (see note above)
    neg         hq                              ; hq = -h, counts up towards 0
    jmp         wq

.w2:                                            ; two rows of 2 pixels per iteration
    movd        m2, [maskq+hq*2]                ; mask pairs for row 0 and row 1
    movd        m1, [tmpq]                      ; tmp: 2 rows x 2 bytes
    punpcklwd   m2, m2                          ; repeat each row's pair for both pixels
    movd        m0, [dstq]                      ; dst row 0
    pinsrw      m0, [dstq+dsq], 1               ; dst row 1 into word 1
    punpcklbw   m0, m1                          ; interleave dst/tmp bytes
    pmaddubsw   m0, m2
    pmulhrsw    m0, m5
    packuswb    m0, m0
    movd        r3d, m0                         ; r3 (w) is free after the jump
    mov         [dstq], r3w                     ; row 0
    shr         r3d, 16
    mov         [dstq+dsq], r3w                 ; row 1
    lea         dstq, [dstq+dsq*2]
    add         tmpq, 4                         ; 2 rows * 2 bytes
    add         hq, 2
    jl          .w2
    RET

.w4:                                            ; two rows of 4 pixels per iteration
    mova        m3, [blend_shuf]                ; spreads mask pair 0 / pair 1 over each row
.w4_rows:
    movq        m1, [tmpq]                      ; tmp: 2 rows x 4 bytes
    movd        m0, [dstq]                      ; dst row 0
    movd        m2, [dstq+dsq]                  ; dst row 1
    punpckldq   m0, m2                          ; m0 = row 0 | row 1
    movq        m2, [maskq+hq*2]                ; mask bytes; blend_shuf selects bytes 0..3
    pshufb      m2, m3
    punpcklbw   m0, m1                          ; interleave dst/tmp bytes
    pmaddubsw   m0, m2
    pmulhrsw    m0, m5
    packuswb    m0, m0
    movd        [dstq], m0                      ; row 0
    psrlq       m0, 32
    movd        [dstq+dsq], m0                  ; row 1
    lea         dstq, [dstq+dsq*2]
    add         tmpq, 8                         ; 2 rows * 4 bytes
    add         hq, 2
    jl          .w4_rows
    RET

.w8:                                            ; two rows of 8 pixels per iteration
    movq        m1, [dstq]                      ; dst row 0 in the low half
    movhps      m1, [dstq+dsq]                  ; dst row 1 in the high half
    mova        m2, [tmpq]                      ; tmp: 2 rows x 8 bytes
    movd        m4, [maskq+hq*2]                ; mask pairs for row 0 and row 1
    punpcklwd   m4, m4
    pshufd      m3, m4, q0000                   ; m3 = row 0 pair in every word
    pshufd      m4, m4, q1111                   ; m4 = row 1 pair in every word
    BLEND_64M   m1, m2, m3, m4                  ; result is taken from m0 below
    movq        [dstq], m0
    movhps      [dstq+dsq], m0
    lea         dstq, [dstq+dsq*2]
    add         tmpq, 16                        ; 2 rows * 8 bytes
    add         hq, 2
    jl          .w8
    RET

; w16/w32/w64/w128
.w16:
    sub         dsq, r6                         ; dstq advances by the width inside a row
.w16_row:
    mov         wd, r6d                         ; bytes left in this row
    movd        m3, [maskq+hq*2]                ; this row's mask pair
    pshuflw     m3, m3, q0000
    punpcklqdq  m3, m3                          ; pair repeated in every word
.w16_col:
    mova        m1, [dstq]
    mova        m2, [tmpq]
    BLEND_64M   m1, m2, m3, m3                  ; result is taken from m0 below
    mova        [dstq], m0
    add         dstq, 16
    add         tmpq, 16
    sub         wd, 16
    jg          .w16_col
    add         dstq, dsq                       ; dsq already has the width removed
    inc         hq
    jl          .w16_row
    RET
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment