Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
Menu
Open sidebar
VideoLAN
dav1d
Commits
93c4bea2
Commit
93c4bea2
authored
Oct 20, 2018
by
Henrik Gramner
Browse files
x86: Add pal_pred AVX2 asm
parent
0ba64ee5
Pipeline
#1161
passed with stage
in 2 minutes
Changes
4
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
src/ipred.h
View file @
93c4bea2
...
...
@@ -70,7 +70,7 @@ typedef decl_cfl_pred_fn(*cfl_pred_fn);
*/
#define decl_pal_pred_fn(name) \
void (name)(pixel *dst, ptrdiff_t stride, const uint16_t *pal, \
const uint8_t *idx,
const int w, const
int h)
const uint8_t *idx,
int w,
int h)
typedef
decl_pal_pred_fn
(
*
pal_pred_fn
);
typedef
struct
Dav1dIntraPredDSPContext
{
...
...
src/x86/ipred.asm
View file @
93c4bea2
...
...
@@ -93,6 +93,7 @@ JMP_TABLE ipred_h, avx2, w4, w8, w16, w32, w64
JMP_TABLE
ipred_cfl
,
avx2
,
h4
,
h8
,
h16
,
h32
,
w4
,
w8
,
w16
,
w32
,
\
s4
-
8
*
4
,
s8
-
8
*
4
,
s16
-
8
*
4
,
s32
-
8
*
4
JMP_TABLE
ipred_cfl_left
,
avx2
,
h4
,
h8
,
h16
,
h32
JMP_TABLE
pal_pred
,
avx2
,
w4
,
w8
,
w16
,
w32
,
w64
SECTION
.text
...
...
@@ -1515,4 +1516,82 @@ cglobal ipred_cfl_128, 3, 7, 6, dst, stride, tl, w, h, ac, alpha
movifnidn
acq
,
acmp
jmp
wq
;-----------------------------------------------------------------------------
; pal_pred(dst, stride, pal, idx, w, h)
;
; Palette prediction: every byte read from idx selects one entry of a 16-byte
; lookup table built from pal (pshufb does the lookup), and the selected bytes
; are written row by row to dst. The code dispatches once on the block width
; through pal_pred_avx2_table and then runs a width-specific loop.
;
; Each .w4-.w32 iteration consumes enough idx bytes for four rows and steps
; dst by 4*stride; .w64 handles two rows per iteration. h is therefore
; expected to be a multiple of 4 (2 for w64) -- the loops only test "h > 0".
;-----------------------------------------------------------------------------
cglobal pal_pred, 4, 6, 5, dst, stride, pal, idx, w, h
    vbroadcasti128       m4, [palq]               ; 16 bytes of pal in both 128-bit lanes
    lea                  r2, [pal_pred_avx2_table] ; r2 is reused as scratch from here on
    tzcnt                wd, wm                   ; bit index of the lowest set bit of w
    movifnidn            hd, hm
    movsxd               wq, [r2+wq*4]            ; signed 32-bit table entry for this width
    packuswb             m4, m4                   ; words -> bytes (unsigned saturation)
    add                  wq, r2                   ; table entry + table address = branch target
    lea                  r2, [strideq*3]          ; offset of the 4th row in each group
    jmp                  wq
.w4:
    ; 16 indices -> 4 rows of 4 bytes, one dword per row
    pshufb              xm0, xm4, [idxq]
    add                idxq, 16
    movd   [dstq+strideq*0], xm0
    pextrd [dstq+strideq*1], xm0, 1
    pextrd [dstq+strideq*2], xm0, 2
    pextrd [dstq+r2       ], xm0, 3
    lea                dstq, [dstq+strideq*4]
    sub                  hd, 4
    jg .w4
    RET
ALIGN function_align
.w8:
    ; 32 indices -> 4 rows of 8 bytes, low/high qword of each xmm per row
    pshufb              xm0, xm4, [idxq+16*0]
    pshufb              xm1, xm4, [idxq+16*1]
    add                idxq, 16*2
    movq   [dstq+strideq*0], xm0
    movhps [dstq+strideq*1], xm0
    movq   [dstq+strideq*2], xm1
    movhps [dstq+r2       ], xm1
    lea                dstq, [dstq+strideq*4]
    sub                  hd, 4
    jg .w8
    RET
ALIGN function_align
.w16:
    ; 64 indices -> 4 rows of 16 bytes, one 128-bit lane per row
    pshufb               m0, m4, [idxq+32*0]
    pshufb               m1, m4, [idxq+32*1]
    add                idxq, 32*2
    mova         [dstq+strideq*0], xm0
    vextracti128 [dstq+strideq*1], m0, 1
    mova         [dstq+strideq*2], xm1
    vextracti128 [dstq+r2       ], m1, 1
    lea                dstq, [dstq+strideq*4]
    sub                  hd, 4
    jg .w16
    RET
ALIGN function_align
.w32:
    ; 128 indices -> 4 rows of 32 bytes, one ymm per row
    pshufb               m0, m4, [idxq+32*0]
    pshufb               m1, m4, [idxq+32*1]
    pshufb               m2, m4, [idxq+32*2]
    pshufb               m3, m4, [idxq+32*3]
    add                idxq, 32*4
    mova [dstq+strideq*0], m0
    mova [dstq+strideq*1], m1
    mova [dstq+strideq*2], m2
    mova [dstq+r2       ], m3
    lea                dstq, [dstq+strideq*4]
    sub                  hd, 4
    jg .w32
    RET
ALIGN function_align
.w64:
    ; 128 indices -> 2 rows of 64 bytes, two ymm stores per row
    pshufb               m0, m4, [idxq+32*0]
    pshufb               m1, m4, [idxq+32*1]
    pshufb               m2, m4, [idxq+32*2]
    pshufb               m3, m4, [idxq+32*3]
    add                idxq, 32*4
    mova [dstq+strideq*0+32*0], m0
    mova [dstq+strideq*0+32*1], m1
    mova [dstq+strideq*1+32*0], m2
    mova [dstq+strideq*1+32*1], m3
    lea                dstq, [dstq+strideq*2]
    sub                  hd, 2
    jg .w64
    RET
%endif
src/x86/ipred_init.c
View file @
93c4bea2
...
...
@@ -44,6 +44,8 @@ decl_cfl_pred_fn(dav1d_ipred_cfl_128_avx2);
decl_cfl_pred_fn
(
dav1d_ipred_cfl_top_avx2
);
decl_cfl_pred_fn
(
dav1d_ipred_cfl_left_avx2
);
decl_pal_pred_fn
(
dav1d_pal_pred_avx2
);
void
bitfn
(
dav1d_intra_pred_dsp_init_x86
)(
Dav1dIntraPredDSPContext
*
const
c
)
{
const
unsigned
flags
=
dav1d_get_cpu_flags
();
...
...
@@ -65,5 +67,7 @@ void bitfn(dav1d_intra_pred_dsp_init_x86)(Dav1dIntraPredDSPContext *const c) {
c
->
cfl_pred
[
DC_128_PRED
]
=
dav1d_ipred_cfl_128_avx2
;
c
->
cfl_pred
[
TOP_DC_PRED
]
=
dav1d_ipred_cfl_top_avx2
;
c
->
cfl_pred
[
LEFT_DC_PRED
]
=
dav1d_ipred_cfl_left_avx2
;
c
->
pal_pred
=
dav1d_pal_pred_avx2
;
#endif
}
tests/checkasm/ipred.c
View file @
93c4bea2
...
...
@@ -142,10 +142,42 @@ static void check_cfl_pred(Dav1dIntraPredDSPContext *const c) {
report
(
"cfl_pred"
);
}
/*
 * Exercises c->pal_pred for every power-of-two width from 4 to 64 and, per
 * width, every power-of-two height between max(w / 4, 4) and min(w * 4, 64).
 * For each size a random palette and random index map are generated, the
 * reference and the implementation under test are both run on them, and their
 * outputs are compared byte for byte before the new implementation is
 * benchmarked.
 */
static void check_pal_pred(Dav1dIntraPredDSPContext *const c) {
    ALIGN_STK_32(pixel,    c_dst, 64 * 64,);
    ALIGN_STK_32(pixel,    a_dst, 64 * 64,);
    ALIGN_STK_32(uint8_t,  idx,   64 * 64,);
    ALIGN_STK_16(uint16_t, pal,   8,);

    declare_func(void, pixel *dst, ptrdiff_t stride, const uint16_t *pal,
                 const uint8_t *idx, int w, int h);

    /* Largest value a palette entry may take at the current bit depth. */
    const int pal_mask = (1 << BITDEPTH) - 1;

    for (int w = 4; w <= 64; w *= 2) {
        if (!check_func(c->pal_pred, "pal_pred_w%d_%dbpc", w, BITDEPTH))
            continue;

        /* Buffers are packed: one row directly follows the previous one. */
        const ptrdiff_t stride = w * sizeof(pixel);
        const int h_first = imax(w / 4, 4);
        const int h_last  = imin(w * 4, 64);

        for (int h = h_first; h <= h_last; h *= 2) {
            const int n_idx = w * h;

            /* Palette first, then indices, so the rand() sequence is fixed. */
            for (int p = 0; p < 8; p++) {
                pal[p] = rand() & pal_mask;
            }
            for (int k = 0; k < n_idx; k++) {
                idx[k] = rand() & 7;
            }

            call_ref(c_dst, stride, pal, idx, w, h);
            call_new(a_dst, stride, pal, idx, w, h);

            if (memcmp(c_dst, a_dst, n_idx * sizeof(pixel)) != 0) {
                fail();
            }

            bench_new(a_dst, stride, pal, idx, w, h);
        }
    }

    report("pal_pred");
}
/*
 * Entry point for the intra-prediction checks: initialises a DSP context for
 * the current bit depth and runs the intra, CfL and palette prediction checks
 * against it, in that order.
 */
void bitfn(checkasm_check_ipred)(void) {
    Dav1dIntraPredDSPContext dsp;
    Dav1dIntraPredDSPContext *const ctx = &dsp;

    bitfn(dav1d_intra_pred_dsp_init)(ctx);

    check_intra_pred(ctx);
    check_cfl_pred(ctx);
    check_pal_pred(ctx);
}
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment