Commit 5f7f950c authored by Henrik Gramner
Browse files

x86: Correctly use v-prefix for instructions with opmasks

The v-prefix was always required for instructions that use opmasks;
omitting it merely happened to work correctly in a few cases.
parent 3d90057e
......@@ -621,8 +621,8 @@ cglobal sub16x16_dct, 3,3,6
SBUTTERFLY wd, 1, 0, 2
paddw m2, m1, m0
psubw m3, m1, m0
paddw m2 {k1}, m1 ; 0+1+2+3 0<<1+1-2-3<<1
psubw m3 {k1}, m0 ; 0-1-2+3 0-1<<1+2<<1-3
vpaddw m2 {k1}, m1 ; 0+1+2+3 0<<1+1-2-3<<1
vpsubw m3 {k1}, m0 ; 0-1-2+3 0-1<<1+2<<1-3
shufps m1, m2, m3, q2323 ; a3 b3 a2 b2 c3 d3 c2 d2
punpcklqdq m2, m3 ; a0 b0 a1 b1 c0 d0 c1 d1
SUMSUB_BA w, 1, 2, 3
......@@ -630,8 +630,8 @@ cglobal sub16x16_dct, 3,3,6
shufps m1, m2, q2020 ; a0+a3 b0+b3 c0+c3 d0+d3 a0-a3 b0-b3 c0-c3 d0-d3
paddw m2, m1, m3
psubw m0, m1, m3
paddw m2 {k2}, m1 ; 0'+1'+2'+3' 0'<<1+1'-2'-3'<<1
psubw m0 {k2}, m3 ; 0'-1'-2'+3' 0'-1'<<1+2'<<1-3'
vpaddw m2 {k2}, m1 ; 0'+1'+2'+3' 0'<<1+1'-2'-3'<<1
vpsubw m0 {k2}, m3 ; 0'-1'-2'+3' 0'-1'<<1+2'<<1-3'
%endmacro
INIT_XMM avx512
......@@ -743,7 +743,7 @@ cglobal sub8x8_dct_dc, 3,3
paddw xmm0, xmm2 ; 0+1 0+1 2+3 2+3
punpckldq xmm0, xmm1 ; 0+1 0+1 0-1 0-1 2+3 2+3 2-3 2-3
punpcklqdq xmm1, xmm0, xmm0
psubw xmm0 {k1}, xm3, xmm0
vpsubw xmm0 {k1}, xm3, xmm0
paddw xmm0, xmm1 ; 0+1+2+3 0+1-2-3 0-1+2-3 0-1-2+3
movhps [r0], xmm0
RET
......
......@@ -2518,8 +2518,8 @@ cglobal mbtree_propagate_list_internal, 5,7,21
paddd m6, m7 ; i_mb_x += 8
pand m3, m8 ; {x, y}
vprold m1, m3, 20 ; {y, x} << 4
psubw m3 {k4}, m9, m3 ; {32-x, 32-y}, {32-x, y}
psubw m1 {k5}, m10, m1 ; ({32-y, x}, {y, x}) << 4
vpsubw m3 {k4}, m9, m3 ; {32-x, 32-y}, {32-x, y}
vpsubw m1 {k5}, m10, m1 ; ({32-y, x}, {y, x}) << 4
pmullw m3, m1
paddsw m3, m3 ; prevent signed overflow in idx0 (32*32<<5 == 0x8000)
pmulhrsw m2, m3, m4 ; idx01weight idx23weightp
......@@ -2530,11 +2530,11 @@ cglobal mbtree_propagate_list_internal, 5,7,21
vpcmpuw k2, ym1, ym20, 1 ; {mbx, mbx+1} < width
kunpckwd k2, k2, k2
psrad m1, m0, 16
paddd m1 {k6}, m11
vpaddd m1 {k6}, m11
vpcmpud k1 {k1}, m1, m13, 1 ; mby < height | mby+1 < height
pmaddwd m0, m15
paddd m0 {k6}, m14 ; idx0 | idx2
vpaddd m0 {k6}, m14 ; idx0 | idx2
vmovdqu16 m2 {k2}{z}, m2 ; idx01weight | idx23weight
vptestmd k1 {k1}, m2, m2 ; mask out offsets with no changes
......
......@@ -4744,7 +4744,7 @@ cglobal intra_sad_x9_8x8, 5,7,8
%endmacro
%macro SATD_AVX512_END 0-1 0 ; sa8d
paddw m0 {k1}{z}, m1 ; zero-extend to dwords
vpaddw m0 {k1}{z}, m1 ; zero-extend to dwords
%if ARCH_X86_64
%if mmsize == 64
vextracti32x8 ym1, m0, 1
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment