Commit 5f7f950c authored by Henrik Gramner
Browse files

x86: Correctly use v-prefix for instructions with opmasks

The v-prefix was always required for instructions that use opmasks; omitting
it only accidentally happened to work correctly in a few cases.
parent 3d90057e
...@@ -621,8 +621,8 @@ cglobal sub16x16_dct, 3,3,6 ...@@ -621,8 +621,8 @@ cglobal sub16x16_dct, 3,3,6
SBUTTERFLY wd, 1, 0, 2 SBUTTERFLY wd, 1, 0, 2
paddw m2, m1, m0 paddw m2, m1, m0
psubw m3, m1, m0 psubw m3, m1, m0
paddw m2 {k1}, m1 ; 0+1+2+3 0<<1+1-2-3<<1 vpaddw m2 {k1}, m1 ; 0+1+2+3 0<<1+1-2-3<<1
psubw m3 {k1}, m0 ; 0-1-2+3 0-1<<1+2<<1-3 vpsubw m3 {k1}, m0 ; 0-1-2+3 0-1<<1+2<<1-3
shufps m1, m2, m3, q2323 ; a3 b3 a2 b2 c3 d3 c2 d2 shufps m1, m2, m3, q2323 ; a3 b3 a2 b2 c3 d3 c2 d2
punpcklqdq m2, m3 ; a0 b0 a1 b1 c0 d0 c1 d1 punpcklqdq m2, m3 ; a0 b0 a1 b1 c0 d0 c1 d1
SUMSUB_BA w, 1, 2, 3 SUMSUB_BA w, 1, 2, 3
...@@ -630,8 +630,8 @@ cglobal sub16x16_dct, 3,3,6 ...@@ -630,8 +630,8 @@ cglobal sub16x16_dct, 3,3,6
shufps m1, m2, q2020 ; a0+a3 b0+b3 c0+c3 d0+d3 a0-a3 b0-b3 c0-c3 d0-d3 shufps m1, m2, q2020 ; a0+a3 b0+b3 c0+c3 d0+d3 a0-a3 b0-b3 c0-c3 d0-d3
paddw m2, m1, m3 paddw m2, m1, m3
psubw m0, m1, m3 psubw m0, m1, m3
paddw m2 {k2}, m1 ; 0'+1'+2'+3' 0'<<1+1'-2'-3'<<1 vpaddw m2 {k2}, m1 ; 0'+1'+2'+3' 0'<<1+1'-2'-3'<<1
psubw m0 {k2}, m3 ; 0'-1'-2'+3' 0'-1'<<1+2'<<1-3' vpsubw m0 {k2}, m3 ; 0'-1'-2'+3' 0'-1'<<1+2'<<1-3'
%endmacro %endmacro
INIT_XMM avx512 INIT_XMM avx512
...@@ -743,7 +743,7 @@ cglobal sub8x8_dct_dc, 3,3 ...@@ -743,7 +743,7 @@ cglobal sub8x8_dct_dc, 3,3
paddw xmm0, xmm2 ; 0+1 0+1 2+3 2+3 paddw xmm0, xmm2 ; 0+1 0+1 2+3 2+3
punpckldq xmm0, xmm1 ; 0+1 0+1 0-1 0-1 2+3 2+3 2-3 2-3 punpckldq xmm0, xmm1 ; 0+1 0+1 0-1 0-1 2+3 2+3 2-3 2-3
punpcklqdq xmm1, xmm0, xmm0 punpcklqdq xmm1, xmm0, xmm0
psubw xmm0 {k1}, xm3, xmm0 vpsubw xmm0 {k1}, xm3, xmm0
paddw xmm0, xmm1 ; 0+1+2+3 0+1-2-3 0-1+2-3 0-1-2+3 paddw xmm0, xmm1 ; 0+1+2+3 0+1-2-3 0-1+2-3 0-1-2+3
movhps [r0], xmm0 movhps [r0], xmm0
RET RET
......
...@@ -2518,8 +2518,8 @@ cglobal mbtree_propagate_list_internal, 5,7,21 ...@@ -2518,8 +2518,8 @@ cglobal mbtree_propagate_list_internal, 5,7,21
paddd m6, m7 ; i_mb_x += 8 paddd m6, m7 ; i_mb_x += 8
pand m3, m8 ; {x, y} pand m3, m8 ; {x, y}
vprold m1, m3, 20 ; {y, x} << 4 vprold m1, m3, 20 ; {y, x} << 4
psubw m3 {k4}, m9, m3 ; {32-x, 32-y}, {32-x, y} vpsubw m3 {k4}, m9, m3 ; {32-x, 32-y}, {32-x, y}
psubw m1 {k5}, m10, m1 ; ({32-y, x}, {y, x}) << 4 vpsubw m1 {k5}, m10, m1 ; ({32-y, x}, {y, x}) << 4
pmullw m3, m1 pmullw m3, m1
paddsw m3, m3 ; prevent signed overflow in idx0 (32*32<<5 == 0x8000) paddsw m3, m3 ; prevent signed overflow in idx0 (32*32<<5 == 0x8000)
pmulhrsw m2, m3, m4 ; idx01weight idx23weightp pmulhrsw m2, m3, m4 ; idx01weight idx23weightp
...@@ -2530,11 +2530,11 @@ cglobal mbtree_propagate_list_internal, 5,7,21 ...@@ -2530,11 +2530,11 @@ cglobal mbtree_propagate_list_internal, 5,7,21
vpcmpuw k2, ym1, ym20, 1 ; {mbx, mbx+1} < width vpcmpuw k2, ym1, ym20, 1 ; {mbx, mbx+1} < width
kunpckwd k2, k2, k2 kunpckwd k2, k2, k2
psrad m1, m0, 16 psrad m1, m0, 16
paddd m1 {k6}, m11 vpaddd m1 {k6}, m11
vpcmpud k1 {k1}, m1, m13, 1 ; mby < height | mby+1 < height vpcmpud k1 {k1}, m1, m13, 1 ; mby < height | mby+1 < height
pmaddwd m0, m15 pmaddwd m0, m15
paddd m0 {k6}, m14 ; idx0 | idx2 vpaddd m0 {k6}, m14 ; idx0 | idx2
vmovdqu16 m2 {k2}{z}, m2 ; idx01weight | idx23weight vmovdqu16 m2 {k2}{z}, m2 ; idx01weight | idx23weight
vptestmd k1 {k1}, m2, m2 ; mask out offsets with no changes vptestmd k1 {k1}, m2, m2 ; mask out offsets with no changes
......
...@@ -4744,7 +4744,7 @@ cglobal intra_sad_x9_8x8, 5,7,8 ...@@ -4744,7 +4744,7 @@ cglobal intra_sad_x9_8x8, 5,7,8
%endmacro %endmacro
%macro SATD_AVX512_END 0-1 0 ; sa8d %macro SATD_AVX512_END 0-1 0 ; sa8d
paddw m0 {k1}{z}, m1 ; zero-extend to dwords vpaddw m0 {k1}{z}, m1 ; zero-extend to dwords
%if ARCH_X86_64 %if ARCH_X86_64
%if mmsize == 64 %if mmsize == 64
vextracti32x8 ym1, m0, 1 vextracti32x8 ym1, m0, 1
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment