Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
VideoLAN
x264
Commits
60f7c47d
Commit
60f7c47d
authored
Jul 29, 2008
by
Holger Lubitz
Committed by
Fiona Glaser
Jul 29, 2008
Browse files
Refactor asm macros part 1: DCT
parent
63b84fa4
Changes
4
Hide whitespace changes
Inline
Side-by-side
Makefile
View file @
60f7c47d
...
...
@@ -23,7 +23,7 @@ endif
ifneq
($(AS),)
X86SRC0
=
cabac-a.asm dct-a.asm deblock-a.asm mc-a.asm mc-a2.asm
\
pixel-a.asm predict-a.asm quant-a.asm sad-a.asm
\
cpu-32.asm dct-32.asm
cpu-32.asm dct-32.asm
x86util.asm
X86SRC
=
$
(
X86SRC0:%
=
common/x86/%
)
ifeq
($(ARCH),X86)
...
...
common/x86/dct-32.asm
View file @
60f7c47d
...
...
@@ -24,6 +24,7 @@
;*****************************************************************************
%include "x86inc.asm"
%include "x86util.asm"
SECTION
_RODATA
...
...
@@ -31,12 +32,6 @@ pw_32: times 8 dw 32
SECTION
.text
%macro SUMSUB_BA 2
paddw
%
1
,
%
2
paddw
%
2
,
%
2
psubw
%
2
,
%
1
%endmacro
%macro SBUTTERFLY 4
mova
m
%
4
,
m
%
2
punpckl
%
1
m
%
2
,
m
%
3
...
...
@@ -52,23 +47,6 @@ SECTION .text
SWAP
%
2
,
%
3
%endmacro
%macro LOAD_DIFF_8P 4
movh
%
1
,
%
3
movh
%
2
,
%
4
punpcklbw
%
1
,
%
2
punpcklbw
%
2
,
%
2
psubw
%
1
,
%
2
%endmacro
%macro STORE_DIFF_8P 4
psraw
%
1
,
6
movh
%
3
,
%
2
punpcklbw
%
3
,
%
4
paddsw
%
1
,
%
3
packuswb
%
1
,
%
1
movh
%
2
,
%
1
%endmacro
; in: m0..m7
; out: 0,4,6 in mem, rest in regs
%macro DCT8_1D 9
...
...
@@ -175,15 +153,15 @@ SECTION .text
INIT_MMX
ALIGN
16
load_diff_4x8_mmx:
LOAD_DIFF
_8P
m0
,
m7
,
[
r1
+
0
*
FENC_STRIDE
],
[
r2
+
0
*
FDEC_STRIDE
]
LOAD_DIFF
_8P
m1
,
m7
,
[
r1
+
1
*
FENC_STRIDE
],
[
r2
+
1
*
FDEC_STRIDE
]
LOAD_DIFF
_8P
m2
,
m7
,
[
r1
+
2
*
FENC_STRIDE
],
[
r2
+
2
*
FDEC_STRIDE
]
LOAD_DIFF
_8P
m3
,
m7
,
[
r1
+
3
*
FENC_STRIDE
],
[
r2
+
3
*
FDEC_STRIDE
]
LOAD_DIFF
_8P
m4
,
m7
,
[
r1
+
4
*
FENC_STRIDE
],
[
r2
+
4
*
FDEC_STRIDE
]
LOAD_DIFF
_8P
m5
,
m7
,
[
r1
+
5
*
FENC_STRIDE
],
[
r2
+
5
*
FDEC_STRIDE
]
LOAD_DIFF
m0
,
m7
,
none
,
[
r1
+
0
*
FENC_STRIDE
],
[
r2
+
0
*
FDEC_STRIDE
]
LOAD_DIFF
m1
,
m7
,
none
,
[
r1
+
1
*
FENC_STRIDE
],
[
r2
+
1
*
FDEC_STRIDE
]
LOAD_DIFF
m2
,
m7
,
none
,
[
r1
+
2
*
FENC_STRIDE
],
[
r2
+
2
*
FDEC_STRIDE
]
LOAD_DIFF
m3
,
m7
,
none
,
[
r1
+
3
*
FENC_STRIDE
],
[
r2
+
3
*
FDEC_STRIDE
]
LOAD_DIFF
m4
,
m7
,
none
,
[
r1
+
4
*
FENC_STRIDE
],
[
r2
+
4
*
FDEC_STRIDE
]
LOAD_DIFF
m5
,
m7
,
none
,
[
r1
+
5
*
FENC_STRIDE
],
[
r2
+
5
*
FDEC_STRIDE
]
movq
[
r0
],
m0
LOAD_DIFF
_8P
m6
,
m7
,
[
r1
+
6
*
FENC_STRIDE
],
[
r2
+
6
*
FDEC_STRIDE
]
LOAD_DIFF
_8P
m7
,
m0
,
[
r1
+
7
*
FENC_STRIDE
],
[
r2
+
7
*
FDEC_STRIDE
]
LOAD_DIFF
m6
,
m7
,
none
,
[
r1
+
6
*
FENC_STRIDE
],
[
r2
+
6
*
FDEC_STRIDE
]
LOAD_DIFF
m7
,
m0
,
none
,
[
r1
+
7
*
FENC_STRIDE
],
[
r2
+
7
*
FDEC_STRIDE
]
movq
m0
,
[
r0
]
ret
...
...
@@ -412,15 +390,15 @@ INIT_XMM
cglobal
x264_sub8x8_dct8_sse2
,
3
,
3
global
x264_sub8x8_dct8_sse2
%+
.skip_prologue
.skip_prologue:
LOAD_DIFF
_8P
m0
,
m7
,
[
r1
+
0
*
FENC_STRIDE
],
[
r2
+
0
*
FDEC_STRIDE
]
LOAD_DIFF
_8P
m1
,
m7
,
[
r1
+
1
*
FENC_STRIDE
],
[
r2
+
1
*
FDEC_STRIDE
]
LOAD_DIFF
_8P
m2
,
m7
,
[
r1
+
2
*
FENC_STRIDE
],
[
r2
+
2
*
FDEC_STRIDE
]
LOAD_DIFF
_8P
m3
,
m7
,
[
r1
+
3
*
FENC_STRIDE
],
[
r2
+
3
*
FDEC_STRIDE
]
LOAD_DIFF
_8P
m4
,
m7
,
[
r1
+
4
*
FENC_STRIDE
],
[
r2
+
4
*
FDEC_STRIDE
]
LOAD_DIFF
_8P
m5
,
m7
,
[
r1
+
5
*
FENC_STRIDE
],
[
r2
+
5
*
FDEC_STRIDE
]
LOAD_DIFF
m0
,
m7
,
none
,
[
r1
+
0
*
FENC_STRIDE
],
[
r2
+
0
*
FDEC_STRIDE
]
LOAD_DIFF
m1
,
m7
,
none
,
[
r1
+
1
*
FENC_STRIDE
],
[
r2
+
1
*
FDEC_STRIDE
]
LOAD_DIFF
m2
,
m7
,
none
,
[
r1
+
2
*
FENC_STRIDE
],
[
r2
+
2
*
FDEC_STRIDE
]
LOAD_DIFF
m3
,
m7
,
none
,
[
r1
+
3
*
FENC_STRIDE
],
[
r2
+
3
*
FDEC_STRIDE
]
LOAD_DIFF
m4
,
m7
,
none
,
[
r1
+
4
*
FENC_STRIDE
],
[
r2
+
4
*
FDEC_STRIDE
]
LOAD_DIFF
m5
,
m7
,
none
,
[
r1
+
5
*
FENC_STRIDE
],
[
r2
+
5
*
FDEC_STRIDE
]
SPILL
r0
,
0
LOAD_DIFF
_8P
m6
,
m7
,
[
r1
+
6
*
FENC_STRIDE
],
[
r2
+
6
*
FDEC_STRIDE
]
LOAD_DIFF
_8P
m7
,
m0
,
[
r1
+
7
*
FENC_STRIDE
],
[
r2
+
7
*
FDEC_STRIDE
]
LOAD_DIFF
m6
,
m7
,
none
,
[
r1
+
6
*
FENC_STRIDE
],
[
r2
+
6
*
FDEC_STRIDE
]
LOAD_DIFF
m7
,
m0
,
none
,
[
r1
+
7
*
FENC_STRIDE
],
[
r2
+
7
*
FDEC_STRIDE
]
UNSPILL
r0
,
0
DCT8_1D
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
r0
UNSPILL
r0
,
0
,
4
...
...
@@ -446,14 +424,14 @@ global x264_add8x8_idct8_sse2 %+ .skip_prologue
IDCT8_1D
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
r1
SPILL
r1
,
6
,
7
pxor
m7
,
m7
STORE_DIFF
_8P
m0
,
[
r0
+
FDEC_STRIDE
*
0
]
,
m6
,
m7
STORE_DIFF
_8P
m1
,
[
r0
+
FDEC_STRIDE
*
1
]
,
m6
,
m7
STORE_DIFF
_8P
m2
,
[
r0
+
FDEC_STRIDE
*
2
]
,
m6
,
m7
STORE_DIFF
_8P
m3
,
[
r0
+
FDEC_STRIDE
*
3
]
,
m6
,
m7
STORE_DIFF
_8P
m4
,
[
r0
+
FDEC_STRIDE
*
4
]
,
m6
,
m7
STORE_DIFF
_8P
m5
,
[
r0
+
FDEC_STRIDE
*
5
]
,
m6
,
m7
STORE_DIFF
m0
,
m6
,
m7
,
[
r0
+
FDEC_STRIDE
*
0
]
STORE_DIFF
m1
,
m6
,
m7
,
[
r0
+
FDEC_STRIDE
*
1
]
STORE_DIFF
m2
,
m6
,
m7
,
[
r0
+
FDEC_STRIDE
*
2
]
STORE_DIFF
m3
,
m6
,
m7
,
[
r0
+
FDEC_STRIDE
*
3
]
STORE_DIFF
m4
,
m6
,
m7
,
[
r0
+
FDEC_STRIDE
*
4
]
STORE_DIFF
m5
,
m6
,
m7
,
[
r0
+
FDEC_STRIDE
*
5
]
UNSPILL_SHUFFLE
r1
,
0
,
1
,
6
,
7
STORE_DIFF
_8P
m0
,
[
r0
+
FDEC_STRIDE
*
6
]
,
m6
,
m7
STORE_DIFF
_8P
m1
,
[
r0
+
FDEC_STRIDE
*
7
]
,
m6
,
m7
STORE_DIFF
m0
,
m6
,
m7
,
[
r0
+
FDEC_STRIDE
*
6
]
STORE_DIFF
m1
,
m6
,
m7
,
[
r0
+
FDEC_STRIDE
*
7
]
ret
common/x86/dct-64.asm
View file @
60f7c47d
...
...
@@ -23,6 +23,7 @@
;*****************************************************************************
%include "x86inc.asm"
%include "x86util.asm"
SECTION
_RODATA
pw_32:
times
8
dw
32
...
...
@@ -31,20 +32,6 @@ SECTION .text
INIT_XMM
%macro LOAD_DIFF_8P 5
movq
%
1
,
%
4
punpcklbw
%
1
,
%
3
movq
%
2
,
%
5
punpcklbw
%
2
,
%
3
psubw
%
1
,
%
2
%endmacro
%macro SUMSUB_BA 2
paddw
%
1
,
%
2
paddw
%
2
,
%
2
psubw
%
2
,
%
1
%endmacro
%macro SBUTTERFLY 4
mova
m
%
4
,
m
%
2
punpckl
%
1
m
%
2
,
m
%
3
...
...
@@ -69,15 +56,6 @@ INIT_XMM
SWAP
%
4
,
%
7
%endmacro
%macro STORE_DIFF_8P 4
psraw
%
1
,
6
movq
%
2
,
%
4
punpcklbw
%
2
,
%
3
paddsw
%
1
,
%
2
packuswb
%
1
,
%
1
movq
%
4
,
%
1
%endmacro
SECTION
.text
%macro DCT8_1D 10
...
...
@@ -136,14 +114,14 @@ SECTION .text
; void x264_sub8x8_dct8_sse2( int16_t dct[8][8], uint8_t *pix1, uint8_t *pix2 )
;-----------------------------------------------------------------------------
cglobal
x264_sub8x8_dct8_sse2
LOAD_DIFF
_8P
m0
,
m8
,
m9
,
[
r1
+
0
*
FENC_STRIDE
],
[
r2
+
0
*
FDEC_STRIDE
]
LOAD_DIFF
_8P
m1
,
m8
,
m9
,
[
r1
+
1
*
FENC_STRIDE
],
[
r2
+
1
*
FDEC_STRIDE
]
LOAD_DIFF
_8P
m2
,
m8
,
m9
,
[
r1
+
2
*
FENC_STRIDE
],
[
r2
+
2
*
FDEC_STRIDE
]
LOAD_DIFF
_8P
m3
,
m8
,
m9
,
[
r1
+
3
*
FENC_STRIDE
],
[
r2
+
3
*
FDEC_STRIDE
]
LOAD_DIFF
_8P
m4
,
m8
,
m9
,
[
r1
+
4
*
FENC_STRIDE
],
[
r2
+
4
*
FDEC_STRIDE
]
LOAD_DIFF
_8P
m5
,
m8
,
m9
,
[
r1
+
5
*
FENC_STRIDE
],
[
r2
+
5
*
FDEC_STRIDE
]
LOAD_DIFF
_8P
m6
,
m8
,
m9
,
[
r1
+
6
*
FENC_STRIDE
],
[
r2
+
6
*
FDEC_STRIDE
]
LOAD_DIFF
_8P
m7
,
m8
,
m9
,
[
r1
+
7
*
FENC_STRIDE
],
[
r2
+
7
*
FDEC_STRIDE
]
LOAD_DIFF
m0
,
m8
,
m9
,
[
r1
+
0
*
FENC_STRIDE
],
[
r2
+
0
*
FDEC_STRIDE
]
LOAD_DIFF
m1
,
m8
,
m9
,
[
r1
+
1
*
FENC_STRIDE
],
[
r2
+
1
*
FDEC_STRIDE
]
LOAD_DIFF
m2
,
m8
,
m9
,
[
r1
+
2
*
FENC_STRIDE
],
[
r2
+
2
*
FDEC_STRIDE
]
LOAD_DIFF
m3
,
m8
,
m9
,
[
r1
+
3
*
FENC_STRIDE
],
[
r2
+
3
*
FDEC_STRIDE
]
LOAD_DIFF
m4
,
m8
,
m9
,
[
r1
+
4
*
FENC_STRIDE
],
[
r2
+
4
*
FDEC_STRIDE
]
LOAD_DIFF
m5
,
m8
,
m9
,
[
r1
+
5
*
FENC_STRIDE
],
[
r2
+
5
*
FDEC_STRIDE
]
LOAD_DIFF
m6
,
m8
,
m9
,
[
r1
+
6
*
FENC_STRIDE
],
[
r2
+
6
*
FDEC_STRIDE
]
LOAD_DIFF
m7
,
m8
,
m9
,
[
r1
+
7
*
FENC_STRIDE
],
[
r2
+
7
*
FDEC_STRIDE
]
DCT8_1D
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
TRANSPOSE8x8W
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
...
...
@@ -232,14 +210,14 @@ cglobal x264_add8x8_idct8_sse2
IDCT8_1D
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
pxor
m9
,
m9
STORE_DIFF
_8P
m0
,
m8
,
m9
,
[
r0
+
0
*
FDEC_STRIDE
]
STORE_DIFF
_8P
m1
,
m8
,
m9
,
[
r0
+
1
*
FDEC_STRIDE
]
STORE_DIFF
_8P
m2
,
m8
,
m9
,
[
r0
+
2
*
FDEC_STRIDE
]
STORE_DIFF
_8P
m3
,
m8
,
m9
,
[
r0
+
3
*
FDEC_STRIDE
]
STORE_DIFF
_8P
m4
,
m8
,
m9
,
[
r0
+
4
*
FDEC_STRIDE
]
STORE_DIFF
_8P
m5
,
m8
,
m9
,
[
r0
+
5
*
FDEC_STRIDE
]
STORE_DIFF
_8P
m6
,
m8
,
m9
,
[
r0
+
6
*
FDEC_STRIDE
]
STORE_DIFF
_8P
m7
,
m8
,
m9
,
[
r0
+
7
*
FDEC_STRIDE
]
STORE_DIFF
m0
,
m8
,
m9
,
[
r0
+
0
*
FDEC_STRIDE
]
STORE_DIFF
m1
,
m8
,
m9
,
[
r0
+
1
*
FDEC_STRIDE
]
STORE_DIFF
m2
,
m8
,
m9
,
[
r0
+
2
*
FDEC_STRIDE
]
STORE_DIFF
m3
,
m8
,
m9
,
[
r0
+
3
*
FDEC_STRIDE
]
STORE_DIFF
m4
,
m8
,
m9
,
[
r0
+
4
*
FDEC_STRIDE
]
STORE_DIFF
m5
,
m8
,
m9
,
[
r0
+
5
*
FDEC_STRIDE
]
STORE_DIFF
m6
,
m8
,
m9
,
[
r0
+
6
*
FDEC_STRIDE
]
STORE_DIFF
m7
,
m8
,
m9
,
[
r0
+
7
*
FDEC_STRIDE
]
ret
common/x86/dct-a.asm
View file @
60f7c47d
...
...
@@ -23,6 +23,7 @@
;*****************************************************************************
%include "x86inc.asm"
%include "x86util.asm"
SECTION
_RODATA
pw_1:
times
8
dw
1
...
...
@@ -31,46 +32,6 @@ pb_zigzag4: db 0,1,4,8,5,2,3,6,9,12,13,10,7,11,14,15
SECTION
.text
%macro LOAD_DIFF_4P 5
movh
%
1
,
%
4
punpcklbw
%
1
,
%
3
movh
%
2
,
%
5
punpcklbw
%
2
,
%
3
psubw
%
1
,
%
2
%endmacro
%macro SUMSUB_BA 2
paddw
%
1
,
%
2
paddw
%
2
,
%
2
psubw
%
2
,
%
1
%endmacro
%macro SUMSUB_BADC 4
paddw
%
1
,
%
2
paddw
%
3
,
%
4
paddw
%
2
,
%
2
paddw
%
4
,
%
4
psubw
%
2
,
%
1
psubw
%
4
,
%
3
%endmacro
%macro SUMSUB2_AB 3
mova
%
3
,
%
1
paddw
%
1
,
%
1
paddw
%
1
,
%
2
psubw
%
3
,
%
2
psubw
%
3
,
%
2
%endmacro
%macro SUMSUBD2_AB 4
mova
%
4
,
%
1
mova
%
3
,
%
2
psraw
%
2
,
1
psraw
%
4
,
1
paddw
%
1
,
%
2
psubw
%
4
,
%
3
%endmacro
%macro SBUTTERFLY 4
mova
m
%
4
,
m
%
2
punpckl
%
1
m
%
2
,
m
%
3
...
...
@@ -95,15 +56,6 @@ SECTION .text
SBUTTERFLY
qdq
,
%
3
,
%
4
,
%
5
%endmacro
%macro STORE_DIFF_4P 4
psraw
%
1
,
6
movh
%
2
,
%
4
punpcklbw
%
2
,
%
3
paddsw
%
1
,
%
2
packuswb
%
1
,
%
1
movh
%
4
,
%
1
%endmacro
%macro HADAMARD4_1D 4
SUMSUB_BADC
m
%
2
,
m
%
1
,
m
%
4
,
m
%
3
SUMSUB_BADC
m
%
4
,
m
%
2
,
m
%
3
,
m
%
1
...
...
@@ -173,10 +125,10 @@ cglobal x264_idct4x4dc_mmx, 1,1
cglobal
x264_sub4x4_dct_mmx
,
3
,
3
.skip_prologue:
%macro SUB_DCT4 1
LOAD_DIFF
_4P
m0
,
m6
,
m7
,
[
r1
+
0
*
FENC_STRIDE
],
[
r2
+
0
*
FDEC_STRIDE
]
LOAD_DIFF
_4P
m1
,
m6
,
m7
,
[
r1
+
1
*
FENC_STRIDE
],
[
r2
+
1
*
FDEC_STRIDE
]
LOAD_DIFF
_4P
m2
,
m6
,
m7
,
[
r1
+
2
*
FENC_STRIDE
],
[
r2
+
2
*
FDEC_STRIDE
]
LOAD_DIFF
_4P
m3
,
m6
,
m7
,
[
r1
+
3
*
FENC_STRIDE
],
[
r2
+
3
*
FDEC_STRIDE
]
LOAD_DIFF
m0
,
m6
,
m7
,
[
r1
+
0
*
FENC_STRIDE
],
[
r2
+
0
*
FDEC_STRIDE
]
LOAD_DIFF
m1
,
m6
,
m7
,
[
r1
+
1
*
FENC_STRIDE
],
[
r2
+
1
*
FDEC_STRIDE
]
LOAD_DIFF
m2
,
m6
,
m7
,
[
r1
+
2
*
FENC_STRIDE
],
[
r2
+
2
*
FDEC_STRIDE
]
LOAD_DIFF
m3
,
m6
,
m7
,
[
r1
+
3
*
FENC_STRIDE
],
[
r2
+
3
*
FDEC_STRIDE
]
DCT4_1D
0
,
1
,
2
,
3
,
4
TRANSPOSE
%
1
0
,
1
,
2
,
3
,
4
DCT4_1D
0
,
1
,
2
,
3
,
4
...
...
@@ -203,10 +155,10 @@ cglobal x264_add4x4_idct_mmx, 2,2,1
paddw
m0
,
[
pw_32
GLOBAL
]
IDCT4_1D
0
,
1
,
2
,
3
,
4
,
5
pxor
m7
,
m7
STORE_DIFF
_4P
m0
,
m4
,
m7
,
[
r0
+
0
*
FDEC_STRIDE
]
STORE_DIFF
_4P
m1
,
m4
,
m7
,
[
r0
+
1
*
FDEC_STRIDE
]
STORE_DIFF
_4P
m2
,
m4
,
m7
,
[
r0
+
2
*
FDEC_STRIDE
]
STORE_DIFF
_4P
m3
,
m4
,
m7
,
[
r0
+
3
*
FDEC_STRIDE
]
STORE_DIFF
m0
,
m4
,
m7
,
[
r0
+
0
*
FDEC_STRIDE
]
STORE_DIFF
m1
,
m4
,
m7
,
[
r0
+
1
*
FDEC_STRIDE
]
STORE_DIFF
m2
,
m4
,
m7
,
[
r0
+
2
*
FDEC_STRIDE
]
STORE_DIFF
m3
,
m4
,
m7
,
[
r0
+
3
*
FDEC_STRIDE
]
%endmacro
ADD_IDCT4
4
x4W
RET
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment