Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
7
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
Open sidebar
VideoLAN
x264
Commits
d1fbc652
Commit
d1fbc652
authored
Oct 21, 2008
by
Fiona Glaser
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add assembly versions of decimate_score
3-7x faster decimation, 1-3% faster overall
parent
8d6b262d
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
349 additions
and
51 deletions
+349
-51
common/quant.c
common/quant.c
+72
-0
common/quant.h
common/quant.h
+4
-0
common/x86/quant-a.asm
common/x86/quant-a.asm
+225
-0
common/x86/quant.h
common/x86/quant.h
+9
-0
encoder/macroblock.c
encoder/macroblock.c
+8
-51
tools/checkasm.c
tools/checkasm.c
+31
-0
No files found.
common/quant.c
View file @
d1fbc652
...
...
@@ -208,6 +208,66 @@ static void x264_denoise_dct( int16_t *dct, uint32_t *sum, uint16_t *offset, int
}
}
/* (ref: JVT-B118)
* x264_decimate_score*: given the DCT coefficients of a block, return a score used to decide whether
* the block can be emptied, i.e. all of its coefficients set to 0 (a low score means "zero it").
* Used in inter macroblocks (luma and chroma):
* luma: for an 8x8 block: if score < 4 -> zero it
* for the complete mb: if score < 6 -> zero it
* chroma: for the complete mb: if score < 7 -> zero it
*/
/* Per-coefficient score used for 15/16-coefficient blocks, indexed by the
 * zero-run length counted for that coefficient.  Only runs 0..5 score;
 * entries 6..15 are left to the implicit zero fill. */
const uint8_t x264_decimate_table4[16] =
{
    3, 2, 2, 1, 1, 1
};
/* Per-coefficient score used for 64-coefficient blocks, indexed by the
 * zero-run length counted for that coefficient.  Runs 0..3 score 3,
 * 4..11 score 2, 12..23 score 1; entries 24..63 are left to the implicit
 * zero fill. */
const uint8_t x264_decimate_table8[64] =
{
    3, 3, 3, 3,
    2, 2, 2, 2, 2, 2, 2, 2,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
};
/* Score i_max coefficients for decimation.
 *
 * dct    coefficients; they are examined from index i_max-1 down to 0
 * i_max  number of coefficients; 64 selects x264_decimate_table8, any other
 *        value selects x264_decimate_table4
 *
 * Returns 9 as soon as a coefficient outside [-1,1] is met.  Otherwise
 * returns the sum of ds_table[run] over every nonzero coefficient, where
 * run is the number of consecutive zero coefficients directly below it.
 */
static int ALWAYS_INLINE x264_decimate_score_internal( int16_t *dct, int i_max )
{
    const uint8_t *ds_table = (i_max == 64) ? x264_decimate_table8 : x264_decimate_table4;
    int i_score = 0;
    int idx = i_max - 1;

    /* Skip trailing zeros two at a time.  The pair is tested element-wise
     * (not through a uint32_t load) so that no alignment or aliasing
     * assumption is needed and dct[-1] is never read. */
    while( idx >= 1 && (dct[idx] | dct[idx-1]) == 0 )
        idx -= 2;
    /* At most one trailing zero can be left over after the pair loop. */
    if( idx >= 0 && dct[idx] == 0 )
        idx--;

    while( idx >= 0 )
    {
        int i_run;

        /* Unsigned compare accepts only -1, 0 and +1. */
        if( (unsigned)(dct[idx--] + 1) > 2 )
            return 9;

        /* Count the zeros below the coefficient just consumed. */
        i_run = 0;
        while( idx >= 0 && dct[idx] == 0 )
        {
            idx--;
            i_run++;
        }
        i_score += ds_table[i_run];
    }

    return i_score;
}
/* Decimation score of coefficients 1..15 of a 16-coefficient block;
 * dct[0] is not examined. */
static int x264_decimate_score15( int16_t *dct )
{
    int16_t *ac = &dct[1];
    return x264_decimate_score_internal( ac, 15 );
}
/* Decimation score of all 16 coefficients at dct. */
static int x264_decimate_score16( int16_t *dct )
{
    const int n_coef = 16;
    return x264_decimate_score_internal( dct, n_coef );
}
/* Decimation score of all 64 coefficients at dct (selects the 8x8 table). */
static int x264_decimate_score64( int16_t *dct )
{
    const int n_coef = 64;
    return x264_decimate_score_internal( dct, n_coef );
}
void
x264_quant_init
(
x264_t
*
h
,
int
cpu
,
x264_quant_function_t
*
pf
)
{
pf
->
quant_8x8
=
quant_8x8
;
...
...
@@ -219,6 +279,9 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
pf
->
dequant_8x8
=
dequant_8x8
;
pf
->
denoise_dct
=
x264_denoise_dct
;
pf
->
decimate_score15
=
x264_decimate_score15
;
pf
->
decimate_score16
=
x264_decimate_score16
;
pf
->
decimate_score64
=
x264_decimate_score64
;
#ifdef HAVE_MMX
if
(
cpu
&
X264_CPU_MMX
)
...
...
@@ -242,6 +305,9 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
pf
->
quant_2x2_dc
=
x264_quant_2x2_dc_mmxext
;
#ifdef ARCH_X86
pf
->
quant_4x4_dc
=
x264_quant_4x4_dc_mmxext
;
pf
->
decimate_score15
=
x264_decimate_score15_mmxext
;
pf
->
decimate_score16
=
x264_decimate_score16_mmxext
;
pf
->
decimate_score64
=
x264_decimate_score64_mmxext
;
#endif
}
...
...
@@ -258,6 +324,9 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
pf
->
dequant_8x8
=
x264_dequant_8x8_flat16_sse2
;
}
pf
->
denoise_dct
=
x264_denoise_dct_sse2
;
pf
->
decimate_score15
=
x264_decimate_score15_sse2
;
pf
->
decimate_score16
=
x264_decimate_score16_sse2
;
pf
->
decimate_score64
=
x264_decimate_score64_sse2
;
}
if
(
cpu
&
X264_CPU_SSSE3
)
...
...
@@ -267,6 +336,9 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
pf
->
quant_4x4
=
x264_quant_4x4_ssse3
;
pf
->
quant_8x8
=
x264_quant_8x8_ssse3
;
pf
->
denoise_dct
=
x264_denoise_dct_ssse3
;
pf
->
decimate_score15
=
x264_decimate_score15_ssse3
;
pf
->
decimate_score16
=
x264_decimate_score16_ssse3
;
pf
->
decimate_score64
=
x264_decimate_score64_ssse3
;
}
#endif // HAVE_MMX
...
...
common/quant.h
View file @
d1fbc652
...
...
@@ -34,6 +34,10 @@ typedef struct
void
(
*
dequant_8x8
)(
int16_t
dct
[
8
][
8
],
int
dequant_mf
[
6
][
8
][
8
],
int
i_qp
);
void
(
*
denoise_dct
)(
int16_t
*
dct
,
uint32_t
*
sum
,
uint16_t
*
offset
,
int
size
);
int
(
*
decimate_score15
)(
int16_t
*
dct
);
int
(
*
decimate_score16
)(
int16_t
*
dct
);
int
(
*
decimate_score64
)(
int16_t
*
dct
);
}
x264_quant_function_t
;
void
x264_quant_init
(
x264_t
*
h
,
int
cpu
,
x264_quant_function_t
*
pf
);
...
...
common/x86/quant-a.asm
View file @
d1fbc652
...
...
@@ -22,8 +22,10 @@
;*****************************************************************************
%include "x86inc.asm"
%include "x86util.asm"
SECTION
_RODATA
pb_1:
times
16
db
1
pw_1:
times
8
dw
1
pd_1:
times
4
dd
1
...
...
@@ -54,6 +56,17 @@ dequant8_scale:
DQ
M8
32
,
28
,
51
,
30
,
40
,
38
DQ
M8
36
,
32
,
58
,
34
,
46
,
43
; decimate_mask_table4: 256 one-byte entries, indexed by an 8-bit mask in
; which a set bit marks a nonzero coefficient (bit 0 = lowest coefficient).
; DECIMATE4x4 adds entries of this table into the score it returns.
; Spot checks (index 1 -> 3, 2 -> 2, 3 -> 6, 8 -> 1, 64 -> 0, 255 -> 24)
; match the sum of x264_decimate_table4[run] over the set bits, with run
; counted as the number of clear bits directly below each set bit.
decimate_mask_table4:
db
0
,
3
,
2
,
6
,
2
,
5
,
5
,
9
,
1
,
5
,
4
,
8
,
5
,
8
,
8
,
12
,
1
,
4
,
4
,
8
,
4
,
7
,
7
,
11
,
4
,
8
,
7
,
11
,
8
,
11
,
11
,
15
,
1
,
4
db
3
,
7
,
4
,
7
,
7
,
11
,
3
,
7
,
6
,
10
,
7
,
10
,
10
,
14
,
4
,
7
,
7
,
11
,
7
,
10
,
10
,
14
,
7
,
11
,
10
,
14
,
11
,
14
,
14
db
18
,
0
,
4
,
3
,
7
,
3
,
6
,
6
,
10
,
3
,
7
,
6
,
10
,
7
,
10
,
10
,
14
,
3
,
6
,
6
,
10
,
6
,
9
,
9
,
13
,
6
,
10
,
9
,
13
,
10
,
13
db
13
,
17
,
4
,
7
,
6
,
10
,
7
,
10
,
10
,
14
,
6
,
10
,
9
,
13
,
10
,
13
,
13
,
17
,
7
,
10
,
10
,
14
,
10
,
13
,
13
,
17
,
10
db
14
,
13
,
17
,
14
,
17
,
17
,
21
,
0
,
3
,
3
,
7
,
3
,
6
,
6
,
10
,
2
,
6
,
5
,
9
,
6
,
9
,
9
,
13
,
3
,
6
,
6
,
10
,
6
,
9
,
9
,
13
db
6
,
10
,
9
,
13
,
10
,
13
,
13
,
17
,
3
,
6
,
5
,
9
,
6
,
9
,
9
,
13
,
5
,
9
,
8
,
12
,
9
,
12
,
12
,
16
,
6
,
9
,
9
,
13
,
9
,
12
db
12
,
16
,
9
,
13
,
12
,
16
,
13
,
16
,
16
,
20
,
3
,
7
,
6
,
10
,
6
,
9
,
9
,
13
,
6
,
10
,
9
,
13
,
10
,
13
,
13
,
17
,
6
,
9
db
9
,
13
,
9
,
12
,
12
,
16
,
9
,
13
,
12
,
16
,
13
,
16
,
16
,
20
,
7
,
10
,
9
,
13
,
10
,
13
,
13
,
17
,
9
,
13
,
12
,
16
db
13
,
16
,
16
,
20
,
10
,
13
,
13
,
17
,
13
,
16
,
16
,
20
,
13
,
17
,
16
,
20
,
17
,
20
,
20
,
24
SECTION
.text
%macro QUANT_DC_START 0
...
...
@@ -379,3 +392,215 @@ DENOISE_DCT sse2
%define PABSW PABSW_SSSE3
%define PSIGNW PSIGNW_SSSE3
DENOISE_DCT
ss
se3
;-----------------------------------------------------------------------------
; int x264_decimate_score( int16_t *dct )
;-----------------------------------------------------------------------------
; DECIMATE_MASK_SSE2 zeromask, bigmask, src, cmp, cpu, unused
; Builds two 16-bit masks from the 16 int16 coefficients at [src]:
;   %1 <- one bit per coefficient that is zero
;   %2 <- one bit per coefficient whose byte-packed absolute value is
;         greater than the matching byte of %4 (callers in this file pass
;         pb_1, i.e. sixteen bytes of 1)
; %5 selects the pabsw path when it is "ssse3"; %6 is not referenced here.
; Works in xmm0-xmm2; the non-ssse3 path also hands xmm3/xmm4 to ABS2_MMX.
%macro DECIMATE_MASK_SSE2 6
%ifidn %5, ssse3
; ssse3: absolute value straight from memory
pabsw
xmm0
,
[
%
3
+
0
]
pabsw
xmm1
,
[
%
3
+
16
]
%else
; otherwise load, then take absolute values via ABS2_MMX
; (helper defined outside this view - presumably xmm3/xmm4 are scratch; confirm)
movdqa
xmm0
,
[
%
3
+
0
]
movdqa
xmm1
,
[
%
3
+
16
]
ABS2_MMX
xmm0
,
xmm1
,
xmm3
,
xmm4
%endif
; narrow the 16 words to 16 signed-saturated bytes in xmm0
packsswb
xmm0
,
xmm1
; xmm2 = 0xff in every byte position whose coefficient is zero
pxor
xmm2
,
xmm2
pcmpeqb
xmm2
,
xmm0
; xmm0 = 0xff in every byte position that compares greater than %4
pcmpgtb
xmm0
,
%
4
; collapse each byte's top bit into the two result masks
pmovmskb
%
1
,
xmm2
pmovmskb
%
2
,
xmm0
%endmacro
; DECIMATE_MASK_MMX zeromask, bigmask, src, cmp, cpu, tmp
; MMX counterpart of DECIMATE_MASK_SSE2 with the same outputs:
;   %1 <- 16-bit mask, one bit per zero coefficient at [src]
;   %2 <- 16-bit mask, one bit per coefficient greater than %4 after
;         abs + byte packing
; The 16 coefficients are handled as two groups of eight; each group gives
; an 8-bit mask and the second group's mask is shifted into bits 8..15.
; %6 is a scratch general-purpose register; %5 is not referenced here.
; All four loads from [%3] are issued before %1/%2/%6 are written.
; Works in mm0-mm5.
%macro DECIMATE_MASK_MMX 6
movq
mm0
,
[
%
3
+
0
]
movq
mm1
,
[
%
3
+
8
]
movq
mm2
,
[
%
3
+
16
]
movq
mm3
,
[
%
3
+
24
]
; absolute values via ABS2_MMX (helper defined outside this view -
; presumably mm4/mm5 are scratch; confirm)
ABS2_MMX
mm0
,
mm1
,
mm4
,
mm5
ABS2_MMX
mm2
,
mm3
,
mm4
,
mm5
; mm0 = coefficients 0..7 as bytes, mm2 = coefficients 8..15 as bytes
packsswb
mm0
,
mm1
packsswb
mm2
,
mm3
; mm4/mm5 = 0xff where the packed byte is zero
pxor
mm4
,
mm4
pxor
mm5
,
mm5
pcmpeqb
mm4
,
mm0
pcmpeqb
mm5
,
mm2
; mm0/mm2 = 0xff where the packed byte is greater than %4
pcmpgtb
mm0
,
%
4
pcmpgtb
mm2
,
%
4
; %1 = (zero mask of 8..15 << 8) | zero mask of 0..7
pmovmskb
%
6
,
mm4
pmovmskb
%
1
,
mm5
shl
%
1
,
8
or
%
1
,
%
6
; %2 = (greater-than mask of 8..15 << 8) | greater-than mask of 0..7
pmovmskb
%
6
,
mm0
pmovmskb
%
2
,
mm2
shl
%
2
,
8
or
%
2
,
%
6
%endmacro
cextern
x264_decimate_table4
cextern
x264_decimate_table8
; DECIMATE4x4 n, cpu
; Emits x264_decimate_score<n>_<cpu>; this file instantiates n = 15 and 16.
; The function reads 16 coefficients at r0 through DECIMATE_MASK, returns 0
; when all of them are zero, 9 when any exceeds pb_1 after abs, and
; otherwise a score built in eax from decimate_mask_table4 and
; x264_decimate_table4 lookups.
; NOTE(review): r0 is presumably the dct argument as set up by cglobal
; (x86inc.asm, not visible here) - confirm.
%macro DECIMATE4x4 2
;A LUT is faster than bsf on AMD processors, and no slower on Intel
;This is not true for score64.
cglobal
x264_decimate_score
%
1
_
%
2
,
1
,
3
%ifdef PIC
; PIC: reach both tables through registers instead of absolute addresses
lea
r10
,
[
x264_decimate_table4
GLOBAL
]
lea
r11
,
[
decimate_mask_table4
GLOBAL
]
%define table r10
%define mask_table r11
%else
%define table x264_decimate_table4
%define mask_table decimate_mask_table4
%endif
; edx = mask of zero coefficients, eax = mask of coefficients > pb_1
DECIMATE_MASK
edx
,
eax
,
r0
,
[
pb_1
GLOBAL
],
%
2
,
ecx
; flip to a mask of nonzero coefficients; nothing set -> return eax,
; which is 0 here because an all-zero block has no coefficient > 1
xor
edx
,
0xffff
je
.ret
; any coefficient above 1 in magnitude -> score 9
test
eax
,
eax
jne
.ret9
%if %1==15
; n == 15: drop bit 0 so coefficient 0 takes no part in the run scoring.
; NOTE(review): coefficient 0 was still included in the >1 test above, so
; this variant appears to rely on callers keeping it within [-1,1] - confirm.
shr
edx
,
1
%endif
; eax = table score for the low 8 bits of the mask; finished if no
; higher bit is set
movzx
ecx
,
dl
movzx
eax
,
byte
[
mask_table
+
rcx
]
cmp
edx
,
ecx
je
.ret
; Shift out everything up to and including the highest set bit of the low
; byte, so bit 0 becomes the coefficient right above it.
; NOTE(review): when the low byte is 0, bsr has a zero source and its
; destination is architecturally undefined; the code then depends on ecx
; staying 0. The resulting run would be one short, but such runs are >= 7
; and score 0 in x264_decimate_table4 either way - confirm on target CPUs.
bsr
ecx
,
ecx
shr
edx
,
1
shr
edx
,
cl
; ecx = number of zeros before the next nonzero coefficient; drop that
; run and the coefficient itself from edx
bsf
ecx
,
edx
shr
edx
,
1
shr
edx
,
cl
; add the score for that run, then the table score of whatever is left
add
al
,
byte
[
table
+
rcx
]
add
al
,
byte
[
mask_table
+
rdx
]
.ret:
REP_RET
.ret9:
mov
eax
,
9
RET
%endmacro
; Instantiate the 15- and 16-coefficient scorers.
; DECIMATE_MASK is rebound before each group so the shared DECIMATE4x4 body
; expands the matching mask helper. The mmxext versions are only assembled
; when ARCH_X86_64 is not defined.
%ifndef ARCH_X86_64
%define DECIMATE_MASK DECIMATE_MASK_MMX
DECIMATE4x4
15
,
mmxext
DECIMATE4x4
16
,
mmxext
%endif
; sse2 and ssse3 versions share DECIMATE_MASK_SSE2; the cpu argument picks
; the pabsw path inside it
%define DECIMATE_MASK DECIMATE_MASK_SSE2
DECIMATE4x4
15
,
ss
e2
DECIMATE4x4
15
,
ss
se3
DECIMATE4x4
16
,
ss
e2
DECIMATE4x4
16
,
ss
se3
; DECIMATE8x8 cpu
; Emits x264_decimate_score64_<cpu>. The 64 coefficients at r0 are read as
; four groups of 16 through DECIMATE_MASK (byte offsets 0, 32, 64, 96).
; The function returns 0 when every coefficient is zero, 9 when any
; coefficient exceeds pb_1 after abs, and otherwise the sum of
; x264_decimate_table8[run] over the nonzero coefficients, found by
; repeatedly bit-scanning a 64-bit nonzero mask.
; NOTE(review): the r0..r5 / m7 names and the cglobal argument setup come
; from x86inc.asm, which is not visible here - confirm their mapping.
%macro DECIMATE8x8 1
%ifdef ARCH_X86_64
; ---- x86_64: the whole nonzero mask fits in r1 ----
cglobal
x264_decimate_score64_
%
1
,
1
,
4
%ifdef PIC
lea
r10
,
[
x264_decimate_table8
GLOBAL
]
%define table r10
%else
%define table x264_decimate_table8
%endif
mova
m7
,
[
pb_1
GLOBAL
]
; group 0: zero mask -> r1d, >1 mask -> eax (checked immediately)
DECIMATE_MASK
r1d
,
eax
,
r0
,
m7
,
%
1
,
null
test
eax
,
eax
jne
.ret9
; group 1: zero mask into bits 16..31 of r1, >1 mask replaces eax
DECIMATE_MASK
r2d
,
eax
,
r0
+
32
,
m7
,
%
1
,
null
shl
r2d
,
16
or
r1d
,
r2d
; group 2: zero mask into bits 32..47, >1 mask folded into eax
DECIMATE_MASK
r2d
,
r3d
,
r0
+
64
,
m7
,
%
1
,
null
shl
r2
,
32
or
eax
,
r3d
or
r1
,
r2
; group 3: zero mask into bits 48..63, >1 mask left in r3d for now
DECIMATE_MASK
r2d
,
r3d
,
r0
+
96
,
m7
,
%
1
,
null
shl
r2
,
48
or
r1
,
r2
; r1 = mask of nonzero coefficients; all zero -> return eax (0 in that case)
not
r1
test
r1
,
r1
je
.ret
; any >1 coefficient in groups 1..3 -> 9; otherwise eax is now 0 and
; serves as the score accumulator
or
eax
,
r3d
jne
.ret9
.loop:
; rcx = zeros below the next nonzero coefficient; drop them, add the run's
; score, then drop the coefficient itself and repeat while bits remain
bsf
rcx
,
r1
shr
r1
,
cl
movzx
ecx
,
byte
[
table
+
rcx
]
add
eax
,
ecx
shr
r1
,
1
jne
.loop
.ret:
REP_RET
.ret9:
mov
eax
,
9
RET
%else
; ARCH
; ---- x86_32: the nonzero mask is split as r3 (low 32 bits) : r4 (high) ----
; The mmxext build asks cglobal for one more register because
; DECIMATE_MASK_MMX needs r5 as scratch.
%ifidn %1, mmxext
cglobal
x264_decimate_score64_
%
1
,
1
,
6
%else
cglobal
x264_decimate_score64_
%
1
,
1
,
5
%endif
mova
m7
,
[
pb_1
GLOBAL
]
; group 0: zero mask -> r3, >1 mask -> r2 (checked immediately)
DECIMATE_MASK
r3
,
r2
,
r0
,
m7
,
%
1
,
r5
test
r2
,
r2
jne
.ret9
; group 1: zero mask into bits 16..31 of r3, >1 mask replaces r2
DECIMATE_MASK
r4
,
r2
,
r0
+
32
,
m7
,
%
1
,
r5
shl
r4
,
16
or
r3
,
r4
; group 2: zero mask -> r4, >1 mask folded into r2
DECIMATE_MASK
r4
,
r1
,
r0
+
64
,
m7
,
%
1
,
r5
or
r2
,
r1
; group 3: zero mask into bits 16..31 of r4; its >1 mask overwrites r0,
; which is safe because this is the last read through r0
DECIMATE_MASK
r1
,
r0
,
r0
+
96
,
m7
,
%
1
,
r5
shl
r1
,
16
or
r4
,
r1
; r3:r4 = mask of nonzero coefficients; all zero -> return r0
; (presumably eax under x86inc on x86_32, and 0 in that case - confirm)
not
r3
not
r4
mov
r1
,
r3
or
r1
,
r4
je
.ret
; any >1 coefficient in groups 1..3 -> 9; otherwise r0 is now 0 and serves
; as the score accumulator
or
r0
,
r2
jne
.ret9
;r2 is zero at this point, so we don't need to zero it
.loop:
; ecx = zeros below the next nonzero coefficient within the low half;
; an empty low half is handled at .largerun
bsf
ecx
,
r3
test
r3
,
r3
je
.largerun
; drop the run from the 64-bit pair, add its score, drop the coefficient
shrd
r3
,
r4
,
cl
shr
r4
,
cl
movzx
ecx
,
byte
[
x264_decimate_table8
+
ecx
]
add
r0
,
ecx
shrd
r3
,
r4
,
1
shr
r4
,
1
; continue while any bit is left in either half
mov
r2
,
r3
or
r2
,
r4
jne
.loop
.ret:
REP_RET
.ret9:
mov
eax
,
9
RET
.largerun:
; Low half empty: the next coefficient sits after at least 32 zeros, a run
; that scores 0 in x264_decimate_table8 (entries 24..63 are 0), so it is
; consumed without touching the accumulator. The high half moves down into
; r3 and scanning resumes if bits remain.
mov
r3
,
r4
xor
r4
,
r4
bsf
ecx
,
r3
shr
r3
,
cl
shr
r3
,
1
jne
.loop
REP_RET
%endif
; ARCH
%endmacro
; Instantiate the 64-coefficient scorer. The mmxext version is only
; assembled when ARCH_X86_64 is not defined. INIT_MMX / INIT_XMM come from
; x86inc.asm (not visible here) and presumably select what mova/m7 expand
; to inside the macro body - confirm.
%ifndef ARCH_X86_64
INIT_MMX
%define DECIMATE_MASK DECIMATE_MASK_MMX
DECIMATE8x8
mmxext
%endif
INIT_XMM
%define DECIMATE_MASK DECIMATE_MASK_SSE2
DECIMATE8x8
ss
e2
DECIMATE8x8
ss
se3
common/x86/quant.h
View file @
d1fbc652
...
...
@@ -46,5 +46,14 @@ void x264_dequant_8x8_flat16_sse2( int16_t dct[8][8], int dequant_mf[6][8][8], i
void
x264_denoise_dct_mmx
(
int16_t
*
dct
,
uint32_t
*
sum
,
uint16_t
*
offset
,
int
size
);
void
x264_denoise_dct_sse2
(
int16_t
*
dct
,
uint32_t
*
sum
,
uint16_t
*
offset
,
int
size
);
void
x264_denoise_dct_ssse3
(
int16_t
*
dct
,
uint32_t
*
sum
,
uint16_t
*
offset
,
int
size
);
/* Assembly implementations of the decimate_score15/16/64 entries of
 * x264_quant_function_t.  Each takes the block's coefficient array and
 * returns its decimation score.  The 15 variants take the pointer to the
 * full 16-coefficient block, not to coefficient 1.  The _mmxext symbols
 * are only assembled for non-x86_64 builds. */
int
x264_decimate_score15_mmxext
(
int16_t
*
dct
);
int
x264_decimate_score15_sse2
(
int16_t
*
dct
);
int
x264_decimate_score15_ssse3
(
int16_t
*
dct
);
int
x264_decimate_score16_mmxext
(
int16_t
*
dct
);
int
x264_decimate_score16_sse2
(
int16_t
*
dct
);
int
x264_decimate_score16_ssse3
(
int16_t
*
dct
);
int
x264_decimate_score64_mmxext
(
int16_t
*
dct
);
int
x264_decimate_score64_sse2
(
int16_t
*
dct
);
int
x264_decimate_score64_ssse3
(
int16_t
*
dct
);
#endif
encoder/macroblock.c
View file @
d1fbc652
...
...
@@ -35,50 +35,6 @@ static inline void zigzag_scan_2x2_dc( int16_t level[4], int16_t dct[2][2] )
}
#undef ZIG
/* (ref: JVT-B118)
 * x264_mb_decimate_score: score a block's coefficients to decide whether
 * the block is cheap enough to be zeroed out (low score means zero it).
 * Used for inter macroblocks (luma and chroma):
 *   luma:   for an 8x8 block:    if score < 4 -> zero it
 *           for the complete mb: if score < 6 -> zero it
 *   chroma: for the complete mb: if score < 7 -> zero it
 *
 * dct    coefficients, examined from index i_max-1 down to 0
 * i_max  coefficient count; 64 selects the 8x8 cost table, anything else
 *        the 4x4 one
 *
 * Returns 9 as soon as a coefficient outside [-1,1] is met, otherwise the
 * sum of the per-run costs of all nonzero coefficients.
 */
static int x264_mb_decimate_score( int16_t *dct, int i_max )
{
    static const int run_cost4[16] =
    {
        3, 2, 2, 1, 1, 1, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0
    };
    static const int run_cost8[64] =
    {
        3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    };
    const int *run_cost = run_cost4;
    int total = 0;
    int pos;

    if( i_max == 64 )
        run_cost = run_cost8;

    /* Trailing zeros carry no cost: step down to the last nonzero entry. */
    for( pos = i_max - 1; pos >= 0; pos-- )
        if( dct[pos] != 0 )
            break;

    while( pos >= 0 )
    {
        const int level = dct[pos--];
        int zeros = 0;

        /* Anything larger than +/-1 makes the block not worth dropping. */
        if( level < -1 || level > 1 )
            return 9;

        /* Length of the zero run directly below this coefficient. */
        for( ; pos >= 0 && dct[pos] == 0; pos-- )
            zeros++;

        total += run_cost[zeros];
    }

    return total;
}
static
ALWAYS_INLINE
void
x264_quant_4x4
(
x264_t
*
h
,
int16_t
dct
[
4
][
4
],
int
i_qp
,
int
i_ctxBlockCat
,
int
b_intra
,
int
idx
)
{
int
i_quant_cat
=
b_intra
?
CQM_4IY
:
CQM_4PY
;
...
...
@@ -249,7 +205,7 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp )
h
->
zigzagf
.
scan_4x4
(
h
->
dct
.
luma4x4
[
16
+
i
+
ch
*
4
],
dct4x4
[
i
]
);
if
(
b_decimate
)
i_decimate_score
+=
x264_mb_
decimate_score
(
h
->
dct
.
luma4x4
[
16
+
i
+
ch
*
4
]
+
1
,
15
);
i_decimate_score
+=
h
->
quantf
.
decimate_score
15
(
h
->
dct
.
luma4x4
[
16
+
i
+
ch
*
4
]
);
}
h
->
dctf
.
dct2x2dc
(
dct2x2
);
...
...
@@ -562,7 +518,7 @@ void x264_macroblock_encode( x264_t *h )
if
(
b_decimate
)
{
int
i_decimate_8x8
=
x264_mb_
decimate_score
(
h
->
dct
.
luma8x8
[
idx
]
,
64
);
int
i_decimate_8x8
=
h
->
quantf
.
decimate_score
64
(
h
->
dct
.
luma8x8
[
idx
]
);
i_decimate_mb
+=
i_decimate_8x8
;
if
(
i_decimate_8x8
<
4
)
nnz8x8
[
idx
]
=
0
;
...
...
@@ -606,7 +562,7 @@ void x264_macroblock_encode( x264_t *h )
h
->
zigzagf
.
scan_4x4
(
h
->
dct
.
luma4x4
[
idx
],
dct4x4
[
idx
]
);
if
(
b_decimate
&&
i_decimate_8x8
<=
6
)
i_decimate_8x8
+=
x264_mb_
decimate_score
(
h
->
dct
.
luma4x4
[
idx
]
,
16
);
i_decimate_8x8
+=
h
->
quantf
.
decimate_score
16
(
h
->
dct
.
luma4x4
[
idx
]
);
}
/* decimate this 8x8 block */
...
...
@@ -762,7 +718,7 @@ int x264_macroblock_probe_skip( x264_t *h, const int b_bidir )
if
(
!
array_non_zero
(
dct4x4
[
i4x4
])
)
continue
;
h
->
zigzagf
.
scan_4x4
(
dctscan
,
dct4x4
[
i4x4
]
);
i_decimate_mb
+=
x264_mb_
decimate_score
(
dctscan
,
16
);
i_decimate_mb
+=
h
->
quantf
.
decimate_score
16
(
dctscan
);
if
(
i_decimate_mb
>=
6
)
return
0
;
}
...
...
@@ -804,11 +760,12 @@ int x264_macroblock_probe_skip( x264_t *h, const int b_bidir )
/* calculate dct coeffs */
for
(
i4x4
=
0
,
i_decimate_mb
=
0
;
i4x4
<
4
;
i4x4
++
)
{
dct4x4
[
i4x4
][
0
][
0
]
=
0
;
h
->
quantf
.
quant_4x4
(
dct4x4
[
i4x4
],
h
->
quant4_mf
[
CQM_4PC
][
i_qp
],
h
->
quant4_bias
[
CQM_4PC
][
i_qp
]
);
if
(
!
array_non_zero
(
dct4x4
[
i4x4
])
)
continue
;
h
->
zigzagf
.
scan_4x4
(
dctscan
,
dct4x4
[
i4x4
]
);
i_decimate_mb
+=
x264_mb_
decimate_score
(
dctscan
+
1
,
15
);
i_decimate_mb
+=
h
->
quantf
.
decimate_score
15
(
dctscan
);
if
(
i_decimate_mb
>=
7
)
return
0
;
}
...
...
@@ -897,7 +854,7 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
h
->
zigzagf
.
scan_8x8
(
h
->
dct
.
luma8x8
[
i8
],
dct8x8
);
if
(
b_decimate
&&
!
h
->
mb
.
b_trellis
)
nnz8x8
=
4
<=
x264_mb_
decimate_score
(
h
->
dct
.
luma8x8
[
i8
]
,
64
);
nnz8x8
=
4
<=
h
->
quantf
.
decimate_score
64
(
h
->
dct
.
luma8x8
[
i8
]
);
else
nnz8x8
=
array_non_zero
(
dct8x8
);
...
...
@@ -922,7 +879,7 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
{
int
i_decimate_8x8
=
0
;
for
(
i4
=
0
;
i4
<
4
&&
i_decimate_8x8
<
4
;
i4
++
)
i_decimate_8x8
+=
x264_mb_
decimate_score
(
h
->
dct
.
luma4x4
[
i8
*
4
+
i4
]
,
16
);
i_decimate_8x8
+=
h
->
quantf
.
decimate_score
16
(
h
->
dct
.
luma4x4
[
i8
*
4
+
i4
]
);
nnz8x8
=
4
<=
i_decimate_8x8
;
}
else
...
...
tools/checkasm.c
View file @
d1fbc652
...
...
@@ -1108,6 +1108,37 @@ static int check_quant( int cpu_ref, int cpu_new )
}
report
(
"denoise dct :"
);
/* Cross-check one decimate_score entry (decname) of the tested function
 * table against the C table.  Nothing runs unless qf_a's pointer differs
 * from qf_ref's.  Each of the 100 rounds fills dct1 with w*w small random
 * non-negative values (dct1[0] forced to 0 when ac is set), copies them to
 * dct2, and requires qf_c and qf_a to return the same score for dct2; a
 * mismatch clears ok, prints a failure line and ends the rounds.  After a
 * matching round both versions are passed to call_c2/call_a2 (harness
 * helpers defined elsewhere - presumably timing hooks; confirm).
 * qname and block are accepted but not used by the expansion. */
#define TEST_DECIMATE( qname, decname, block, w, ac ) \
    if( qf_a.decname != qf_ref.decname ) \
    { \
        set_func_name( #decname ); \
        used_asm = 1; \
        for( i = 0; i < 100; i++ ) \
        { \
            int score_c, score_a, k; \
            for( k = 0; k < w*w; k++ ) \
                dct1[k] = !(rand()&3) + (!(rand()&15))*(rand()&3); \
            if( ac ) \
                dct1[0] = 0; \
            memcpy( dct2, dct1, w*w*2 ); \
            score_c = call_c1( qf_c.decname, (void*)dct2 ); \
            score_a = call_a1( qf_a.decname, (void*)dct2 ); \
            if( score_c != score_a ) \
            { \
                ok = 0; \
                fprintf( stderr, #decname ": [FAILED]\n" ); \
                break; \
            } \
            call_c2( qf_c.decname, (void*)dct2 ); \
            call_a2( qf_a.decname, (void*)dct2 ); \
        } \
    }
TEST_DECIMATE
(
quant_8x8
,
decimate_score64
,
CQM_8IY
,
8
,
0
);
TEST_DECIMATE
(
quant_4x4
,
decimate_score16
,
CQM_4IY
,
4
,
0
);
TEST_DECIMATE
(
quant_4x4
,
decimate_score15
,
CQM_4IY
,
4
,
1
);
report
(
"decimate_score :"
);
return
ret
;
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment