Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
7
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
Open sidebar
VideoLAN
x264
Commits
fb660325
Commit
fb660325
authored
Jul 02, 2008
by
Fiona Glaser
Committed by
Loren Merritt
Jul 02, 2008
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
denoise_dct asm
parent
223eedb0
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
105 additions
and
37 deletions
+105
-37
common/common.h
common/common.h
+2
-2
common/quant.c
common/quant.c
+19
-0
common/quant.h
common/quant.h
+2
-0
common/x86/quant-a.asm
common/x86/quant-a.asm
+53
-0
common/x86/quant.h
common/x86/quant.h
+3
-0
encoder/analyse.c
encoder/analyse.c
+1
-1
encoder/macroblock.c
encoder/macroblock.c
+4
-33
encoder/macroblock.h
encoder/macroblock.h
+0
-1
tools/checkasm.c
tools/checkasm.c
+21
-0
No files found.
common/common.h
View file @
fb660325
...
...
@@ -293,8 +293,8 @@ struct x264_t
uint16_t
(
*
quant4_bias
[
4
])[
16
];
/* [4][52][16] */
uint16_t
(
*
quant8_bias
[
2
])[
64
];
/* [2][52][64] */
uint32_t
nr_residual_sum
[
2
][
64
];
uint32_t
nr_offset
[
2
][
64
];
DECLARE_ALIGNED_16
(
uint32_t
nr_residual_sum
[
2
][
64
]
)
;
DECLARE_ALIGNED_16
(
uint16_t
nr_offset
[
2
][
64
]
)
;
uint32_t
nr_count
[
2
];
/* Slice header */
...
...
common/quant.c
View file @
fb660325
...
...
@@ -193,6 +193,20 @@ void x264_mb_dequant_4x4_dc( int16_t dct[4][4], int dequant_mf[6][4][4], int i_q
}
}
/* Reference C denoise kernel.
 *
 * For each coefficient at index 1 .. size-1:
 *   - add its absolute value to sum[index];
 *   - reduce the absolute value by offset[index], clamping at zero;
 *   - write the result back to dct[index] with the original sign.
 * Index 0 is never read or written. */
void x264_denoise_dct_core( int16_t *dct, uint32_t *sum, uint16_t *offset, int size )
{
    int idx = 1;

    while( idx < size )
    {
        const int coef = dct[idx];
        /* All ones when coef is negative (relies on >> of a negative int
         * being an arithmetic shift), zero otherwise. */
        const int mask = coef >> 15;
        /* Branchless absolute value: (x + mask) ^ mask. */
        int mag = ( coef + mask ) ^ mask;

        sum[idx] += mag;
        mag -= offset[idx];

        if( mag < 0 )
            dct[idx] = 0;
        else
            dct[idx] = ( mag ^ mask ) - mask;   /* re-apply the sign */

        idx++;
    }
}
void
x264_quant_init
(
x264_t
*
h
,
int
cpu
,
x264_quant_function_t
*
pf
)
{
pf
->
quant_8x8
=
quant_8x8
;
...
...
@@ -203,6 +217,8 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
pf
->
dequant_4x4
=
dequant_4x4
;
pf
->
dequant_8x8
=
dequant_8x8
;
pf
->
denoise_dct_core
=
x264_denoise_dct_core
;
#ifdef HAVE_MMX
if
(
cpu
&
X264_CPU_MMX
)
{
...
...
@@ -216,6 +232,7 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
pf
->
dequant_4x4
=
x264_dequant_4x4_flat16_mmx
;
pf
->
dequant_8x8
=
x264_dequant_8x8_flat16_mmx
;
}
pf
->
denoise_dct_core
=
x264_denoise_dct_core_mmx
;
#endif
}
...
...
@@ -239,6 +256,7 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
pf
->
dequant_4x4
=
x264_dequant_4x4_flat16_sse2
;
pf
->
dequant_8x8
=
x264_dequant_8x8_flat16_sse2
;
}
pf
->
denoise_dct_core
=
x264_denoise_dct_core_sse2
;
}
if
(
cpu
&
X264_CPU_SSSE3
)
...
...
@@ -247,6 +265,7 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
pf
->
quant_4x4_dc
=
x264_quant_4x4_dc_ssse3
;
pf
->
quant_4x4
=
x264_quant_4x4_ssse3
;
pf
->
quant_8x8
=
x264_quant_8x8_ssse3
;
pf
->
denoise_dct_core
=
x264_denoise_dct_core_ssse3
;
}
#endif // HAVE_MMX
...
...
common/quant.h
View file @
fb660325
...
...
@@ -32,6 +32,8 @@ typedef struct
void
(
*
dequant_4x4
)(
int16_t
dct
[
4
][
4
],
int
dequant_mf
[
6
][
4
][
4
],
int
i_qp
);
void
(
*
dequant_8x8
)(
int16_t
dct
[
8
][
8
],
int
dequant_mf
[
6
][
8
][
8
],
int
i_qp
);
void
(
*
denoise_dct_core
)(
int16_t
*
dct
,
uint32_t
*
sum
,
uint16_t
*
offset
,
int
size
);
}
x264_quant_function_t
;
void
x264_quant_init
(
x264_t
*
h
,
int
cpu
,
x264_quant_function_t
*
pf
);
...
...
common/x86/quant-a.asm
View file @
fb660325
...
...
@@ -328,3 +328,56 @@ INIT_XMM
DEQUANT
ss
e2
,
4
,
4
,
2
DEQUANT
ss
e2
,
8
,
6
,
2
;-----------------------------------------------------------------------------
; void x264_denoise_dct_core_mmx( int16_t *dct, uint32_t *sum, uint16_t *offset, int size )
;-----------------------------------------------------------------------------
; DENOISE_DCT <suffix>
; Emits x264_denoise_dct_core_<suffix>, the SIMD counterpart of the C
; prototype  void f( int16_t *dct, uint32_t *sum, uint16_t *offset, int size ).
; From the addressing below: r0 = dct (index scaled by 2), r1 = sum (index
; scaled by 4), r2 = offset (index scaled by 2), r3 = size in coefficients.
; The loop walks from the end of the arrays down to index 0, regsize
; coefficients per iteration, so element 0 of dct is processed too and is
; saved/restored around the loop.  sum[0] is updated and not restored.
; NOTE(review): size is expected to be a positive multiple of regsize;
; otherwise r3 goes negative and the loads index before the arrays.
; NOTE(review): assumes regsize is the SIMD register width in bytes and that
; PABSW/PSIGNW (defined outside this macro) compute per-word absolute value
; and sign transfer respectively -- confirm against their definitions.
%macro DENOISE_DCT 1
; NOTE(review): "4, 5" is presumably the x86inc argument count / GPR count
; (4 args in r0-r3, r4 as scratch) -- confirm against cglobal's definition.
cglobal
x264_denoise_dct_core_
%
1
,
4
,
5
movzx
r4d
,
word
[
r0
]
; backup DC coefficient
; m7 = 0, used below as the high half when widening words to dwords.
pxor
m7
,
m7
.loop:
; Step r3 back by one iteration's worth of coefficients.  The flags set by
; this sub are what the jg at the bottom of the loop tests.
sub
r3
,
regsize
; Load two registers of dct coefficients starting at dct[r3].
mova
m2
,
[
r0
+
r3
*
2
+
0
*
regsize
]
mova
m3
,
[
r0
+
r3
*
2
+
1
*
regsize
]
; m0/m1 = PABSW of the coefficients (presumably |dct|; originals stay in m2/m3).
PABSW
m0
,
m2
PABSW
m1
,
m3
; Keep copies of the PABSW results in m4/m5 for the sum accumulation below.
mova
m4
,
m0
mova
m5
,
m1
; Unsigned saturating subtract of offset[]: results below zero clamp to 0.
psubusw
m0
,
[
r2
+
r3
*
2
+
0
*
regsize
]
psubusw
m1
,
[
r2
+
r3
*
2
+
1
*
regsize
]
; PSIGNW with the original coefficients (presumably re-applies their signs).
PSIGNW
m0
,
m2
PSIGNW
m1
,
m3
; Store the denoised coefficients back to dct.
mova
[
r0
+
r3
*
2
+
0
*
regsize
],
m0
mova
[
r0
+
r3
*
2
+
1
*
regsize
],
m1
; Duplicate the saved values so each can be split into low and high halves.
mova
m2
,
m4
mova
m3
,
m5
; Interleave with zero (m7): widens each 16-bit value to 32 bits.
; m4/m2 = low/high halves of the first register, m5/m3 of the second.
punpcklwd
m4
,
m7
punpckhwd
m2
,
m7
punpcklwd
m5
,
m7
punpckhwd
m3
,
m7
; Add the existing 32-bit sums (four registers cover the same coefficients).
paddd
m4
,
[
r1
+
r3
*
4
+
0
*
regsize
]
paddd
m2
,
[
r1
+
r3
*
4
+
1
*
regsize
]
paddd
m5
,
[
r1
+
r3
*
4
+
2
*
regsize
]
paddd
m3
,
[
r1
+
r3
*
4
+
3
*
regsize
]
; Write the updated sums back.
mova
[
r1
+
r3
*
4
+
0
*
regsize
],
m4
mova
[
r1
+
r3
*
4
+
1
*
regsize
],
m2
mova
[
r1
+
r3
*
4
+
2
*
regsize
],
m5
mova
[
r1
+
r3
*
4
+
3
*
regsize
],
m3
; Loop while r3 was still > 0 after the sub at the top.  No instruction
; written out between that sub and here is a general-purpose ALU op, so the
; flags are expected to survive (NOTE(review): assumes the PABSW/PSIGNW/mova
; macro expansions also leave EFLAGS untouched -- confirm).
jg
.loop
mov
[
r0
],
r4w
; restore DC coefficient
RET
%endmacro
%define PABSW PABSW_MMX
%define PSIGNW PSIGNW_MMX
%ifndef ARCH_X86_64
INIT_MMX
DENOISE_DCT
mmx
%endif
INIT_XMM
DENOISE_DCT
ss
e2
%define PABSW PABSW_SSSE3
%define PSIGNW PSIGNW_SSSE3
DENOISE_DCT
ss
se3
common/x86/quant.h
View file @
fb660325
...
...
@@ -42,5 +42,8 @@ void x264_dequant_4x4_flat16_mmx( int16_t dct[4][4], int dequant_mf[6][4][4], in
void
x264_dequant_8x8_flat16_mmx
(
int16_t
dct
[
8
][
8
],
int
dequant_mf
[
6
][
8
][
8
],
int
i_qp
);
void
x264_dequant_4x4_flat16_sse2
(
int16_t
dct
[
4
][
4
],
int
dequant_mf
[
6
][
4
][
4
],
int
i_qp
);
void
x264_dequant_8x8_flat16_sse2
(
int16_t
dct
[
8
][
8
],
int
dequant_mf
[
6
][
8
][
8
],
int
i_qp
);
void
x264_denoise_dct_core_mmx
(
int16_t
*
dct
,
uint32_t
*
sum
,
uint16_t
*
offset
,
int
size
);
void
x264_denoise_dct_core_sse2
(
int16_t
*
dct
,
uint32_t
*
sum
,
uint16_t
*
offset
,
int
size
);
void
x264_denoise_dct_core_ssse3
(
int16_t
*
dct
,
uint32_t
*
sum
,
uint16_t
*
offset
,
int
size
);
#endif
encoder/analyse.c
View file @
fb660325
...
...
@@ -2593,7 +2593,7 @@ void x264_macroblock_analyse( x264_t *h )
x264_mb_analyse_transform
(
h
);
h
->
mb
.
b_trellis
=
h
->
param
.
analyse
.
i_trellis
;
h
->
mb
.
b_noise_reduction
=
h
->
param
.
analyse
.
i_noise_reduction
;
h
->
mb
.
b_noise_reduction
=
!!
h
->
param
.
analyse
.
i_noise_reduction
;
if
(
h
->
mb
.
b_trellis
==
1
||
h
->
mb
.
b_noise_reduction
)
h
->
mb
.
i_skip_intra
=
0
;
}
...
...
encoder/macroblock.c
View file @
fb660325
...
...
@@ -443,11 +443,12 @@ void x264_macroblock_encode( x264_t *h )
DECLARE_ALIGNED_16
(
int16_t
dct8x8
[
4
][
8
][
8
]
);
b_decimate
&=
!
h
->
mb
.
b_trellis
;
// 8x8 trellis is inherently optimal decimation
h
->
dctf
.
sub16x16_dct8
(
dct8x8
,
h
->
mb
.
pic
.
p_fenc
[
0
],
h
->
mb
.
pic
.
p_fdec
[
0
]
);
h
->
nr_count
[
1
]
+=
h
->
mb
.
b_noise_reduction
*
4
;
for
(
idx
=
0
;
idx
<
4
;
idx
++
)
{
if
(
h
->
mb
.
b_noise_reduction
)
x264_
denoise_dct
(
h
,
(
int16_t
*
)
dct8x8
[
idx
]
);
h
->
quantf
.
denoise_dct
_core
(
*
dct8x8
[
idx
],
h
->
nr_residual_sum
[
1
],
h
->
nr_offset
[
1
],
64
);
if
(
h
->
mb
.
b_trellis
)
x264_quant_8x8_trellis
(
h
,
dct8x8
[
idx
],
CQM_8PY
,
i_qp
,
0
);
else
...
...
@@ -482,6 +483,7 @@ void x264_macroblock_encode( x264_t *h )
{
DECLARE_ALIGNED_16
(
int16_t
dct4x4
[
16
][
4
][
4
]
);
h
->
dctf
.
sub16x16_dct
(
dct4x4
,
h
->
mb
.
pic
.
p_fenc
[
0
],
h
->
mb
.
pic
.
p_fdec
[
0
]
);
h
->
nr_count
[
0
]
+=
h
->
mb
.
b_noise_reduction
*
16
;
for
(
i8x8
=
0
;
i8x8
<
4
;
i8x8
++
)
{
...
...
@@ -494,7 +496,7 @@ void x264_macroblock_encode( x264_t *h )
idx
=
i8x8
*
4
+
i4x4
;
if
(
h
->
mb
.
b_noise_reduction
)
x264_
denoise_dct
(
h
,
(
int16_t
*
)
dct4x4
[
idx
]
);
h
->
quantf
.
denoise_dct
_core
(
*
dct4x4
[
idx
],
h
->
nr_residual_sum
[
0
],
h
->
nr_offset
[
0
],
16
);
if
(
h
->
mb
.
b_trellis
)
x264_quant_4x4_trellis
(
h
,
dct4x4
[
idx
],
CQM_4PY
,
i_qp
,
DCT_LUMA_4x4
,
0
);
else
...
...
@@ -738,37 +740,6 @@ void x264_noise_reduction_update( x264_t *h )
}
}
/* Denoise one block of transform coefficients in place.
 *
 * The category is taken from h->mb.b_transform_8x8: non-zero walks indices
 * 63..1, zero walks indices 15..1.  Index 0 is never touched.
 * h->nr_count[cat] is incremented once per call.  For every non-zero
 * coefficient, its magnitude is added to h->nr_residual_sum[cat][i] and the
 * coefficient is moved toward zero by h->nr_offset[cat][i], stopping at zero.
 * Zero coefficients are skipped without any write. */
void x264_denoise_dct( x264_t *h, int16_t *dct )
{
    const int cat = h->mb.b_transform_8x8;
    const int last = cat ? 63 : 15;
    int i;

    h->nr_count[cat]++;

    for( i = last; i >= 1; i-- )
    {
        int level = dct[i];

        if( !level )
            continue;

        if( level < 0 )
        {
            /* Negative coefficient: subtracting it adds its magnitude. */
            h->nr_residual_sum[cat][i] -= level;
            level += h->nr_offset[cat][i];
            if( level > 0 )
                level = 0;
        }
        else
        {
            h->nr_residual_sum[cat][i] += level;
            level -= h->nr_offset[cat][i];
            if( level < 0 )
                level = 0;
        }

        dct[i] = level;
    }
}
/*****************************************************************************
* RD only; 4 calls to this do not make up for one macroblock_encode.
* doesn't transform chroma dc.
...
...
encoder/macroblock.h
View file @
fb660325
...
...
@@ -55,7 +55,6 @@ void x264_quant_8x8_trellis( x264_t *h, int16_t dct[8][8], int i_quant_cat,
int
i_qp
,
int
b_intra
);
void
x264_noise_reduction_update
(
x264_t
*
h
);
void
x264_denoise_dct
(
x264_t
*
h
,
int16_t
*
dct
);
#endif
tools/checkasm.c
View file @
fb660325
...
...
@@ -1023,6 +1023,27 @@ static int check_quant( int cpu_ref, int cpu_new )
ok
=
oks
[
1
];
used_asm
=
used_asms
[
1
];
report
(
"dequant :"
);
if
(
qf_a
.
denoise_dct_core
!=
qf_ref
.
denoise_dct_core
)
{
int
size
;
for
(
size
=
16
;
size
<=
64
;
size
+=
48
)
{
set_func_name
(
"denoise_dct"
);
used_asm
=
1
;
memcpy
(
dct1
,
buf1
,
size
*
2
);
memcpy
(
dct2
,
buf1
,
size
*
2
);
memcpy
(
buf3
+
256
,
buf3
,
256
);
call_c1
(
qf_c
.
denoise_dct_core
,
dct1
,
(
uint32_t
*
)
buf3
,
(
uint16_t
*
)
buf2
,
size
);
call_a1
(
qf_a
.
denoise_dct_core
,
dct2
,
(
uint32_t
*
)(
buf3
+
256
),
(
uint16_t
*
)
buf2
,
size
);
if
(
memcmp
(
dct1
,
dct2
,
size
*
2
)
||
memcmp
(
buf3
+
4
,
buf3
+
256
+
4
,
(
size
-
1
)
*
sizeof
(
uint32_t
)
)
)
ok
=
0
;
call_c2
(
qf_c
.
denoise_dct_core
,
dct1
,
(
uint32_t
*
)
buf3
,
(
uint16_t
*
)
buf2
,
size
);
call_a2
(
qf_a
.
denoise_dct_core
,
dct2
,
(
uint32_t
*
)(
buf3
+
256
),
(
uint16_t
*
)
buf2
,
size
);
}
}
report
(
"denoise dct :"
);
return
ret
;
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment