Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
7
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
Open sidebar
VideoLAN
x264
Commits
32bd2d64
Commit
32bd2d64
authored
May 15, 2008
by
Fiona Glaser
Committed by
Loren Merritt
May 17, 2008
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
force unroll macroblock_load_pic_pointers
and a few other minor optimizations
parent
2d816a51
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
45 additions
and
53 deletions
+45
-53
common/macroblock.c
common/macroblock.c
+39
-39
encoder/macroblock.c
encoder/macroblock.c
+6
-14
No files found.
common/macroblock.c
View file @
32bd2d64
...
...
@@ -1011,6 +1011,42 @@ static NOINLINE void copy_column8( uint8_t *dst, uint8_t *src )
dst
[
i
*
FDEC_STRIDE
]
=
src
[
i
*
FDEC_STRIDE
];
}
/* Load the picture pointers and border pixels of plane i for the macroblock
 * at (i_mb_x, i_mb_y).
 *
 *   h       encoder context; reads h->fenc, h->fdec, h->fref0/h->fref1,
 *           h->sh.b_mbaff and h->mb, and writes into h->mb.pic.
 *   i_mb_x  macroblock column.
 *   i_mb_y  macroblock row.
 *   i       plane index; plane 0 uses a 16-pixel block, any other plane an
 *           8-pixel block.
 *
 * Forced inline so that callers passing a constant i get a specialised copy
 * per plane. */
static void ALWAYS_INLINE x264_macroblock_load_pic_pointers( x264_t *h, int i_mb_x, int i_mb_y, int i )
{
    const int b_interlaced = h->mb.b_interlaced;
    const int size = i ? 8 : 16;
    const int frame_stride = h->fdec->i_stride[i];
    /* Stride used inside the macroblock: frame stride shifted left by b_interlaced. */
    const int mb_stride = frame_stride << b_interlaced;
    /* Slot of p_fref[][] that receives this plane: 0 for plane 0, i+3 otherwise. */
    const int ref_slot = i ? i+3 : 0;
    const uint8_t *border = &h->mb.intra_border_backup[i_mb_y & h->sh.b_mbaff][i][i_mb_x*16>>!!i];
    /* offset[0]: position of this macroblock inside the plane.
     * offset[1]: same position, moved by one frame line when interlaced;
     *            used for odd reference indices. */
    int offset[2];
    int list, ref, row, sub;

    if( b_interlaced )
    {
        offset[0] = size * (i_mb_x + (i_mb_y&~1) * frame_stride) + (i_mb_y&1) * frame_stride;
        /* +frame_stride when i_mb_y is even, -frame_stride when it is odd. */
        offset[1] = offset[0] + (1-2*(i_mb_y&1)) * frame_stride;
    }
    else
    {
        offset[0] = size * (i_mb_x + i_mb_y * frame_stride);
        offset[1] = offset[0];
    }

    h->mb.pic.i_stride[i] = mb_stride;

    /* Hand the source block of h->fenc to the mc copy routine, targeting p_fenc. */
    h->mc.copy[i ? PIXEL_8x8 : PIXEL_16x16]( h->mb.pic.p_fenc[i], FENC_STRIDE,
                                             &h->fenc->plane[i][offset[0]], mb_stride, size );

    /* Copy size*3/2+1 bytes of the saved intra border, beginning one byte
     * before the border pointer, to p_fdec at offset -1-FDEC_STRIDE. */
    memcpy( &h->mb.pic.p_fdec[i][-1-FDEC_STRIDE], border-1, size*3/2+1 );

    if( b_interlaced )
    {
        /* Fetch the byte at index -1 of every line directly from the fdec plane. */
        const uint8_t *recon = &h->fdec->plane[i][offset[0]];
        for( row = 0; row < size; row++ )
            h->mb.pic.p_fdec[i][-1+row*FDEC_STRIDE] = recon[-1+row*mb_stride];
    }

    for( list = 0; list < 2; list++ )
    {
        x264_frame_t **frames = list ? h->fref1 : h->fref0;

        for( ref = 0; ref < h->mb.pic.i_fref[list]; ref++ )
        {
            /* Reference index ref maps to frame ref >> b_interlaced and to
             * offset[ref&1]. */
            x264_frame_t *frame = frames[ref >> b_interlaced];
            const int ref_offset = offset[ref&1];

            h->mb.pic.p_fref[list][ref][ref_slot] = &frame->plane[i][ref_offset];

            if( i != 0 )
                continue;

            /* Plane 0 additionally gets the filtered[1..3] planes in slots 1..3. */
            for( sub = 1; sub < 4; sub++ )
                h->mb.pic.p_fref[list][ref][sub] = &frame->filtered[sub][ref_offset];
        }
    }
}
void
x264_macroblock_cache_load
(
x264_t
*
h
,
int
i_mb_x
,
int
i_mb_y
)
{
int
i_mb_xy
=
i_mb_y
*
h
->
mb
.
i_mb_stride
+
i_mb_x
;
...
...
@@ -1189,45 +1225,9 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y )
}
/* load picture pointers */
for
(
i
=
0
;
i
<
3
;
i
++
)
{
const
int
w
=
(
i
==
0
?
16
:
8
);
const
int
i_stride
=
h
->
fdec
->
i_stride
[
i
];
const
int
i_stride2
=
i_stride
<<
h
->
mb
.
b_interlaced
;
const
int
i_pix_offset
=
h
->
mb
.
b_interlaced
?
w
*
(
i_mb_x
+
(
i_mb_y
&~
1
)
*
i_stride
)
+
(
i_mb_y
&
1
)
*
i_stride
:
w
*
(
i_mb_x
+
i_mb_y
*
i_stride
);
int
ref_pix_offset
[
2
]
=
{
i_pix_offset
,
i_pix_offset
};
const
uint8_t
*
intra_fdec
=
&
h
->
mb
.
intra_border_backup
[
i_mb_y
&
h
->
sh
.
b_mbaff
][
i
][
i_mb_x
*
16
>>!!
i
];
x264_frame_t
**
fref
[
2
]
=
{
h
->
fref0
,
h
->
fref1
};
int
j
,
k
,
l
;
if
(
h
->
mb
.
b_interlaced
)
ref_pix_offset
[
1
]
+=
(
1
-
2
*
(
i_mb_y
&
1
))
*
i_stride
;
h
->
mb
.
pic
.
i_stride
[
i
]
=
i_stride2
;
h
->
mc
.
copy
[
i
?
PIXEL_8x8
:
PIXEL_16x16
](
h
->
mb
.
pic
.
p_fenc
[
i
],
FENC_STRIDE
,
&
h
->
fenc
->
plane
[
i
][
i_pix_offset
],
i_stride2
,
w
);
memcpy
(
&
h
->
mb
.
pic
.
p_fdec
[
i
][
-
1
-
FDEC_STRIDE
],
intra_fdec
-
1
,
w
*
3
/
2
+
1
);
if
(
h
->
mb
.
b_interlaced
)
{
const
uint8_t
*
plane_fdec
=
&
h
->
fdec
->
plane
[
i
][
i_pix_offset
];
for
(
j
=
0
;
j
<
w
;
j
++
)
h
->
mb
.
pic
.
p_fdec
[
i
][
-
1
+
j
*
FDEC_STRIDE
]
=
plane_fdec
[
-
1
+
j
*
i_stride2
];
}
for
(
l
=
0
;
l
<
2
;
l
++
)
{
for
(
j
=
0
;
j
<
h
->
mb
.
pic
.
i_fref
[
l
];
j
++
)
{
h
->
mb
.
pic
.
p_fref
[
l
][
j
][
i
==
0
?
0
:
i
+
3
]
=
&
fref
[
l
][
j
>>
h
->
mb
.
b_interlaced
]
->
plane
[
i
][
ref_pix_offset
[
j
&
1
]];
if
(
i
==
0
)
for
(
k
=
1
;
k
<
4
;
k
++
)
h
->
mb
.
pic
.
p_fref
[
l
][
j
][
k
]
=
&
fref
[
l
][
j
>>
h
->
mb
.
b_interlaced
]
->
filtered
[
k
][
ref_pix_offset
[
j
&
1
]];
}
}
}
x264_macroblock_load_pic_pointers
(
h
,
i_mb_x
,
i_mb_y
,
0
);
x264_macroblock_load_pic_pointers
(
h
,
i_mb_x
,
i_mb_y
,
1
);
x264_macroblock_load_pic_pointers
(
h
,
i_mb_x
,
i_mb_y
,
2
);
if
(
h
->
fdec
->
integral
)
{
...
...
encoder/macroblock.c
View file @
32bd2d64
...
...
@@ -64,7 +64,7 @@ static int x264_mb_decimate_score( int16_t *dct, int i_max )
{
int
i_run
;
if
(
abs
(
dct
[
idx
--
]
)
>
1
)
if
(
(
unsigned
)(
dct
[
idx
--
]
+
1
)
>
2
)
return
9
;
i_run
=
0
;
...
...
@@ -273,15 +273,9 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qscale )
/* Mark the current macroblock as carrying no coded coefficients: clears the
 * luma and chroma cbp, the cached non-zero counts, and the stored cbp entry
 * for h->mb.i_mb_xy. */
static void x264_macroblock_encode_skip( x264_t *h )
{
    int blk = 0;

    h->mb.i_cbp_chroma = 0x00;
    h->mb.i_cbp_luma = 0x00;

    /* Zero the non-zero count of each of the 16+8 scan8-indexed blocks. */
    while( blk < 16+8 )
    {
        h->mb.cache.non_zero_count[x264_scan8[blk]] = 0;
        blk++;
    }

    /* Then zero the first X264_SCAN8_SIZE bytes of the cache array.
     * NOTE(review): if the elements are one byte wide this already covers
     * the loop above - confirm against the declaration before removing it. */
    memset( h->mb.cache.non_zero_count, 0, X264_SCAN8_SIZE );

    /* store cbp */
    h->mb.cbp[h->mb.i_mb_xy] = 0;
}
...
...
@@ -500,8 +494,8 @@ void x264_macroblock_encode( x264_t *h )
h
->
quantf
.
quant_4x4
(
dct4x4
[
idx
],
h
->
quant4_mf
[
CQM_4PY
][
i_qp
],
h
->
quant4_bias
[
CQM_4PY
][
i_qp
]
);
h
->
zigzagf
.
scan_4x4
(
h
->
dct
.
luma4x4
[
idx
],
dct4x4
[
idx
]
);
if
(
b_decimate
)
if
(
b_decimate
&&
i_decimate_8x8
<=
6
)
i_decimate_8x8
+=
x264_mb_decimate_score
(
h
->
dct
.
luma4x4
[
idx
],
16
);
}
...
...
@@ -799,10 +793,8 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
int
i4
;
DECLARE_ALIGNED_16
(
int16_t
dct4x4
[
4
][
4
][
4
]
);
h
->
dctf
.
sub8x8_dct
(
dct4x4
,
p_fenc
,
p_fdec
);
h
->
quantf
.
quant_4x4
(
dct4x4
[
0
],
h
->
quant4_mf
[
CQM_4PY
][
i_qp
],
h
->
quant4_bias
[
CQM_4PY
][
i_qp
]
);
h
->
quantf
.
quant_4x4
(
dct4x4
[
1
],
h
->
quant4_mf
[
CQM_4PY
][
i_qp
],
h
->
quant4_bias
[
CQM_4PY
][
i_qp
]
);
h
->
quantf
.
quant_4x4
(
dct4x4
[
2
],
h
->
quant4_mf
[
CQM_4PY
][
i_qp
],
h
->
quant4_bias
[
CQM_4PY
][
i_qp
]
);
h
->
quantf
.
quant_4x4
(
dct4x4
[
3
],
h
->
quant4_mf
[
CQM_4PY
][
i_qp
],
h
->
quant4_bias
[
CQM_4PY
][
i_qp
]
);
for
(
i4
=
0
;
i4
<
4
;
i4
++
)
h
->
quantf
.
quant_4x4
(
dct4x4
[
i4
],
h
->
quant4_mf
[
CQM_4PY
][
i_qp
],
h
->
quant4_bias
[
CQM_4PY
][
i_qp
]
);
for
(
i4
=
0
;
i4
<
4
;
i4
++
)
h
->
zigzagf
.
scan_4x4
(
h
->
dct
.
luma4x4
[
i8
*
4
+
i4
],
dct4x4
[
i4
]
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment