Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
7
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
Open sidebar
VideoLAN
x264
Commits
a40aa64d
Commit
a40aa64d
authored
Apr 26, 2010
by
Fiona Glaser
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Reduce lookahead memory usage, cache misses
Merge lowres_types with lowres_costs.
parent
a6410b8c
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
19 additions
and
18 deletions
+19
-18
common/frame.c
common/frame.c
+0
-6
common/frame.h
common/frame.h
+7
-2
common/mc.c
common/mc.c
+1
-1
common/x86/mc-a2.asm
common/x86/mc-a2.asm
+3
-1
encoder/slicetype.c
encoder/slicetype.c
+8
-8
No files found.
common/frame.c
View file @
a40aa64d
...
...
@@ -148,10 +148,7 @@ x264_frame_t *x264_frame_new( x264_t *h, int b_fdec )
CHECKED_MALLOC
(
frame
->
i_propagate_cost
,
(
i_mb_count
+
3
)
*
sizeof
(
uint16_t
)
);
for
(
int
j
=
0
;
j
<=
h
->
param
.
i_bframe
+
1
;
j
++
)
for
(
int
i
=
0
;
i
<=
h
->
param
.
i_bframe
+
1
;
i
++
)
{
CHECKED_MALLOC
(
frame
->
lowres_costs
[
j
][
i
],
(
i_mb_count
+
3
)
*
sizeof
(
uint16_t
)
);
CHECKED_MALLOC
(
frame
->
lowres_inter_types
[
j
][
i
],
(
i_mb_count
+
3
)
/
4
*
sizeof
(
uint8_t
)
);
}
frame
->
i_intra_cost
=
frame
->
lowres_costs
[
0
][
0
];
memset
(
frame
->
i_intra_cost
,
-
1
,
(
i_mb_count
+
3
)
*
sizeof
(
uint16_t
)
);
}
...
...
@@ -199,10 +196,7 @@ void x264_frame_delete( x264_frame_t *frame )
x264_free
(
frame
->
i_propagate_cost
);
for
(
int
j
=
0
;
j
<=
X264_BFRAME_MAX
+
1
;
j
++
)
for
(
int
i
=
0
;
i
<=
X264_BFRAME_MAX
+
1
;
i
++
)
{
x264_free
(
frame
->
lowres_costs
[
j
][
i
]
);
x264_free
(
frame
->
lowres_inter_types
[
j
][
i
]
);
}
x264_free
(
frame
->
f_qp_offset
);
x264_free
(
frame
->
f_qp_offset_aq
);
x264_free
(
frame
->
i_inv_qscale_factor
);
...
...
common/frame.h
View file @
a40aa64d
...
...
@@ -84,9 +84,14 @@ typedef struct x264_frame
uint8_t
*
mb_partition
;
int16_t
(
*
mv
[
2
])[
2
];
int16_t
(
*
lowres_mvs
[
2
][
X264_BFRAME_MAX
+
1
])[
2
];
/* Stored as (lists_used << LOWRES_COST_SHIFT) + (cost).
* Doesn't need special addressing for intra cost because
* lists_used is guaranteed to be zero in that case. */
uint16_t
(
*
lowres_costs
[
X264_BFRAME_MAX
+
2
][
X264_BFRAME_MAX
+
2
]);
/* Actually a width-2 bitfield with 4 values per uint8_t. */
uint8_t
(
*
lowres_inter_types
[
X264_BFRAME_MAX
+
2
][
X264_BFRAME_MAX
+
2
]);
#define LOWRES_COST_MASK ((1<<14)-1)
#define LOWRES_COST_SHIFT 14
int
*
lowres_mv_costs
[
2
][
X264_BFRAME_MAX
+
1
];
int8_t
*
ref
[
2
];
int
i_ref
[
2
];
...
...
common/mc.c
View file @
a40aa64d
...
...
@@ -427,7 +427,7 @@ static void mbtree_propagate_cost( int *dst, uint16_t *propagate_in, uint16_t *i
for
(
int
i
=
0
;
i
<
len
;
i
++
)
{
int
propagate_amount
=
propagate_in
[
i
]
+
((
intra_costs
[
i
]
*
inv_qscales
[
i
]
+
128
)
>>
8
);
dst
[
i
]
=
div_64_32
((
int64_t
)
propagate_amount
*
(
intra_costs
[
i
]
-
inter_costs
[
i
]),
intra_costs
[
i
]);
dst
[
i
]
=
div_64_32
((
int64_t
)
propagate_amount
*
(
intra_costs
[
i
]
-
(
inter_costs
[
i
]
&
LOWRES_COST_MASK
)
),
intra_costs
[
i
]);
}
}
...
...
common/x86/mc-a2.asm
View file @
a40aa64d
...
...
@@ -37,6 +37,7 @@ pw_1: times 8 dw 1
pw_16:
times
8
dw
16
pw_32:
times
8
dw
32
pd_128:
times
4
dd
128
pw_0x3fff:
times
4
dw
0x3fff
SECTION
.text
...
...
@@ -1132,8 +1133,9 @@ cglobal x264_mbtree_propagate_cost_sse2, 6,6
pmaddwd
xmm0
,
xmm2
paddd
xmm0
,
xmm4
psrld
xmm0
,
8
; intra*invq>>8
movq
xmm1
,
[
r1
+
r5
]
; prop
movq
xmm3
,
[
r3
+
r5
]
; inter
movq
xmm1
,
[
r1
+
r5
]
; prop
pand
xmm3
,
[
pw_0x3fff
]
punpcklwd
xmm1
,
xmm5
punpcklwd
xmm3
,
xmm5
paddd
xmm0
,
xmm1
; prop + (intra*invq>>8)
...
...
encoder/slicetype.c
View file @
a40aa64d
...
...
@@ -416,10 +416,6 @@ static void x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
if
(
b_bidir
&&
(
M32
(
m
[
0
].
mv
)
||
M32
(
m
[
1
].
mv
)
)
)
TRY_BIDIR
(
m
[
0
].
mv
,
m
[
1
].
mv
,
5
);
/* Store to width-2 bitfield. */
frames
[
b
]
->
lowres_inter_types
[
b
-
p0
][
p1
-
b
][
i_mb_xy
>>
2
]
&=
~
(
3
<<
((
i_mb_xy
&
3
)
*
2
));
frames
[
b
]
->
lowres_inter_types
[
b
-
p0
][
p1
-
b
][
i_mb_xy
>>
2
]
|=
list_used
<<
((
i_mb_xy
&
3
)
*
2
);
lowres_intra_mb:
if
(
!
fenc
->
b_intra_calculated
)
{
...
...
@@ -481,7 +477,10 @@ lowres_intra_mb:
int
i_icost
=
fenc
->
i_intra_cost
[
i_mb_xy
];
int
b_intra
=
i_icost
<
i_bcost
;
if
(
b_intra
)
{
i_bcost
=
i_icost
;
list_used
=
0
;
}
if
(
b_frame_score_mb
)
fenc
->
i_intra_mbs
[
b
-
p0
]
+=
b_intra
;
}
...
...
@@ -501,7 +500,8 @@ lowres_intra_mb:
}
}
fenc
->
lowres_costs
[
b
-
p0
][
p1
-
b
][
i_mb_xy
]
=
i_bcost
;
assert
(
i_bcost
<
(
1
<<
14
));
fenc
->
lowres_costs
[
b
-
p0
][
p1
-
b
][
i_mb_xy
]
=
i_bcost
+
(
list_used
<<
LOWRES_COST_SHIFT
);
}
#undef TRY_BIDIR
...
...
@@ -615,7 +615,7 @@ static int x264_slicetype_frame_cost_recalculate( x264_t *h, x264_frame_t **fram
for
(
h
->
mb
.
i_mb_x
=
h
->
sps
->
i_mb_width
-
1
;
h
->
mb
.
i_mb_x
>=
0
;
h
->
mb
.
i_mb_x
--
)
{
int
i_mb_xy
=
h
->
mb
.
i_mb_x
+
h
->
mb
.
i_mb_y
*
h
->
mb
.
i_mb_stride
;
int
i_mb_cost
=
frames
[
b
]
->
lowres_costs
[
b
-
p0
][
p1
-
b
][
i_mb_xy
];
int
i_mb_cost
=
frames
[
b
]
->
lowres_costs
[
b
-
p0
][
p1
-
b
][
i_mb_xy
]
&
LOWRES_COST_MASK
;
float
qp_adj
=
qp_offset
[
i_mb_xy
];
i_mb_cost
=
(
i_mb_cost
*
x264_exp2fix8
(
qp_adj
)
+
128
)
>>
8
;
row_satd
[
h
->
mb
.
i_mb_y
]
+=
i_mb_cost
;
...
...
@@ -681,7 +681,7 @@ static void x264_macroblock_tree_propagate( x264_t *h, x264_frame_t **frames, in
if
(
propagate_amount
>
0
)
{
/* Access width-2 bitfield. */
int
lists_used
=
(
frames
[
b
]
->
lowres_
inter_type
s
[
b
-
p0
][
p1
-
b
][
mb_index
>>
2
]
>>
((
mb_index
&
3
)
*
2
))
&
3
;
int
lists_used
=
frames
[
b
]
->
lowres_
cost
s
[
b
-
p0
][
p1
-
b
][
mb_index
]
>>
LOWRES_COST_SHIFT
;
/* Follow the MVs to the previous frame(s). */
for
(
int
list
=
0
;
list
<
2
;
list
++
)
if
(
(
lists_used
>>
list
)
&
1
)
...
...
@@ -1490,7 +1490,7 @@ int x264_rc_analyse_slice( x264_t *h )
for
(
int
x
=
h
->
fdec
->
i_pir_start_col
;
x
<=
h
->
fdec
->
i_pir_end_col
;
x
++
,
mb_xy
++
)
{
int
intra_cost
=
(
h
->
fenc
->
i_intra_cost
[
mb_xy
]
*
ip_factor
+
128
)
>>
8
;
int
inter_cost
=
h
->
fenc
->
lowres_costs
[
b
-
p0
][
p1
-
b
][
mb_xy
];
int
inter_cost
=
h
->
fenc
->
lowres_costs
[
b
-
p0
][
p1
-
b
][
mb_xy
]
&
LOWRES_COST_MASK
;
int
diff
=
intra_cost
-
inter_cost
;
if
(
h
->
param
.
rc
.
i_aq_mode
)
h
->
fdec
->
i_row_satd
[
y
]
+=
(
diff
*
frames
[
b
]
->
i_inv_qscale_factor
[
mb_xy
]
+
128
)
>>
8
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment