Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
7
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
Open sidebar
VideoLAN
x264
Commits
5469a4ba
Commit
5469a4ba
authored
Mar 16, 2008
by
Fiona Glaser
Committed by
Loren Merritt
Mar 17, 2008
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
memcpy_aligned_sse2
parent
9d0c0a90
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
78 additions
and
14 deletions
+78
-14
common/cabac.h
common/cabac.h
+3
-2
common/macroblock.c
common/macroblock.c
+1
-1
common/mc.c
common/mc.c
+1
-0
common/mc.h
common/mc.h
+2
-0
common/x86/mc-a2.asm
common/x86/mc-a2.asm
+53
-0
common/x86/mc-c.c
common/x86/mc-c.c
+7
-1
encoder/rdo.c
encoder/rdo.c
+11
-10
No files found.
common/cabac.h
View file @
5469a4ba
...
...
@@ -27,7 +27,9 @@
typedef
struct
{
/* context */
uint8_t
state
[
460
];
DECLARE_ALIGNED
(
uint8_t
,
state
[
460
],
16
);
int
f8_bits_encoded
;
// only if using x264_cabac_size_decision()
/* state */
int
i_low
;
...
...
@@ -36,7 +38,6 @@ typedef struct
/* bit stream */
int
i_queue
;
int
i_bytes_outstanding
;
int
f8_bits_encoded
;
// only if using x264_cabac_size_decision()
uint8_t
*
p_start
;
uint8_t
*
p
;
...
...
common/macroblock.c
View file @
5469a4ba
...
...
@@ -502,7 +502,7 @@ int x264_mb_predict_mv_direct16x16( x264_t *h, int *b_changed )
for
(
l
=
0
;
l
<
2
;
l
++
)
for
(
i
=
0
;
i
<
4
;
i
++
)
h
->
mb
.
cache
.
direct_ref
[
l
][
i
]
=
h
->
mb
.
cache
.
ref
[
l
][
x264_scan8
[
i
*
4
]];
memcpy
(
h
->
mb
.
cache
.
direct_mv
,
h
->
mb
.
cache
.
mv
,
sizeof
(
h
->
mb
.
cache
.
mv
));
h
->
mc
.
memcpy_aligned
(
h
->
mb
.
cache
.
direct_mv
,
h
->
mb
.
cache
.
mv
,
sizeof
(
h
->
mb
.
cache
.
mv
));
}
return
b_available
;
...
...
common/mc.c
View file @
5469a4ba
...
...
@@ -372,6 +372,7 @@ void x264_mc_init( int cpu, x264_mc_functions_t *pf )
pf
->
prefetch_fenc
=
prefetch_fenc_null
;
pf
->
prefetch_ref
=
prefetch_ref_null
;
pf
->
memcpy_aligned
=
memcpy
;
#ifdef HAVE_MMX
x264_mc_init_mmx
(
cpu
,
pf
);
...
...
common/mc.h
View file @
5469a4ba
...
...
@@ -65,6 +65,8 @@ typedef struct
uint8_t
*
pix_uv
,
int
stride_uv
,
int
mb_x
);
/* prefetch the next few macroblocks of a hpel reference frame */
void
(
*
prefetch_ref
)(
uint8_t
*
pix
,
int
stride
,
int
parity
);
void
*
(
*
memcpy_aligned
)(
void
*
dst
,
const
void
*
src
,
size_t
n
);
}
x264_mc_functions_t
;
...
...
common/x86/mc-a2.asm
View file @
5469a4ba
...
...
@@ -336,3 +336,56 @@ cglobal x264_plane_copy_mmxext, 6,7
emms
RET
;-----------------------------------------------------------------------------
; void *x264_memcpy_aligned_mmx( void *dst, const void *src, size_t n );
;
; Copies n bytes from src (r1) to dst (r0) with 8-byte MMX moves, working
; from the end of the buffers back toward the start. r2 holds the number of
; bytes still to copy and doubles as the offset of the next chunk.
;
; n must be a multiple of 16: the code only ever moves 16- or 32-byte chunks.
; The count is manipulated through r2d and the loop ends on a signed compare,
; so n is presumably expected to be small -- TODO confirm against callers.
; No return value is loaded explicitly; do not rely on the returned pointer.
; NOTE(review): no emms is issued here, MMX state is left live -- confirm
; that callers account for it.
;-----------------------------------------------------------------------------
cglobal x264_memcpy_aligned_mmx, 3,3
    test r2d, 16
    jz .copy32start
    ; odd number of 16-byte units: peel off the trailing 16 bytes first
    sub r2d, 16
    movq mm0, [r1 + r2 + 0]
    movq mm1, [r1 + r2 + 8]
    movq [r0 + r2 + 0], mm0
    movq [r0 + r2 + 8], mm1
.copy32start:
    ; Nothing left (n was 0 or 16): skip the loop. Its first pass is
    ; unconditional and would otherwise run with a negative offset, reading
    ; and writing 32 bytes in front of both buffers.
    test r2d, r2d
    jz .done
.copy32:
    sub r2d, 32
    movq mm0, [r1 + r2 +  0]
    movq mm1, [r1 + r2 +  8]
    movq mm2, [r1 + r2 + 16]
    movq mm3, [r1 + r2 + 24]
    movq [r0 + r2 +  0], mm0
    movq [r0 + r2 +  8], mm1
    movq [r0 + r2 + 16], mm2
    movq [r0 + r2 + 24], mm3
    ; flags are still those of the sub above: movq does not modify them
    jg .copy32
.done:
    REP_RET
;-----------------------------------------------------------------------------
; void *x264_memcpy_aligned_sse2( void *dst, const void *src, size_t n );
;
; Copies n bytes from src (r1) to dst (r0) with 16-byte SSE2 moves, working
; from the end of the buffers back toward the start. r2 holds the number of
; bytes still to copy and doubles as the offset of the next chunk.
;
; n must be a multiple of 16: the code only ever moves 16-, 32- or 64-byte
; chunks. dst and src must both be 16-byte aligned, because every access is
; a movdqa, which faults on unaligned addresses.
; The count is manipulated through r2d and the loop ends on a signed compare,
; so n is presumably expected to be small -- TODO confirm against callers.
; No return value is loaded explicitly; do not rely on the returned pointer.
;-----------------------------------------------------------------------------
cglobal x264_memcpy_aligned_sse2, 3,3
    test r2d, 16
    jz .copy32
    ; bit 4 of the count set: peel off the trailing 16 bytes
    sub r2d, 16
    movdqa xmm0, [r1 + r2]
    movdqa [r0 + r2], xmm0
.copy32:
    test r2d, 32
    jz .copy64start
    ; bit 5 of the count set: peel off the next 32 bytes
    sub r2d, 32
    movdqa xmm0, [r1 + r2 +  0]
    movdqa xmm1, [r1 + r2 + 16]
    movdqa [r0 + r2 +  0], xmm0
    movdqa [r0 + r2 + 16], xmm1
.copy64start:
    ; Nothing left (n was 0, 16, 32 or 48): skip the loop. Its first pass is
    ; unconditional and would otherwise run with a negative offset, reading
    ; and writing 64 bytes in front of both buffers.
    test r2d, r2d
    jz .done
.copy64:
    sub r2d, 64
    movdqa xmm0, [r1 + r2 +  0]
    movdqa xmm1, [r1 + r2 + 16]
    movdqa xmm2, [r1 + r2 + 32]
    movdqa xmm3, [r1 + r2 + 48]
    movdqa [r0 + r2 +  0], xmm0
    movdqa [r0 + r2 + 16], xmm1
    movdqa [r0 + r2 + 32], xmm2
    movdqa [r0 + r2 + 48], xmm3
    ; flags are still those of the sub above: movdqa does not modify them
    jg .copy64
.done:
    REP_RET
common/x86/mc-c.c
View file @
5469a4ba
...
...
@@ -56,6 +56,8 @@ extern void x264_mc_chroma_mmxext( uint8_t *src, int i_src_stride,
extern
void
x264_plane_copy_mmxext
(
uint8_t
*
,
int
,
uint8_t
*
,
int
,
int
w
,
int
h
);
extern
void
x264_hpel_filter_mmxext
(
uint8_t
*
dsth
,
uint8_t
*
dstv
,
uint8_t
*
dstc
,
uint8_t
*
src
,
int
i_stride
,
int
i_width
,
int
i_height
);
extern
void
*
x264_memcpy_aligned_mmx
(
void
*
dst
,
const
void
*
src
,
size_t
n
);
extern
void
*
x264_memcpy_aligned_sse2
(
void
*
dst
,
const
void
*
src
,
size_t
n
);
#define AVG_WEIGHT(W,H) \
void x264_pixel_avg_weight_ ## W ## x ## H ## _mmxext( uint8_t *dst, int i_dst, uint8_t *src, int i_src, int i_weight_dst ) \
...
...
@@ -144,6 +146,7 @@ void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf )
pf
->
copy
[
PIXEL_16x16
]
=
x264_mc_copy_w16_mmx
;
pf
->
copy
[
PIXEL_8x8
]
=
x264_mc_copy_w8_mmx
;
pf
->
copy
[
PIXEL_4x4
]
=
x264_mc_copy_w4_mmx
;
pf
->
memcpy_aligned
=
x264_memcpy_aligned_mmx
;
if
(
!
(
cpu
&
X264_CPU_MMXEXT
)
)
return
;
...
...
@@ -175,5 +178,8 @@ void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf )
pf
->
prefetch_fenc
=
x264_prefetch_fenc_mmxext
;
pf
->
prefetch_ref
=
x264_prefetch_ref_mmxext
;
/* todo: use sse2 */
if
(
!
(
cpu
&
X264_CPU_SSE2
)
)
return
;
pf
->
memcpy_aligned
=
x264_memcpy_aligned_sse2
;
}
encoder/rdo.c
View file @
5469a4ba
...
...
@@ -82,8 +82,8 @@ static int x264_rd_cost_mb( x264_t *h, int i_lambda2 )
}
else
if
(
h
->
param
.
b_cabac
)
{
x264_cabac_t
cabac_tmp
=
h
->
cabac
;
cabac_tmp
.
f8_bits_encoded
=
0
;
x264_cabac_t
cabac_tmp
;
h
->
mc
.
memcpy_aligned
(
&
cabac_tmp
,
&
h
->
cabac
,
offsetof
(
x264_cabac_t
,
i_low
)
)
;
x264_macroblock_size_cabac
(
h
,
&
cabac_tmp
);
i_bits
=
(
cabac_tmp
.
f8_bits_encoded
*
i_lambda2
+
128
)
>>
8
;
}
...
...
@@ -124,8 +124,8 @@ int x264_rd_cost_part( x264_t *h, int i_lambda2, int i8, int i_pixel )
if
(
h
->
param
.
b_cabac
)
{
x264_cabac_t
cabac_tmp
=
h
->
cabac
;
cabac_tmp
.
f8_bits_encoded
=
0
;
x264_cabac_t
cabac_tmp
;
h
->
mc
.
memcpy_aligned
(
&
cabac_tmp
,
&
h
->
cabac
,
offsetof
(
x264_cabac_t
,
i_low
)
)
;
x264_partition_size_cabac
(
h
,
&
cabac_tmp
,
i8
,
i_pixel
);
i_bits
=
(
cabac_tmp
.
f8_bits_encoded
*
i_lambda2
+
128
)
>>
8
;
}
...
...
@@ -146,8 +146,8 @@ int x264_rd_cost_i8x8( x264_t *h, int i_lambda2, int i8, int i_mode )
if
(
h
->
param
.
b_cabac
)
{
x264_cabac_t
cabac_tmp
=
h
->
cabac
;
cabac_tmp
.
f8_bits_encoded
=
0
;
x264_cabac_t
cabac_tmp
;
h
->
mc
.
memcpy_aligned
(
&
cabac_tmp
,
&
h
->
cabac
,
offsetof
(
x264_cabac_t
,
i_low
)
)
;
x264_partition_i8x8_size_cabac
(
h
,
&
cabac_tmp
,
i8
,
i_mode
);
i_bits
=
(
cabac_tmp
.
f8_bits_encoded
*
i_lambda2
+
128
)
>>
8
;
}
...
...
@@ -168,8 +168,9 @@ int x264_rd_cost_i4x4( x264_t *h, int i_lambda2, int i4, int i_mode )
if
(
h
->
param
.
b_cabac
)
{
x264_cabac_t
cabac_tmp
=
h
->
cabac
;
cabac_tmp
.
f8_bits_encoded
=
0
;
x264_cabac_t
cabac_tmp
;
h
->
mc
.
memcpy_aligned
(
&
cabac_tmp
,
&
h
->
cabac
,
offsetof
(
x264_cabac_t
,
i_low
)
);
x264_partition_i4x4_size_cabac
(
h
,
&
cabac_tmp
,
i4
,
i_mode
);
i_bits
=
(
cabac_tmp
.
f8_bits_encoded
*
i_lambda2
+
128
)
>>
8
;
}
...
...
@@ -194,8 +195,8 @@ int x264_rd_cost_i8x8_chroma( x264_t *h, int i_lambda2, int i_mode, int b_dct )
if
(
h
->
param
.
b_cabac
)
{
x264_cabac_t
cabac_tmp
=
h
->
cabac
;
cabac_tmp
.
f8_bits_encoded
=
0
;
x264_cabac_t
cabac_tmp
;
h
->
mc
.
memcpy_aligned
(
&
cabac_tmp
,
&
h
->
cabac
,
offsetof
(
x264_cabac_t
,
i_low
)
)
;
x264_i8x8_chroma_size_cabac
(
h
,
&
cabac_tmp
);
i_bits
=
(
cabac_tmp
.
f8_bits_encoded
*
i_lambda2
+
128
)
>>
8
;
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment