Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
10
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
Open sidebar
VideoLAN
x264
Commits
1d22dd50
Commit
1d22dd50
authored
Dec 19, 2010
by
Daniel Kang
Committed by
Fiona Glaser
Jan 10, 2011
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
MMX version of high bit depth plane_copy
And various cosmetics. Patch from Google Code-In
parent
341b6147
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
44 additions
and
47 deletions
+44
-47
common/frame.c
common/frame.c
+4
-4
common/mc.c
common/mc.c
+1
-1
common/mc.h
common/mc.h
+1
-1
common/x86/mc-a2.asm
common/x86/mc-a2.asm
+24
-22
common/x86/mc-c.c
common/x86/mc-c.c
+12
-17
tools/checkasm.c
tools/checkasm.c
+2
-2
No files found.
common/frame.c
View file @
1d22dd50
...
...
@@ -287,13 +287,13 @@ int x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src )
uint8_t
*
pix
[
3
];
int
stride
[
3
];
get_plane_ptr
(
h
,
src
,
&
pix
[
0
],
&
stride
[
0
],
0
,
0
,
0
);
h
->
mc
.
plane_copy
(
dst
->
plane
[
0
],
dst
->
i_stride
[
0
],
pix
[
0
],
stride
[
0
],
h
->
param
.
i_width
,
h
->
param
.
i_height
);
h
->
mc
.
plane_copy
(
dst
->
plane
[
0
],
dst
->
i_stride
[
0
],
(
pixel
*
)
pix
[
0
],
stride
[
0
]
/
sizeof
(
pixel
),
h
->
param
.
i_width
,
h
->
param
.
i_height
);
if
(
i_csp
==
X264_CSP_NV12
)
{
get_plane_ptr
(
h
,
src
,
&
pix
[
1
],
&
stride
[
1
],
1
,
0
,
1
);
h
->
mc
.
plane_copy
(
dst
->
plane
[
1
],
dst
->
i_stride
[
1
],
pix
[
1
],
stride
[
1
],
h
->
param
.
i_width
,
h
->
param
.
i_height
>>
1
);
h
->
mc
.
plane_copy
(
dst
->
plane
[
1
],
dst
->
i_stride
[
1
],
(
pixel
*
)
pix
[
1
],
stride
[
1
]
/
sizeof
(
pixel
),
h
->
param
.
i_width
,
h
->
param
.
i_height
>>
1
);
}
else
{
...
...
common/mc.c
View file @
1d22dd50
...
...
@@ -280,7 +280,7 @@ MC_COPY( 8 )
MC_COPY
(
4
)
void
x264_plane_copy_c
(
pixel
*
dst
,
int
i_dst
,
uint8_t
*
src
,
int
i_src
,
int
w
,
int
h
)
pixel
*
src
,
int
i_src
,
int
w
,
int
h
)
{
while
(
h
--
)
{
...
...
common/mc.h
View file @
1d22dd50
...
...
@@ -88,7 +88,7 @@ typedef struct
void
(
*
load_deinterleave_8x8x2_fdec
)(
pixel
*
dst
,
pixel
*
src
,
int
i_src
);
void
(
*
plane_copy
)(
pixel
*
dst
,
int
i_dst
,
uint8_t
*
src
,
int
i_src
,
int
w
,
int
h
);
pixel
*
src
,
int
i_src
,
int
w
,
int
h
);
void
(
*
plane_copy_interleave
)(
pixel
*
dst
,
int
i_dst
,
pixel
*
srcu
,
int
i_srcu
,
pixel
*
srcv
,
int
i_srcv
,
int
w
,
int
h
);
...
...
common/x86/mc-a2.asm
View file @
1d22dd50
...
...
@@ -825,11 +825,13 @@ HPEL ssse3
%endif
; !HIGH_BIT_DEPTH
;-----------------------------------------------------------------------------
; void plane_copy_core(
uint8_t
*dst, int i_dst,
;
uint8_t
*src, int i_src, int w, int h)
; void plane_copy_core(
pixel
*dst, int i_dst,
;
pixel
*src, int i_src, int w, int h)
;-----------------------------------------------------------------------------
; assumes i_dst and w are multiples of 16, and i_dst>w
INIT_MMX
cglobal
plane_copy_core_mmxext
,
6
,
7
FIX_STRIDES
r1d
,
r3d
,
r4d
movsxdifnidn
r1
,
r1d
movsxdifnidn
r3
,
r3d
movsxdifnidn
r4
,
r4d
...
...
@@ -840,22 +842,22 @@ cglobal plane_copy_core_mmxext, 6,7
sub
r6d
,
63
.loopx:
prefetchnta
[
r2
+
256
]
movq
mm0
,
[
r2
]
movq
mm1
,
[
r2
+
8
]
movntq
[
r0
],
mm0
movntq
[
r0
+
8
],
mm1
movq
mm2
,
[
r2
+
16
]
movq
mm3
,
[
r2
+
24
]
movntq
[
r0
+
16
],
mm2
movntq
[
r0
+
24
],
mm3
movq
mm4
,
[
r2
+
32
]
movq
mm5
,
[
r2
+
40
]
movntq
[
r0
+
32
],
mm4
movntq
[
r0
+
40
],
mm5
movq
mm6
,
[
r2
+
48
]
movq
mm7
,
[
r2
+
56
]
movntq
[
r0
+
48
],
mm6
movntq
[
r0
+
56
],
mm7
movq
m0
,
[
r2
]
movq
m1
,
[
r2
+
8
]
movntq
[
r0
],
m0
movntq
[
r0
+
8
],
m1
movq
m2
,
[
r2
+
16
]
movq
m3
,
[
r2
+
24
]
movntq
[
r0
+
16
],
m2
movntq
[
r0
+
24
],
m3
movq
m4
,
[
r2
+
32
]
movq
m5
,
[
r2
+
40
]
movntq
[
r0
+
32
],
m4
movntq
[
r0
+
40
],
m5
movq
m6
,
[
r2
+
48
]
movq
m7
,
[
r2
+
56
]
movntq
[
r0
+
48
],
m6
movntq
[
r0
+
56
],
m7
add
r2
,
64
add
r0
,
64
sub
r6d
,
64
...
...
@@ -864,10 +866,10 @@ cglobal plane_copy_core_mmxext, 6,7
add
r6d
,
63
jle
.end16
.loop16:
movq
mm0
,
[
r2
]
movq
mm1
,
[
r2
+
8
]
movntq
[
r0
],
mm0
movntq
[
r0
+
8
],
mm1
movq
m0
,
[
r2
]
movq
m1
,
[
r2
+
8
]
movntq
[
r0
],
m0
movntq
[
r0
+
8
],
m1
add
r2
,
16
add
r0
,
16
sub
r6d
,
16
...
...
common/x86/mc-c.c
View file @
1d22dd50
...
...
@@ -84,8 +84,8 @@ void x264_mc_copy_w16_sse3( uint8_t *, int, uint8_t *, int, int );
void
x264_mc_copy_w16_aligned_sse2
(
pixel
*
,
int
,
pixel
*
,
int
,
int
);
void
x264_prefetch_fenc_mmxext
(
uint8_t
*
,
int
,
uint8_t
*
,
int
,
int
);
void
x264_prefetch_ref_mmxext
(
uint8_t
*
,
int
,
int
);
void
x264_plane_copy_core_mmxext
(
uint8_t
*
,
int
,
uint8_t
*
,
int
,
int
w
,
int
h
);
void
x264_plane_copy_c
(
uint8_t
*
,
int
,
uint8_t
*
,
int
,
int
w
,
int
h
);
void
x264_plane_copy_core_mmxext
(
pixel
*
,
int
,
pixel
*
,
int
,
int
w
,
int
h
);
void
x264_plane_copy_c
(
pixel
*
,
int
,
pixel
*
,
int
,
int
w
,
int
h
);
void
x264_plane_copy_interleave_core_mmxext
(
pixel
*
dst
,
int
i_dst
,
pixel
*
srcu
,
int
i_srcu
,
pixel
*
srcv
,
int
i_srcv
,
int
w
,
int
h
);
...
...
@@ -426,23 +426,24 @@ HPEL(16, sse2, sse2, sse2, sse2)
HPEL
(
16
,
ssse3
,
ssse3
,
ssse3
,
ssse3
)
#endif
HPEL
(
16
,
sse2_misalign
,
sse2
,
sse2_misalign
,
sse2
)
#endif // HIGH_BIT_DEPTH
static
void
x264_plane_copy_mmxext
(
uint8_t
*
dst
,
int
i_dst
,
uint8_t
*
src
,
int
i_src
,
int
w
,
int
h
)
static
void
x264_plane_copy_mmxext
(
pixel
*
dst
,
int
i_dst
,
pixel
*
src
,
int
i_src
,
int
w
,
int
h
)
{
int
c_w
=
16
/
sizeof
(
pixel
)
-
1
;
if
(
w
<
256
)
{
// tiny resolutions don't want non-temporal hints. dunno the exact threshold.
x264_plane_copy_c
(
dst
,
i_dst
,
src
,
i_src
,
w
,
h
);
}
else
if
(
!
(
w
&
15
)
)
{
}
else
if
(
!
(
w
&
c_w
)
)
{
x264_plane_copy_core_mmxext
(
dst
,
i_dst
,
src
,
i_src
,
w
,
h
);
}
else
if
(
i_src
>
0
)
{
// have to use plain memcpy on the last line (in memory order) to avoid overreading src
x264_plane_copy_core_mmxext
(
dst
,
i_dst
,
src
,
i_src
,
(
w
+
15
)
&~
15
,
h
-
1
);
memcpy
(
dst
+
i_dst
*
(
h
-
1
),
src
+
i_src
*
(
h
-
1
),
w
);
x264_plane_copy_core_mmxext
(
dst
,
i_dst
,
src
,
i_src
,
(
w
+
c_w
)
&~
c_w
,
h
-
1
);
memcpy
(
dst
+
i_dst
*
(
h
-
1
),
src
+
i_src
*
(
h
-
1
),
w
*
sizeof
(
pixel
)
);
}
else
{
memcpy
(
dst
,
src
,
w
);
x264_plane_copy_core_mmxext
(
dst
+
i_dst
,
i_dst
,
src
+
i_src
,
i_src
,
(
w
+
15
)
&~
15
,
h
-
1
);
memcpy
(
dst
,
src
,
w
*
sizeof
(
pixel
)
);
x264_plane_copy_core_mmxext
(
dst
+
i_dst
,
i_dst
,
src
+
i_src
,
i_src
,
(
w
+
c_w
)
&~
c_w
,
h
-
1
);
}
}
#endif // HIGH_BIT_DEPTH
#define PLANE_INTERLEAVE(cpu) \
static void x264_plane_copy_interleave_##cpu( pixel *dst, int i_dst,\
...
...
@@ -483,12 +484,13 @@ void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf )
pf
->
memzero_aligned
=
x264_memzero_aligned_mmx
;
pf
->
integral_init4v
=
x264_integral_init4v_mmx
;
pf
->
integral_init8v
=
x264_integral_init8v_mmx
;
pf
->
store_interleave_8x8x2
=
x264_store_interleave_8x8x2_mmxext
;
if
(
!
(
cpu
&
X264_CPU_MMXEXT
)
)
return
;
pf
->
plane_copy
=
x264_plane_copy_mmxext
;
pf
->
plane_copy_interleave
=
x264_plane_copy_interleave_mmxext
;
pf
->
store_interleave_8x8x2
=
x264_store_interleave_8x8x2_mmxext
;
pf
->
avg
[
PIXEL_16x16
]
=
x264_pixel_avg_16x16_mmxext
;
pf
->
avg
[
PIXEL_16x8
]
=
x264_pixel_avg_16x8_mmxext
;
...
...
@@ -558,13 +560,6 @@ void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf )
if
(
(
cpu
&
X264_CPU_SHUFFLE_IS_FAST
)
&&
!
(
cpu
&
X264_CPU_SLOW_ATOM
)
)
pf
->
integral_init4v
=
x264_integral_init4v_ssse3
;
#else // !HIGH_BIT_DEPTH
pf
->
store_interleave_8x8x2
=
x264_store_interleave_8x8x2_mmxext
;
pf
->
load_deinterleave_8x8x2_fenc
=
x264_load_deinterleave_8x8x2_fenc_mmx
;
pf
->
load_deinterleave_8x8x2_fdec
=
x264_load_deinterleave_8x8x2_fdec_mmx
;
pf
->
plane_copy
=
x264_plane_copy_mmxext
;
pf
->
plane_copy_deinterleave
=
x264_plane_copy_deinterleave_mmx
;
pf
->
frame_init_lowres_core
=
x264_frame_init_lowres_core_mmxext
;
pf
->
prefetch_fenc
=
x264_prefetch_fenc_mmxext
;
...
...
tools/checkasm.c
View file @
1d22dd50
...
...
@@ -1080,7 +1080,7 @@ static int check_mc( int cpu_ref, int cpu_new )
int
src_stride
=
plane_specs
[
i
].
src_stride
;
int
dst_stride
=
(
w
+
127
)
&
~
63
;
assert
(
dst_stride
*
h
<=
0x1000
);
uint8_t
*
src1
=
buf1
+
X264_MAX
(
0
,
-
src_stride
)
*
(
h
-
1
);
pixel
*
src1
=
pbuf1
+
X264_MAX
(
0
,
-
src_stride
)
*
(
h
-
1
);
memset
(
pbuf3
,
0
,
0x1000
*
sizeof
(
pixel
)
);
memset
(
pbuf4
,
0
,
0x1000
*
sizeof
(
pixel
)
);
call_c
(
mc_c
.
plane_copy
,
pbuf3
,
dst_stride
,
src1
,
src_stride
,
w
,
h
);
...
...
@@ -1106,7 +1106,7 @@ static int check_mc( int cpu_ref, int cpu_new )
int
src_stride
=
(
plane_specs
[
i
].
src_stride
+
1
)
>>
1
;
int
dst_stride
=
(
2
*
w
+
127
)
&
~
63
;
assert
(
dst_stride
*
h
<=
0x1000
);
uint8_t
*
src1
=
buf1
+
X264_MAX
(
0
,
-
src_stride
)
*
(
h
-
1
);
pixel
*
src1
=
pbuf1
+
X264_MAX
(
0
,
-
src_stride
)
*
(
h
-
1
);
memset
(
pbuf3
,
0
,
0x1000
*
sizeof
(
pixel
)
);
memset
(
pbuf4
,
0
,
0x1000
*
sizeof
(
pixel
)
);
call_c
(
mc_c
.
plane_copy_interleave
,
pbuf3
,
dst_stride
,
src1
,
src_stride
,
src1
+
1024
,
src_stride
+
16
,
w
,
h
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment