 @*****************************************************************************
 @ i420_yuyv_neon.S : ARM NEONv1 I420 to YUYV and UYVY chroma conversion
 @*****************************************************************************
 @ Copyright (C) 2009 Rémi Denis-Courmont
 @
 @ This program is free software; you can redistribute it and/or modify
 @ it under the terms of the GNU General Public License as published by
 @ the Free Software Foundation; either version 2 of the License, or
 @ (at your option) any later version.
 @
 @ This program is distributed in the hope that it will be useful,
 @ but WITHOUT ANY WARRANTY; without even the implied warranty of
 @ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 @ GNU General Public License for more details.
 @
 @ You should have received a copy of the GNU General Public License
 @ along with this program; if not, write to the Free Software Foundation,
 @ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
 @*****************************************************************************

	.fpu neon
	.text

@ Register aliases shared by i420_yuyv_neon and i420_uyvy_neon.
@
@ These are C preprocessor macros: never append an assembler comment to a
@ define line, because the comment text would become part of every expansion
@ and swallow the rest of the instruction that uses the alias.
@
@   alias    reg   role in the conversion loops
@   -------  ----  ----------------------------------------------------------
@   O1       r0    output cursor, upper row of the current row pair
@   O2       r1    output cursor, lower row; r1 arrives holding the pointer
@                  to the source pointer table and is reused once that table
@                  has been read
@   PITCH    r2    luma bytes converted per row (an output row is twice that)
@   S_OFF    r3    bytes skipped after each luma row; half of it is skipped
@                  in each chroma plane after every row pair
@   Y1       r4    luma read cursor, upper row
@   Y2       r5    luma read cursor, lower row
@   U        r6    first chroma plane read cursor
@   V        r7    second chroma plane read cursor
@   HEIGHT   r8    rows still to convert, decremented by two per row pair
@   END_O1   r12   value of O1 at which the upper output row is complete
@
@ r4-r8 are callee-saved and are pushed and popped by each routine; r12 is a
@ scratch register.
#define O1	r0
#define O2	r1
#define PITCH	r2
#define S_OFF	r3
#define Y1	r4
#define Y2	r5
#define U	r6
#define V	r7
#define HEIGHT	r8
#define END_O1	r12

	.align
	.global i420_yuyv_neon
	.type	i420_yuyv_neon, %function
@ ---------------------------------------------------------------------------
@ i420_yuyv_neon
@
@ Packs planar luma plus two half-resolution chroma planes into interleaved
@ bytes in the order y0 u0 y1 v0, two picture rows at a time. Both rows of a
@ pair share the same chroma samples.
@
@ Inputs (ARM 32-bit procedure call standard, as consumed by the code):
@   r0          output buffer
@   r1          address of three consecutive words: luma, first chroma and
@               second chroma source pointers
@   r2          bytes of luma converted per row
@   r3          bytes skipped after each luma row (halved for chroma)
@   first stack word   number of rows
@
@ Requirements visible in the code:
@   - row width is a multiple of 16 and the row count is even; both loops
@     terminate on exact equality only
@   - luma and output pointers satisfy the 128-bit alignment qualifiers,
@     chroma pointers the 64-bit ones
@   - output rows are contiguous, each twice the luma row width
@
@ A zero row count or zero row width returns without touching memory.
@
@ Clobbers: r0-r3, r12, q0-q3, flags. r4-r8 are preserved.
@ ---------------------------------------------------------------------------
i420_yuyv_neon:
	push		{r4-r8, lr}
	ldr		HEIGHT, [sp, #(4*6)]	@ first stack argument, above the six saved words

	@ Nothing to convert. Without this check the equality-only loop
	@ conditions below would never be met and the stores would run away.
	cmp		HEIGHT,	#0
	beq		.Lyuyv_done
	cmp		PITCH,	#0
	beq		.Lyuyv_done

	ldmia		r1,	{Y1, U, V}	@ read the table before O2 overwrites r1
	add		O2,	O1,	PITCH, lsl #1	@ lower output row = upper + 2 * width
	add		Y2,	Y1,	PITCH
	add		Y2,	S_OFF			@ lower luma row = upper + width + skip

.Lyuyv_row_pair:
	mov		END_O1,	O2		@ upper output row ends where the lower one starts
	pld		[Y2]

.Lyuyv_block:					@ 16 pixels of both rows per pass
	pld		[U, #64]
	vld1.u8		{d2},		[U,:64]!	@ d2 = eight samples of the first chroma plane
	pld		[V, #64]
	vld1.u8		{d3},		[V,:64]!	@ d3 = eight samples of the second chroma plane
	pld		[Y1, #64]
	vzip.u8		d2,	d3			@ q1 = u0 v0 u1 v1 ... u7 v7
	vld1.u8		{q0},		[Y1,:128]!	@ q0 = sixteen luma bytes, upper row
	pld		[Y2, #64]
	vmov		q3,	q1			@ keep a chroma copy for the lower row
	vzip.u8		q0,	q1			@ q0,q1 = y0 u0 y1 v0 ... (upper row)
	vld1.u8		{q2},		[Y2,:128]!	@ q2 = sixteen luma bytes, lower row
	vzip.u8		q2,	q3			@ q2,q3 = same packing for the lower row
	vst1.u8		{q0-q1},	[O1,:128]!	@ 32 packed bytes to each output row
	vst1.u8		{q2-q3},	[O2,:128]!

	cmp		O1,	END_O1
	bne		.Lyuyv_block

	@ Step to the next pair of rows.
	sub		HEIGHT,	#2
	mov		O1,	O2			@ O2 advanced a full row, so it is the next upper row
	add		O2,	PITCH,	lsl #1
	add		Y2,	S_OFF			@ skip the gap after the lower luma row
	mov		Y1,	Y2
	add		Y2,	PITCH
	add		Y2,	S_OFF
	add		U,	S_OFF,	lsr #1		@ chroma rows skip half the luma gap
	add		V,	S_OFF,	lsr #1

	cmp		HEIGHT,	#0
	bne		.Lyuyv_row_pair

.Lyuyv_done:
	pop		{r4-r8, pc}
	.size	i420_yuyv_neon, .-i420_yuyv_neon

	.align
	.global i420_uyvy_neon
	.type	i420_uyvy_neon, %function
@ ---------------------------------------------------------------------------
@ i420_uyvy_neon
@
@ Packs planar luma plus two half-resolution chroma planes into interleaved
@ bytes in the order u0 y0 v0 y1, two picture rows at a time. Both rows of a
@ pair share the same chroma samples.
@
@ Inputs (ARM 32-bit procedure call standard, as consumed by the code):
@   r0          output buffer
@   r1          address of three consecutive words: luma, first chroma and
@               second chroma source pointers
@   r2          bytes of luma converted per row
@   r3          bytes skipped after each luma row (halved for chroma)
@   first stack word   number of rows
@
@ Requirements visible in the code:
@   - row width is a multiple of 16 and the row count is even; both loops
@     terminate on exact equality only
@   - luma and output pointers satisfy the 128-bit alignment qualifiers,
@     chroma pointers the 64-bit ones
@   - output rows are contiguous, each twice the luma row width
@
@ A zero row count or zero row width returns without touching memory.
@
@ Clobbers: r0-r3, r12, q0-q3, flags. r4-r8 are preserved.
@ ---------------------------------------------------------------------------
i420_uyvy_neon:
	push		{r4-r8, lr}
	ldr		HEIGHT, [sp, #(4*6)]	@ first stack argument, above the six saved words

	@ Nothing to convert. Without this check the equality-only loop
	@ conditions below would never be met and the stores would run away.
	cmp		HEIGHT,	#0
	beq		.Luyvy_done
	cmp		PITCH,	#0
	beq		.Luyvy_done

	ldmia		r1,	{Y1, U, V}	@ read the table before O2 overwrites r1
	add		O2,	O1,	PITCH, lsl #1	@ lower output row = upper + 2 * width
	add		Y2,	Y1,	PITCH
	add		Y2,	S_OFF			@ lower luma row = upper + width + skip

.Luyvy_row_pair:
	mov		END_O1,	O2		@ upper output row ends where the lower one starts

.Luyvy_block:					@ 16 pixels of both rows per pass
	pld		[U, #64]
	vld1.u8		{d0},		[U,:64]!	@ d0 = eight samples of the first chroma plane
	pld		[V, #64]
	vld1.u8		{d1},		[V,:64]!	@ d1 = eight samples of the second chroma plane
	pld		[Y1, #64]
	vzip.u8		d0,	d1			@ q0 = u0 v0 u1 v1 ... u7 v7
	vld1.u8		{q1},		[Y1,:128]!	@ q1 = sixteen luma bytes, upper row
	pld		[Y2, #64]
	vmov		q2,	q0			@ keep a chroma copy for the lower row
	vzip.u8		q0,	q1			@ q0,q1 = u0 y0 v0 y1 ... (upper row)
	vld1.u8		{q3},		[Y2,:128]!	@ q3 = sixteen luma bytes, lower row
	vzip.u8		q2,	q3			@ q2,q3 = same packing for the lower row
	vst1.u8		{q0-q1},	[O1,:128]!	@ 32 packed bytes to each output row
	vst1.u8		{q2-q3},	[O2,:128]!

	cmp		O1,	END_O1
	bne		.Luyvy_block

	@ Step to the next pair of rows.
	sub		HEIGHT,	#2
	mov		O1,	O2			@ O2 advanced a full row, so it is the next upper row
	add		O2,	PITCH,	lsl #1
	add		Y2,	S_OFF			@ skip the gap after the lower luma row
	mov		Y1,	Y2
	add		Y2,	PITCH
	add		Y2,	S_OFF
	add		U,	S_OFF,	lsr #1		@ chroma rows skip half the luma gap
	add		V,	S_OFF,	lsr #1

	cmp		HEIGHT,	#0
	bne		.Luyvy_row_pair

.Luyvy_done:
	pop		{r4-r8, pc}
	.size	i420_uyvy_neon, .-i420_uyvy_neon