 @*****************************************************************************
 @ i422_yuyv.S : ARM NEONv1 I422 to YUYV chroma conversion
 @*****************************************************************************
 @ Copyright (C) 2011 Rémi Denis-Courmont
 @
 @ This program is free software; you can redistribute it and/or modify
 @ it under the terms of the GNU Lesser General Public License as published by
 @ the Free Software Foundation; either version 2.1 of the License, or
 @ (at your option) any later version.
 @
 @ This program is distributed in the hope that it will be useful,
 @ but WITHOUT ANY WARRANTY; without even the implied warranty of
 @ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 @ GNU Lesser General Public License for more details.
 @
 @ You should have received a copy of the GNU Lesser General Public License
 @ along with this program; if not, write to the Free Software Foundation,
 @ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
 @****************************************************************************/

@ Assembler setup: unified ARM/Thumb syntax, NEON instructions enabled,
@ code emitted into the text section.
	.syntax unified
	.fpu neon
	.text

@ Register roles shared by the routines in this file.
@ The names below are C preprocessor macros, so the explanations are kept
@ on separate lines: a trailing comment on a #define line would become
@ part of the macro expansion.
@
@   O       output write pointer (first word read through r0)
@   OPAD    bytes added to O after each row (output stride - 2 * WIDTH)
@   WIDTH   pixels per row, taken from r2 as passed by the caller
@   HEIGHT  rows still to convert, taken from r3 as passed by the caller
@   Y, U, V read pointers into the three source planes (read through r1)
@   COUNT   pixels left in the current row
@   YPAD    bytes added to Y after each row (Y stride - WIDTH);
@           U and V are advanced by half of this value
#define O	r0
#define OPAD	r1
#define WIDTH	r2
#define HEIGHT	r3
#define Y	r4
#define U	r5
#define V	r6
#define COUNT	ip
#define YPAD	lr

@ -----------------------------------------------------------------------------
@ i422_yuyv_neon
@
@ Interleaves three byte planes (Y, U, V; one U and one V sample for every
@ two Y samples on a row) into packed bytes in the order
@ Y0 U0 Y1 V0 Y2 U1 Y3 V1 ...
@
@ In:    r0 = address of two words: output pointer, output row stride (bytes)
@        r1 = address of four words: Y pointer, U pointer, V pointer,
@             Y row stride (bytes); U and V rows are stepped by half of the
@             Y row padding
@        r2 = width in pixels
@        r3 = height in rows
@ Out:   no register result; 32 output bytes are stored per 16 pixels
@ Clobb: r0-r3, ip, q0-q1, flags (r4-r6 are saved and restored)
@
@ Constraints visible in the code:
@   - each inner iteration consumes 16 pixels, so the width is expected to be
@     a multiple of 16; otherwise the loop runs past the width and the per-row
@     pointer adjustments no longer land on the start of the next row
@   - U and V accesses carry a 64-bit alignment qualifier, Y and output
@     accesses a 128-bit one, so the row addresses must be aligned accordingly
@   - returns without storing anything when height <= 0 or width <= 0
@ -----------------------------------------------------------------------------
	.align 2
	.global i422_yuyv_neon
	.type	i422_yuyv_neon, %function
i422_yuyv_neon:
	push		{r4-r6,lr}
	ldmia		r1,	{Y, U, V, YPAD}		@ source pointers + Y stride
	ldmia		r0,	{O, OPAD}		@ output pointer + stride
	cmp		HEIGHT,	#0			@ flags consumed at 1: below
	sub		OPAD,	OPAD,	WIDTH,	lsl #1	@ stride -> end-of-row skip
	sub		YPAD,	YPAD,	WIDTH		@ stride -> end-of-row skip
1:
	@ Row entry. Flags come from the height test (cmp above or subs below).
	@ If rows remain, reload the pixel counter and test the width as well;
	@ the conditional pop then returns on height <= 0 or width <= 0.
	movsgt		COUNT,	WIDTH
	pople		{r4-r6,pc}
2:
	@ 16 pixels per pass: 8 U bytes, 8 V bytes, 16 Y bytes -> 32 output bytes.
	pld		[U, #64]
	vld1.u8		{d2},		[U,:64]!
	pld		[V, #64]
	vld1.u8		{d3},		[V,:64]!
	pld		[Y, #64]
	vzip.u8		d2,	d3			@ q1 = U0 V0 U1 V1 ...
	subs		COUNT,	COUNT,	#16
	vld1.u8		{q0},		[Y,:128]!
	vzip.u8		q0,	q1			@ q0:q1 = Y0 U0 Y1 V0 ...
	@ TODO: unroll (1 cycle stall)
	vst1.u8		{q0-q1},	[O,:128]!
	bgt		2b				@ flags from subs above

	@ End of row: count the row down (sets the flags tested at 1:) and move
	@ every pointer to the start of its next row. The adds leave flags alone.
	subs		HEIGHT,	#1
	add		U,	U,	YPAD,	lsr #1
	add		V,	V,	YPAD,	lsr #1
	add		Y,	Y,	YPAD
	add		O,	O,	OPAD
	b		1b
	.size	i422_yuyv_neon, .-i422_yuyv_neon

	@ ---------------------------------------------------------------------
	@ i422_uyvy_neon
	@
	@ Interleaves three byte planes (Y, U, V; one U and one V sample for
	@ every two Y samples on a row) into packed bytes in the order
	@ U0 Y0 V0 Y1 U1 Y2 V1 Y3 ...
	@
	@ In:    r0 = address of two words: output pointer, output row stride
	@        r1 = address of four words: Y pointer, U pointer, V pointer,
	@             Y row stride; U and V rows are stepped by half of the
	@             Y row padding
	@        r2 = width in pixels
	@        r3 = height in rows
	@ Out:   no register result; 32 output bytes are stored per 16 pixels
	@ Clobb: r0-r3, ip, q0-q1, flags (r4-r6 are saved and restored)
	@
	@ Constraints visible in the code:
	@   - each inner iteration consumes 16 pixels, so the width is expected
	@     to be a multiple of 16; otherwise the loop runs past the width and
	@     the per-row pointer adjustments miss the start of the next row
	@   - U and V accesses carry a 64-bit alignment qualifier, Y and output
	@     accesses a 128-bit one, so row addresses must be aligned to match
	@   - returns without storing anything when height <= 0 or width <= 0
	@ ---------------------------------------------------------------------
	.align 2
	.global i422_uyvy_neon
	.type	i422_uyvy_neon, %function
i422_uyvy_neon:
	push		{r4-r6,lr}
	ldmia		r1,	{Y, U, V, YPAD}		@ source pointers + Y stride
	ldmia		r0,	{O, OPAD}		@ output pointer + stride
	cmp		HEIGHT,	#0			@ flags consumed at 1: below
	sub		OPAD,	OPAD,	WIDTH,	lsl #1	@ stride -> end-of-row skip
	sub		YPAD,	YPAD,	WIDTH		@ stride -> end-of-row skip
1:
	@ Row entry. Flags come from the height test (cmp above or subs below).
	@ If rows remain, reload the pixel counter and test the width as well;
	@ the conditional pop then returns on height <= 0 or width <= 0.
	movsgt		COUNT,	WIDTH
	pople		{r4-r6,pc}
2:
	@ 16 pixels per pass: 8 U bytes, 8 V bytes, 16 Y bytes -> 32 output bytes.
	pld		[U, #64]
	vld1.u8		{d0},		[U,:64]!
	pld		[V, #64]
	vld1.u8		{d1},		[V,:64]!
	pld		[Y, #64]
	vzip.u8		d0,	d1			@ q0 = U0 V0 U1 V1 ...
	subs		COUNT,	COUNT,	#16
	vld1.u8		{q1},		[Y,:128]!
	vzip.u8		q0,	q1			@ q0:q1 = U0 Y0 V0 Y1 ...
	vst1.u8		{q0-q1},	[O,:128]!
	bgt		2b				@ flags from subs above

	@ End of row: count the row down (sets the flags tested at 1:) and move
	@ every pointer to the start of its next row. The adds leave flags alone.
	subs		HEIGHT,	#1
	add		U,	U,	YPAD,	lsr #1
	add		V,	V,	YPAD,	lsr #1
	add		Y,	Y,	YPAD
	add		O,	O,	OPAD
	b		1b
	.size	i422_uyvy_neon, .-i422_uyvy_neon