Commit ec4d9649 authored by Janne Grunau, committed by Martin Storsjö

arm: make the assembler functions compatible with non-ELF/gas platforms

Allow assembling ARM NEON functions for iOS and ARM Windows.
Signed-off-by: Martin Storsjö <martin@martin.st>
parent c5161337
......@@ -18,18 +18,20 @@
@ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
@****************************************************************************/
#include "asm.S"
.syntax unified
.arm
#if HAVE_AS_FPU_DIRECTIVE
.fpu neon
#endif
.text
#define DST r0
#define SRC r1
#define SIZE r2
.align 2
.global amplify_float_arm_neon
.type amplify_float_arm_neon, %function
amplify_float_arm_neon:
function amplify_float_arm_neon
cmp SIZE, #0
bxeq lr
#ifdef __ARM_PCS
......
/*
* Copyright (c) 2018 Janne Grunau <janne-libav@jannau.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
 * EXTERN_ASM is the prefix placed in front of every symbol name emitted by
 * the `function` macro: a single underscore when building for Apple
 * targets, nothing on all other targets.
 */
#ifdef __APPLE__
# define EXTERN_ASM _
#else
# define EXTERN_ASM
#endif
/*
 * HAVE_AS_ARCH_DIRECTIVE / HAVE_AS_FPU_DIRECTIVE are 0 when building for
 * Apple or Windows targets and 1 otherwise. Files including this header
 * test them with #if before emitting their `.arch` / `.fpu` directives.
 */
#if defined(__APPLE__) || defined(_WIN32)
# define HAVE_AS_ARCH_DIRECTIVE 0
# define HAVE_AS_FPU_DIRECTIVE 0
#else
# define HAVE_AS_ARCH_DIRECTIVE 1
# define HAVE_AS_FPU_DIRECTIVE 1
#endif
/*
 * function name
 *
 * Starts an exported function called `name`. The macro declares the
 * EXTERN_ASM-prefixed symbol as global, tags it as a function symbol when
 * the object format is ELF, and finally emits the label itself. The
 * instructions of the function follow the macro invocation.
 *
 *   name  unprefixed symbol name; EXTERN_ASM is prepended automatically.
 */
.macro function name
.globl EXTERN_ASM\name
/* The .type directive is only emitted when __ELF__ is defined. */
#ifdef __ELF__
.type EXTERN_ASM\name, %function
#endif
EXTERN_ASM\name:
.endm
......@@ -19,8 +19,12 @@
@ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
@****************************************************************************/
#include "asm.S"
.syntax unified
.fpu neon
#if HAVE_AS_FPU_DIRECTIVE
.fpu neon
#endif
.text
#define UV r0
......@@ -35,9 +39,7 @@
#define OPAD lr
.align 2
.global deinterleave_chroma_neon
.type deinterleave_chroma_neon, %function
deinterleave_chroma_neon:
function deinterleave_chroma_neon
push {r4-r6,lr}
ldmia r0, {U, V, OPITCH}
ldmia r1, {UV, IPITCH}
......
......@@ -19,8 +19,12 @@
@ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
@****************************************************************************/
#include "asm.S"
.syntax unified
.fpu neon
#if HAVE_AS_FPU_DIRECTIVE
.fpu neon
#endif
.text
/* ARM */
......@@ -80,9 +84,7 @@ coefficients:
.short -18432
.align 2
.global i420_rgb_neon
.type i420_rgb_neon, %function
i420_rgb_neon:
function i420_rgb_neon
push {r4-r8,r10-r11,lr}
vpush {q4-q7}
......
......@@ -19,8 +19,12 @@
@ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
@****************************************************************************/
#include "asm.S"
.syntax unified
.fpu neon
#if HAVE_AS_FPU_DIRECTIVE
.fpu neon
#endif
.text
/* ARM */
......@@ -83,9 +87,7 @@ coefficients:
.short -18432
.align 2
.global i420_rv16_neon
.type i420_rv16_neon, %function
i420_rv16_neon:
function i420_rv16_neon
push {r4-r8,r10-r11,lr}
vpush {q4-q7}
......
......@@ -18,8 +18,12 @@
@ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
@****************************************************************************/
#include "asm.S"
.syntax unified
.fpu neon
#if HAVE_AS_FPU_DIRECTIVE
.fpu neon
#endif
.text
#define O1 r0
......@@ -37,9 +41,7 @@
#define OPITCH lr
.align 2
.global i420_yuyv_neon
.type i420_yuyv_neon, %function
i420_yuyv_neon:
function i420_yuyv_neon
push {r4-r8,r10-r11,lr}
ldmia r0, {O1, OPITCH}
ldmia r1, {Y1, U, V, YPITCH}
......@@ -76,9 +78,7 @@ i420_yuyv_neon:
add V, V, YPAD, lsr #1
b 1b
.global i420_uyvy_neon
.type i420_uyvy_neon, %function
i420_uyvy_neon:
function i420_uyvy_neon
push {r4-r8,r10-r11,lr}
ldmia r0, {O1, OPITCH}
ldmia r1, {Y1, U, V, YPITCH}
......
......@@ -18,8 +18,12 @@
@ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
@****************************************************************************/
#include "asm.S"
.syntax unified
.fpu neon
#if HAVE_AS_FPU_DIRECTIVE
.fpu neon
#endif
.text
#define O r0
......@@ -33,9 +37,7 @@
#define YPAD lr
.align 2
.global i422_yuyv_neon
.type i422_yuyv_neon, %function
i422_yuyv_neon:
function i422_yuyv_neon
push {r4-r6,lr}
ldmia r1, {Y, U, V, YPAD}
ldmia r0, {O, OPAD}
......@@ -66,9 +68,7 @@ i422_yuyv_neon:
add O, O, OPAD
b 1b
.global i422_uyvy_neon
.type i422_uyvy_neon, %function
i422_uyvy_neon:
function i422_uyvy_neon
push {r4-r6,lr}
ldmia r1, {Y, U, V, YPAD}
ldmia r0, {O, OPAD}
......
......@@ -19,8 +19,12 @@
@ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
@****************************************************************************/
#include "asm.S"
.syntax unified
.fpu neon
#if HAVE_AS_FPU_DIRECTIVE
.fpu neon
#endif
.text
/* ARM */
......@@ -76,9 +80,7 @@ coefficients:
.short -18432
.align 2
.global nv12_rgb_neon
.type nv12_rgb_neon, %function
nv12_rgb_neon:
function nv12_rgb_neon
push {r4-r8,r10-r11,lr}
vpush {q4-q7}
......
......@@ -19,8 +19,12 @@
@ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
@****************************************************************************/
#include "asm.S"
.syntax unified
.fpu neon
#if HAVE_AS_FPU_DIRECTIVE
.fpu neon
#endif
.text
/* ARM */
......@@ -76,9 +80,7 @@ coefficients:
.short -18432
.align 2
.global nv21_rgb_neon
.type nv21_rgb_neon, %function
nv21_rgb_neon:
function nv21_rgb_neon
push {r4-r8,r10-r11,lr}
vpush {q4-q7}
......
......@@ -19,7 +19,11 @@
@ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
@****************************************************************************/
.fpu neon
#include "asm.S"
#if HAVE_AS_FPU_DIRECTIVE
.fpu neon
#endif
.text
.align 2
......@@ -34,9 +38,7 @@ coeff_7to2:
.float 0.5
.float 0.25
.float 0.25
.global convert_7_x_to_2_0_neon_asm
.type convert_7_x_to_2_0_neon_asm, %function
convert_7_x_to_2_0_neon_asm:
function convert_7_x_to_2_0_neon_asm
push {r4,lr}
adr COEFF, coeff_7to2
......@@ -70,9 +72,7 @@ coeff_5to2:
.float 0.5
.float 0.33
.float 0.33
.global convert_5_x_to_2_0_neon_asm
.type convert_5_x_to_2_0_neon_asm, %function
convert_5_x_to_2_0_neon_asm:
function convert_5_x_to_2_0_neon_asm
push {r4,lr}
adr COEFF, coeff_5to2
......@@ -100,9 +100,7 @@ convert_5_x_to_2_0_neon_asm:
coeff_4to2:
.float 0.5
.float 0.5
.global convert_4_0_to_2_0_neon_asm
.type convert_4_0_to_2_0_neon_asm, %function
convert_4_0_to_2_0_neon_asm:
function convert_4_0_to_2_0_neon_asm
push {r4,lr}
adr COEFF, coeff_4to2
......@@ -124,9 +122,7 @@ convert_4_0_to_2_0_neon_asm:
coeff_3to2:
.float 0.5
.float 0.5
.global convert_3_x_to_2_0_neon_asm
.type convert_3_x_to_2_0_neon_asm, %function
convert_3_x_to_2_0_neon_asm:
function convert_3_x_to_2_0_neon_asm
push {r4,lr}
adr COEFF, coeff_3to2
......@@ -154,9 +150,7 @@ coeff_7to1:
.float 0.25
.float 0.125
.float 0.125
.global convert_7_x_to_1_0_neon_asm
.type convert_7_x_to_1_0_neon_asm, %function
convert_7_x_to_1_0_neon_asm:
function convert_7_x_to_1_0_neon_asm
push {r4,lr}
adr COEFF, coeff_7to1
......@@ -188,9 +182,7 @@ coeff_5to1:
.float 0.25
.float 0.16666667
.float 0.16666667
.global convert_5_x_to_1_0_neon_asm
.type convert_5_x_to_1_0_neon_asm, %function
convert_5_x_to_1_0_neon_asm:
function convert_5_x_to_1_0_neon_asm
push {r4,lr}
adr COEFF, coeff_5to1
......@@ -219,9 +211,7 @@ coeff_7to4:
.float 0.5
.float 0.16666667
.float 0.16666667
.global convert_7_x_to_4_0_neon_asm
.type convert_7_x_to_4_0_neon_asm, %function
convert_7_x_to_4_0_neon_asm:
function convert_7_x_to_4_0_neon_asm
push {r4,lr}
adr COEFF, coeff_7to4
......@@ -252,9 +242,7 @@ convert_7_x_to_4_0_neon_asm:
coeff_5to4:
.float 0.5
.float 0.5
.global convert_5_x_to_4_0_neon_asm
.type convert_5_x_to_4_0_neon_asm, %function
convert_5_x_to_4_0_neon_asm:
function convert_5_x_to_4_0_neon_asm
push {r4,lr}
adr COEFF, coeff_5to4
......
......@@ -18,8 +18,12 @@
@ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
@****************************************************************************/
#include "asm.S"
.syntax unified
.fpu neon
#if HAVE_AS_FPU_DIRECTIVE
.fpu neon
#endif
.text
#define I r0
......@@ -33,9 +37,7 @@
#define YPAD lr
.align 2
.global yuyv_i422_neon
.type yuyv_i422_neon, %function
yuyv_i422_neon:
function yuyv_i422_neon
push {r4-r6,lr}
ldmia r0, {Y, U, V, YPAD}
ldmia r1, {I, IPAD}
......@@ -64,9 +66,7 @@ yuyv_i422_neon:
add V, V, YPAD, lsr #1
b 1b
.global uyvy_i422_neon
.type uyvy_i422_neon, %function
uyvy_i422_neon:
function uyvy_i422_neon
push {r4-r6,lr}
ldmia r0, {Y, U, V, YPAD}
ldmia r1, {I, IPAD}
......
......@@ -18,10 +18,16 @@
@ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
@****************************************************************************/
#include "../arm_neon/asm.S"
.syntax unified
.arm
#if HAVE_AS_ARCH_DIRECTIVE
.arch armv6
#endif
#if HAVE_AS_FPU_DIRECTIVE
.fpu neon
#endif
.text
#define DEST r0
......@@ -30,10 +36,8 @@
#define SIZE r3
.align 2
.global merge8_arm_neon
.type merge8_arm_neon, %function
@ NOTE: Offset and pitch must be multiple of 16-bytes in VLC.
merge8_arm_neon:
function merge8_arm_neon
cmp SIZE, #64
blo 2f
1:
......@@ -72,9 +76,7 @@ merge8_arm_neon:
bx lr
.align 2
.global merge16_arm_neon
.type merge16_arm_neon, %function
merge16_arm_neon:
function merge16_arm_neon
cmp SIZE, #64
blo 2f
1:
......@@ -113,9 +115,7 @@ merge16_arm_neon:
bx lr
.align 2
.global merge8_armv6
.type merge8_armv6, %function
merge8_armv6:
function merge8_armv6
push {r4-r9,lr}
1:
pld [SRC1, #64]
......@@ -135,9 +135,7 @@ merge8_armv6:
b 1b
.align 2
.global merge16_armv6
.type merge16_armv6, %function
merge16_armv6:
function merge16_armv6
push {r4-r9,lr}
1:
pld [SRC1, #64]
......
......@@ -19,6 +19,8 @@
// Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
//****************************************************************************/
#include "../../arm_neon/asm.S"
.text
#define DEST x0
......@@ -27,10 +29,8 @@
#define SIZE x3
.align 2
.global merge8_arm64_neon
.type merge8_arm64_neon, %function
// NOTE: Offset and pitch must be multiple of 16-bytes in VLC.
merge8_arm64_neon:
function merge8_arm64_neon
ands x5, SIZE, #~63
b.eq 2f
mov x10, #64
......@@ -66,9 +66,7 @@ merge8_arm64_neon:
ret
.align 2
.global merge16_arm64_neon
.type merge16_arm64_neon, %function
merge16_arm64_neon:
function merge16_arm64_neon
ands x5, SIZE, #~63
b.eq 2f
1:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment