Commit 4a9b6e51 authored by Martin Storsjö

arm: Allow building assembly in thumb mode

Windows on ARM is Thumb-2 only.

Add the necessary "it" instructions before conditionally executed
instructions (these don't emit any extra instructions when not
building in thumb mode). The number of "it" instructions could
be reduced in some places by reordering the instructions, but the
original instruction order is kept to avoid any impact on existing
targets.

Remove redundant .arm directives; the assembler normally starts out
in that mode anyway, and for windows we shouldn't override the
default mode.
parent 4214cf05
......@@ -21,7 +21,6 @@
#include "asm.S"
.syntax unified
.arm
#if HAVE_AS_FPU_DIRECTIVE
.fpu neon
#endif
......@@ -33,6 +32,7 @@
.align 2
function amplify_float_arm_neon
cmp SIZE, #0
it eq
bxeq lr
#ifdef __ARM_PCS
vmov s0, r3 @ softfp
......
......@@ -52,6 +52,7 @@ function deinterleave_chroma_neon
sub IPAD, IPITCH, WIDTH, lsl #1
sub OPAD, OPITCH, WIDTH
1:
ite gt
movsgt COUNT, WIDTH
pople {r4-r6,pc}
2:
......
......@@ -95,6 +95,7 @@ function i420_rgb_neon
/* round the width to be a multiple of 16 */
ands OPAD, WIDTH, #15
sub WIDTH, WIDTH, OPAD
it ne
addne WIDTH, WIDTH, #16
/* init constants (scale value by 64) */
......@@ -115,10 +116,12 @@ function i420_rgb_neon
sub YPAD, YPITCH, WIDTH
loop_row:
it gt
movsgt COUNT, WIDTH
add O2, O1, OPITCH
add Y2, Y1, YPITCH
/* exit if all rows have been processed */
itt le
vpople {q4-q7}
pople {r4-r8,r10-r11,pc}
......
......@@ -98,6 +98,7 @@ function i420_rv16_neon
/* round the width to be a multiple of 16 */
ands OPAD, WIDTH, #15
sub WIDTH, WIDTH, OPAD
it ne
addne WIDTH, WIDTH, #16
/* init constants (scale value by 64) */
......@@ -117,10 +118,12 @@ function i420_rv16_neon
sub YPAD, YPITCH, WIDTH
loop_row:
it gt
movsgt COUNT, WIDTH
add O2, O1, OPITCH
add Y2, Y1, YPITCH
/* exit if all rows have been processed */
itt le
vpople {q4-q7}
pople {r4-r8,r10-r11,pc}
......
......@@ -49,9 +49,11 @@ function i420_yuyv_neon
sub OPAD, OPITCH, WIDTH, lsl #1
sub YPAD, YPITCH, WIDTH
1:
it gt
movsgt COUNT, WIDTH
add O2, O1, OPITCH
add Y2, Y1, YPITCH
it le
pople {r4-r8,r10-r11,pc}
2:
pld [U, #64]
......@@ -86,9 +88,11 @@ function i420_uyvy_neon
sub OPAD, OPITCH, WIDTH, lsl #1
sub YPAD, YPITCH, WIDTH
1:
it gt
movsgt COUNT, WIDTH
add O2, O1, OPITCH
add Y2, Y1, YPITCH
it le
pople {r4-r8,r10-r11,pc}
2:
pld [U, #64]
......
......@@ -45,6 +45,7 @@ function i422_yuyv_neon
sub OPAD, OPAD, WIDTH, lsl #1
sub YPAD, YPAD, WIDTH
1:
ite gt
movsgt COUNT, WIDTH
pople {r4-r6,pc}
2:
......@@ -76,6 +77,7 @@ function i422_uyvy_neon
sub OPAD, OPAD, WIDTH, lsl #1
sub YPAD, YPAD, WIDTH
1:
ite gt
movsgt COUNT, WIDTH
pople {r4-r6,pc}
2:
......
......@@ -91,6 +91,7 @@ function nv12_rgb_neon
/* round the width to be a multiple of 16 */
ands OPAD, WIDTH, #15
sub WIDTH, WIDTH, OPAD
it ne
addne WIDTH, WIDTH, #16
/* init constants (scale value by 64) */
......@@ -111,10 +112,12 @@ function nv12_rgb_neon
sub YPAD, YPITCH, WIDTH
loop_row:
it gt
movsgt COUNT, WIDTH
add O2, O1, OPITCH
add Y2, Y1, YPITCH
/* exit if all rows have been processed */
itt le
vpople {q4-q7}
pople {r4-r8,r10-r11,pc}
......
......@@ -91,6 +91,7 @@ function nv21_rgb_neon
/* round the width to be a multiple of 16 */
ands OPAD, WIDTH, #15
sub WIDTH, WIDTH, OPAD
it ne
addne WIDTH, WIDTH, #16
/* init constants (scale value by 64) */
......@@ -111,10 +112,12 @@ function nv21_rgb_neon
sub YPAD, YPITCH, WIDTH
loop_row:
it gt
movsgt COUNT, WIDTH
add O2, O1, OPITCH
add Y2, Y1, YPITCH
/* exit if all rows have been processed */
itt le
vpople {q4-q7}
pople {r4-r8,r10-r11,pc}
......
......@@ -45,6 +45,7 @@ function yuyv_i422_neon
sub YPAD, YPAD, WIDTH
sub IPAD, IPAD, WIDTH, lsl #1
1:
ite gt
movsgt COUNT, WIDTH
pople {r4-r6,pc}
2:
......@@ -74,6 +75,7 @@ function uyvy_i422_neon
sub YPAD, YPAD, WIDTH
sub IPAD, IPAD, WIDTH, lsl #1
1:
ite gt
movsgt COUNT, WIDTH
pople {r4-r6,pc}
2:
......
......@@ -21,7 +21,6 @@
#include "../arm_neon/asm.S"
.syntax unified
.arm
#if HAVE_AS_ARCH_DIRECTIVE
.arch armv6
#endif
......@@ -67,6 +66,7 @@ function merge8_arm_neon
vst1.u8 {q0-q1}, [DEST,:128]!
3:
cmp SIZE, #16
it lo
bxlo lr
vld1.u8 {q0}, [SRC1,:128]!
sub SIZE, SIZE, #16
......@@ -106,6 +106,7 @@ function merge16_arm_neon
vst1.u16 {q0-q1}, [DEST,:128]!
3:
cmp SIZE, #16
it lo
bxlo lr
vld1.u16 {q0}, [SRC1,:128]!
sub SIZE, SIZE, #16
......@@ -131,6 +132,7 @@ function merge8_armv6
stm DEST!, {r4-r5}
uhadd8 r7, r7, lr
stm DEST!, {r6-r7}
it eq
popeq {r4-r9,pc}
b 1b
......@@ -151,5 +153,6 @@ function merge16_armv6
stm DEST!, {r4-r5}
uhadd16 r7, r7, lr
stm DEST!, {r6-r7}
it eq
popeq {r4-r9,pc}
b 1b
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment