Commit b461e015, authored by Martin Storsjö and committed by Anton Mitrofanov
Browse files

Update to the latest upstream version of gas-preprocessor

This version supports converting aarch64 assembly for MS armasm64.exe.
parent 61e8b5cc
......@@ -97,8 +97,12 @@ if (grep /\.c$/, @gcc_cmd) {
if ($as_type eq "armasm") {
$preprocess_c_cmd[0] = "cpp";
push(@preprocess_c_cmd, "-U__ELF__");
push(@preprocess_c_cmd, "-U__MACH__");
push(@preprocess_c_cmd, "-undef");
# Normally a preprocessor for windows would predefine _WIN32,
# but we're using any generic system-agnostic preprocessor "cpp"
# with -undef (to avoid getting predefined variables from the host
# system in cross compilation cases), so manually define it here.
push(@preprocess_c_cmd, "-D_WIN32");
@preprocess_c_cmd = grep ! /^-nologo$/, @preprocess_c_cmd;
# Remove -ignore XX parameter pairs from preprocess_c_cmd
......@@ -245,7 +249,7 @@ my $thumb = 0;
my %thumb_labels;
my %call_targets;
my %mov32_targets;
my %import_symbols;
my %neon_alias_reg;
my %neon_alias_type;
......@@ -270,7 +274,7 @@ while (<INPUT>) {
# the line indicates a comment for all supported archs (aarch64, arm, ppc
# and x86). Also strips line number comments but since they are off anyway
# it is no loss.
s/^#.*$//;
s/^\s*#.*$//;
# remove all comments (to avoid interfering with evaluating directives)
s/(?<!\\)$inputcomm.*//x;
# Strip out windows linefeeds
......@@ -626,18 +630,26 @@ sub is_arm_register {
return 0;
}
# Report whether the given operand name is spelled like an aarch64 general
# purpose register, i.e. "x" or "w" followed by one or more digits.
# The register number itself is not range-checked.
#
# Arguments:
#   $_[0] - the operand name to inspect
# Returns 1 if the name has the x<N>/w<N> form, 0 otherwise.
sub is_aarch64_register {
    my ($candidate) = @_;
    return ($candidate =~ /^[xw]\d+$/) ? 1 : 0;
}
# Rewrite references to a GNU as style numeric local label (e.g. "1b" or
# "2f") within a line into a generated temp label name.
#
# Arguments:
#   $_[0] - the line of assembly containing the reference
#   $_[1] - the numeric part of the local label
#   $_[2] - the direction suffix; "b" is a backward reference, anything
#           else is handled as a forward reference
#
# For a backward reference, the reference is replaced with the name stored
# in $last_temp_labels{$num}. For a forward reference, a fresh name
# "temp_label_<N>" is allocated from the $temp_label_next counter, appended
# to @{$next_temp_labels{$num}} (presumably consumed when the label itself
# is later defined - confirm against the label handling code), and
# substituted into the line.
#
# Returns the rewritten line.
sub handle_local_label {
    my ($line, $num, $dir) = @_;
    my $target = "$num$dir";
    # The reference is matched only as a whole word; an unanchored match
    # would also rewrite the tail of longer tokens, e.g. turn "11b" into
    # "1<label>" when handling "1b".
    if ($dir eq "b") {
        $line =~ s/\b$target\b/$last_temp_labels{$num}/g;
    } else {
        my $name = "temp_label_$temp_label_next";
        $temp_label_next++;
        push(@{$next_temp_labels{$num}}, $name);
        $line =~ s/\b$target\b/$name/g;
    }
    return $line;
}
......@@ -677,9 +689,9 @@ sub handle_serialized_line {
}
# handle GNU as pc-relative relocations for adrp/add
if ($line =~ /(.*)\s*adrp([\w\s\d]+)\s*,\s*#?:pg_hi21:([^\s]+)/) {
if ($line =~ /(.*)\s*adrp([\w\s\d]+)\s*,\s*#?:pg_hi21:([^\s]+)/ and $as_type =~ /^apple-/) {
$line = "$1 adrp$2, ${3}\@PAGE\n";
} elsif ($line =~ /(.*)\s*add([\w\s\d]+)\s*,([\w\s\d]+)\s*,\s*#?:lo12:([^\s]+)/) {
} elsif ($line =~ /(.*)\s*add([\w\s\d]+)\s*,([\w\s\d]+)\s*,\s*#?:lo12:([^\s]+)/ and $as_type =~ /^apple-/) {
$line = "$1 add$2, $3, ${4}\@PAGEOFF\n";
}
......@@ -705,7 +717,7 @@ sub handle_serialized_line {
my $cond = $3;
my $label = $4;
# Don't interpret e.g. bic as b<cc> with ic as conditional code
if ($cond =~ /|$arm_cond_codes/) {
if ($cond =~ /^(|$arm_cond_codes)$/) {
if (exists $thumb_labels{$label}) {
print ASMFILE ".thumb_func $label\n";
} else {
......@@ -785,24 +797,25 @@ sub handle_serialized_line {
if ($arch eq "aarch64") {
# fix missing aarch64 instructions in Xcode 5.1 (beta3)
# mov with vector arguments is not supported, use alias orr instead
if ($line =~ /^\s*mov\s+(v\d[\.{}\[\]\w]+),\s*(v\d[\.{}\[\]\w]+)\b\s*$/) {
$line = " orr $1, $2, $2\n";
if ($line =~ /^(\d+:)?\s*mov\s+(v\d[\.{}\[\]\w]+),\s*(v\d[\.{}\[\]\w]+)\b\s*$/) {
$line = "$1 orr $2, $3, $3\n";
}
# movi 16, 32 bit shifted variant, shift is optional
if ($line =~ /^\s*movi\s+(v[0-3]?\d\.(?:2|4|8)[hsHS])\s*,\s*(#\w+)\b\s*$/) {
$line = " movi $1, $2, lsl #0\n";
if ($line =~ /^(\d+:)?\s*movi\s+(v[0-3]?\d\.(?:2|4|8)[hsHS])\s*,\s*(#\w+)\b\s*$/) {
$line = "$1 movi $2, $3, lsl #0\n";
}
# Xcode 5 misses the alias uxtl. Replace it with the more general ushll.
# Clang 3.4 misses the alias sxtl too. Replace it with the more general sshll.
if ($line =~ /^\s*(s|u)xtl(2)?\s+(v[0-3]?\d\.[248][hsdHSD])\s*,\s*(v[0-3]?\d\.(?:2|4|8|16)[bhsBHS])\b\s*$/) {
$line = " $1shll$2 $3, $4, #0\n";
# armasm64 also misses these instructions.
if ($line =~ /^(\d+:)?\s*(s|u)xtl(2)?\s+(v[0-3]?\d\.[248][hsdHSD])\s*,\s*(v[0-3]?\d\.(?:2|4|8|16)[bhsBHS])\b\s*$/) {
$line = "$1 $2shll$3 $4, $5, #0\n";
}
# clang 3.4 does not automatically use shifted immediates in add/sub
if ($as_type eq "clang" and
$line =~ /^(\s*(?:add|sub)s?) ([^#l]+)#([\d\+\-\*\/ <>]+)\s*$/) {
my $imm = eval $3;
# clang 3.4 and armasm64 do not automatically use shifted immediates in add/sub
if (($as_type eq "clang" or $as_type eq "armasm") and
$line =~ /^(\d+:)?(\s*(?:add|sub)s?) ([^#l]+)#([\d\+\-\*\/ <>]+)\s*$/) {
my $imm = eval $4;
if ($imm > 4095 and not ($imm & 4095)) {
$line = "$1 $2#" . ($imm >> 12) . ", lsl #12\n";
$line = "$1 $2 $3#" . ($imm >> 12) . ", lsl #12\n";
}
}
if ($ENV{GASPP_FIX_XCODE5}) {
......@@ -853,7 +866,7 @@ sub handle_serialized_line {
$last_temp_labels{$num} = $name;
}
if ($line =~ s/^(\w+):/$1/) {
if ($line =~ s/^\s*(\w+):/$1/) {
# Skip labels that have already been declared with a PROC,
# labels must not be declared multiple times.
return if (defined $labels_seen{$1});
......@@ -866,21 +879,40 @@ sub handle_serialized_line {
# Check branch instructions
if ($line =~ /(?:^|\n)\s*(\w+\s*:\s*)?(bl?x?(..)?(\.w)?)\s+(\w+)/) {
if ($line =~ /(?:^|\n)\s*(\w+\s*:\s*)?(bl?x?\.?(..)?(\.w)?)\s+(\w+)/) {
my $instr = $2;
my $cond = $3;
my $width = $4;
my $target = $5;
# Don't interpret e.g. bic as b<cc> with ic as conditional code
if ($cond !~ /|$arm_cond_codes/) {
if ($cond !~ /^(|$arm_cond_codes)$/) {
# Not actually a branch
} elsif ($target =~ /^(\d+)([bf])$/) {
# The target is a local label
$line = handle_local_label($line, $1, $2);
$line =~ s/\b$instr\b/$&.w/ if $width eq "";
} elsif (!is_arm_register($target)) {
$line =~ s/\b$instr\b/$&.w/ if $width eq "" and $arch eq "arm";
} elsif (($arch eq "arm" and !is_arm_register($target)) or
($arch eq "aarch64" and !is_aarch64_register($target))) {
$call_targets{$target}++;
}
} elsif ($line =~ /(?:^|\n)\s*(\w+\s*:\s*)?(cbn?z|adr|tbz)\s+(\w+)\s*,(\s*#\d+\s*,)?\s*(\w+)/) {
my $instr = $2;
my $reg = $3;
my $bit = $4;
my $target = $5;
if ($target =~ /^(\d+)([bf])$/) {
# The target is a local label
$line = handle_local_label($line, $1, $2);
} else {
$call_targets{$target}++;
}
# Convert tbz with a wX register into an xX register,
# due to armasm64 bugs/limitations.
if ($instr eq "tbz" and $reg =~ /w\d+/) {
my $xreg = $reg;
$xreg =~ s/w/x/;
$line =~ s/\b$reg\b/$xreg/;
}
} elsif ($line =~ /^\s*.h?word.*\b\d+[bf]\b/) {
while ($line =~ /\b(\d+)([bf])\b/g) {
$line = handle_local_label($line, $1, $2);
......@@ -918,19 +950,106 @@ sub handle_serialized_line {
$line =~ s/\(\s*(\d+)\s*([<>])\s*(\d+)\s*\)/$val/;
}
# Change a movw... #:lower16: into a mov32 pseudoinstruction
$line =~ s/^(\s*)movw(\s+\w+\s*,\s*)\#:lower16:(.*)$/$1mov32$2$3/;
# and remove the following, matching movt completely
$line =~ s/^\s*movt\s+\w+\s*,\s*\#:upper16:.*$//;
if ($arch eq "arm") {
# Change a movw... #:lower16: into a mov32 pseudoinstruction
$line =~ s/^(\s*)movw(\s+\w+\s*,\s*)\#:lower16:(.*)$/$1mov32$2$3/;
# and remove the following, matching movt completely
$line =~ s/^\s*movt\s+\w+\s*,\s*\#:upper16:.*$//;
if ($line =~ /^\s*mov32\s+\w+,\s*([a-zA-Z]\w*)/) {
$mov32_targets{$1}++;
}
if ($line =~ /^\s*mov32\s+\w+,\s*([a-zA-Z]\w*)/) {
$import_symbols{$1}++;
}
# Misc bugs/deficiencies:
# armasm seems unable to parse e.g. "vmov s0, s1" without a type
# qualifier, thus add .f32.
$line =~ s/^(\s+(?:vmov|vadd))(\s+s\d+\s*,\s*s\d+)/$1.f32$2/;
# Misc bugs/deficiencies:
# armasm seems unable to parse e.g. "vmov s0, s1" without a type
# qualifier, thus add .f32.
$line =~ s/^(\s+(?:vmov|vadd))(\s+s\d+\s*,\s*s\d+)/$1.f32$2/;
} elsif ($arch eq "aarch64") {
# Convert ext into ext8; armasm64 seems to require it named as ext8.
$line =~ s/^(\s+)ext(\s+)/$1ext8$2/;
# Pick up targets from ldr x0, =sym+offset
if ($line =~ /^\s*ldr\s+(\w+)\s*,\s*=([a-zA-Z]\w*)(.*)$/) {
my $reg = $1;
my $sym = $2;
my $offset = eval_expr($3);
if ($offset < 0) {
# armasm64 is buggy with ldr x0, =sym+offset where the
# offset is a negative value; it does write a negative
# offset into the literal pool as it should, but the
# negative offset only covers the lower 32 bit of the 64
# bit literal/relocation.
# Thus remove the offset and apply it manually with a sub
# afterwards.
$offset = -$offset;
$line = "\tldr $reg, =$sym\n\tsub $reg, $reg, #$offset\n";
}
$import_symbols{$sym}++;
}
# armasm64 (currently) doesn't support offsets on adrp targets,
# even though the COFF format relocations (and the linker)
# supports it. Therefore strip out the offsets from adrp and
# add :lo12: (in case future armasm64 would start handling it)
# and add an extra explicit add instruction for the offset.
if ($line =~ s/(adrp\s+\w+\s*,\s*(\w+))([\d\+\-\*\/\(\) <>]+)?/\1/) {
$import_symbols{$2}++;
}
if ($line =~ s/(add\s+(\w+)\s*,\s*\w+\s*,\s*):lo12:(\w+)([\d\+\-\*\/\(\) <>]+)?/\1\3/) {
my $reg = $2;
my $sym = $3;
my $offset = eval_expr($4);
$line .= "\tadd $reg, $reg, #$offset\n" if $offset > 0;
$import_symbols{$sym}++;
}
# Convert e.g. "add x0, x0, w0, uxtw" into "add x0, x0, w0, uxtw #0",
# or "ldr x0, [x0, w0, uxtw]" into "ldr x0, [x0, w0, uxtw #0]".
$line =~ s/(uxtw|sxtw)(\s*\]?\s*)$/\1 #0\2/i;
# Convert "mov x0, v0.d[0]" into "umov x0, v0.d[0]"
$line =~ s/\bmov\s+[xw]\d+\s*,\s*v\d+\.[ds]/u$&/i;
# Convert "ccmp w0, #0, #0, ne" into "ccmpne w0, #0, #0",
# and "csel w0, w0, w0, ne" into "cselne w0, w0, w0".
$line =~ s/(ccmp|csel)\s+([xw]\w+)\s*,\s*([xw#]\w+)\s*,\s*([xw#]\w+)\s*,\s*($arm_cond_codes)/\1\5 \2, \3, \4/;
# Convert "cinc w0, w0, ne" into "cincne w0, w0".
$line =~ s/(cinc)\s+([xw]\w+)\s*,\s*([xw]\w+)\s*,\s*($arm_cond_codes)/\1\4 \2, \3/;
# Convert "cset w0, lo" into "csetlo w0"
$line =~ s/(cset)\s+([xw]\w+)\s*,\s*($arm_cond_codes)/\1\3 \2/;
# Strip out prfum; armasm64 fails to assemble any
# variant/combination of prfum tested so far, but it can be
# left out without any functional change, since it is only a prefetch hint.
$line =~ s/prfum.*\]//;
# Convert "ldrb w0, [x0, #-1]" into "ldurb w0, [x0, #-1]".
# Don't do this for forms with writeback though.
if ($line =~ /(ld|st)(r[bh]?)\s+(\w+)\s*,\s*\[\s*(\w+)\s*,\s*#([^\]]+)\s*\][^!]/) {
my $instr = $1;
my $suffix = $2;
my $target = $3;
my $base = $4;
my $offset = eval_expr($5);
if ($offset < 0) {
$line =~ s/$instr$suffix/${instr}u$suffix/;
}
}
if ($ENV{GASPP_ARMASM64_INVERT_SCALE}) {
# Instructions like fcvtzs and scvtf store the scale value
# inverted in the opcode (stored as 64 - scale), but armasm64
# in early versions stores it as-is. Thus convert from
# "fcvtzs w0, s0, #8" into "fcvtzs w0, s0, #56".
if ($line =~ /(?:fcvtzs|scvtf)\s+(\w+)\s*,\s*(\w+)\s*,\s*#(\d+)/) {
my $scale = $3;
my $inverted_scale = 64 - $3;
$line =~ s/#$scale/#$inverted_scale/;
}
}
}
# armasm is unable to parse &0x - add spacing
$line =~ s/&0x/& 0x/g;
}
......@@ -944,7 +1063,7 @@ sub handle_serialized_line {
# Convert "mov pc, lr" into "bx lr", since the former only works
# for switching from arm to thumb (and only in armv7), but not
# from thumb to arm.
s/mov\s*pc\s*,\s*lr/bx lr/g;
$line =~ s/mov\s*pc\s*,\s*lr/bx lr/g;
# Convert stmdb/ldmia/stmfd/ldmfd/ldm with only one register into a plain str/ldr with post-increment/decrement.
# Wide thumb2 encoding requires at least two registers in register list while all other encodings support one register too.
......@@ -1013,11 +1132,16 @@ sub handle_serialized_line {
$line =~ s/\.text/AREA |.text|, CODE, READONLY, ALIGN=4, CODEALIGN/;
$line =~ s/(\s*)(.*)\.rodata/$1AREA |.rodata|, DATA, READONLY, ALIGN=5/;
$line =~ s/\.data/AREA |.data|, DATA, ALIGN=5/;
}
if ($as_type eq "armasm" and $arch eq "arm") {
$line =~ s/fmxr/vmsr/;
$line =~ s/fmrx/vmrs/;
$line =~ s/fadds/vadd.f32/;
}
if ($as_type eq "armasm" and $arch eq "aarch64") {
# Convert "b.eq" into "beq"
$line =~ s/\bb\.($arm_cond_codes)\b/b\1/;
}
# catch unknown section names that aren't mach-o style (with a comma)
if ($as_type =~ /apple-/ and $line =~ /.section ([^,]*)$/) {
......@@ -1038,7 +1162,7 @@ if ($as_type ne "armasm") {
grep exists $thumb_labels{$_}, keys %call_targets;
} else {
map print(ASMFILE "\tIMPORT $_\n"),
grep ! exists $labels_seen{$_}, (keys %call_targets, keys %mov32_targets);
grep ! exists $labels_seen{$_}, (keys %call_targets, keys %import_symbols);
print ASMFILE "\tEND\n";
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment