Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Download
52867 views
1
#!/usr/bin/env perl
2
# by David Conrad
3
# This code is licensed under GPLv2 or later; go to gnu.org to read it
4
# (not that it much matters for an asm preprocessor)
5
# usage: set your assembler to be something like "perl gas-preprocessor.pl gcc"
6
use strict;
7
8
# Apple's gas is ancient and doesn't support modern preprocessing features like
9
# .rept and has ugly macro syntax, among other things. Thus, this script
10
# implements the subset of the gas preprocessor used by x264 and ffmpeg
11
# that isn't supported by Apple's gas.
12
13
# Maps every accepted architecture spelling onto its canonical name.
my %canonical_arch = ("aarch64" => "aarch64", "arm64" => "aarch64",
                      "arm"     => "arm",
                      "powerpc" => "powerpc", "ppc"   => "powerpc");

# Line-comment introducer used by the input assembly, keyed by canonical arch.
my %comments = ("aarch64" => '//',
                "arm"     => '@',
                "powerpc" => '#');

# Command line of the assembler/compiler we wrap, and the derived command
# used to run the C preprocessor over the input file.
my @gcc_cmd;
my @preprocess_c_cmd;

my $comm;                     # comment introducer of the *output* syntax
my $arch;                     # target architecture (canonicalised later)
my $as_type = "apple-gas";    # flavour of the assembler being targeted

# The .unreq workaround defaults to on when running on darwin.
my $fix_unreq = $^O eq "darwin";
my $force_thumb = 0;

# Alternation of the ARM condition-code suffixes, for use inside a regex.
my $arm_cond_codes = "eq|ne|cs|cc|mi|pl|vs|vc|hi|ls|ge|lt|gt|le|al|hs|lo";

my $usage_str = "
$0\n
Gas-preprocessor.pl converts assembler files using modern GNU as syntax for
Apple's ancient gas version or clang's incompatible integrated assembler. The
conversion is regularly tested for Libav, x264 and vlc. Other projects might
use different features which are not correctly handled.

Options for this program needs to be separated with ' -- ' from the assembler
command. Following options are currently supported:

-help - this usage text
-arch - target architecture
-as-type - one value out of {{,apple-}{gas,clang},armasm}
-fix-unreq
-no-fix-unreq
-force-thumb - assemble as thumb regardless of the input source
(note, this is incomplete and only works for sources
it explicitly was tested with)
";
53
# Print the usage text to the currently selected output handle.
sub usage() {
    print $usage_str;
}
56
57
# Consume our own options from @ARGV. Parsing stops as soon as @gcc_cmd has
# been filled, i.e. at "--" or at the first word that is not an option; the
# remaining words form the wrapped assembler/compiler command.
while (@ARGV) {
    my $opt = shift;

    if ($opt =~ /^-(no-)?fix-unreq$/) {
        $fix_unreq = $1 ne "no-";
    } elsif ($opt eq "-force-thumb") {
        $force_thumb = 1;
    } elsif ($opt eq "-arch") {
        $arch = shift;
        # Validate against every accepted spelling (arm64, ppc, ...), not only
        # the canonical names; the value is canonicalised further down.
        die "unknown arch: '$arch'\n" if not exists $canonical_arch{$arch};
    } elsif ($opt eq "-as-type") {
        $as_type = shift;
        die "unknown as type: '$as_type'\n" if $as_type !~ /^((apple-)?(gas|clang)|armasm)$/;
    } elsif ($opt eq "-help") {
        usage();
        exit 0;
    } elsif ($opt eq "--" ) {
        @gcc_cmd = @ARGV;
    } elsif ($opt =~ /^-/) {
        die "option '$opt' is not known. See '$0 -help' for usage information\n";
    } else {
        push @gcc_cmd, $opt, @ARGV;
    }
    last if (@gcc_cmd);
}
82
83
# Decide how to obtain preprocessed assembly from the wrapped command line.
if (grep /\.c$/, @gcc_cmd) {
    # C file (inline asm?) - compile
    @preprocess_c_cmd = (@gcc_cmd, "-S");
} elsif (grep /\.[sS]$/, @gcc_cmd) {
    # asm file, just do C preprocessor
    @preprocess_c_cmd = (@gcc_cmd, "-E");
} elsif (grep /-(v|h|-version|dumpversion)/, @gcc_cmd) {
    # pass -v/--version along, used during probing. Matching '-v' might have
    # unintended results but it doesn't matter much if gas-preprocessor or
    # the compiler fails.
    exec(@gcc_cmd) or die "Error running '@gcc_cmd': $!\n";
} else {
    die "Unrecognized input filetype";
}

if ($as_type eq "armasm") {
    # armasm cannot preprocess; use cpp and hide the object-format macros.
    $preprocess_c_cmd[0] = "cpp";
    push(@preprocess_c_cmd, "-U__ELF__");
    push(@preprocess_c_cmd, "-U__MACH__");

    @preprocess_c_cmd = grep ! /^-nologo$/, @preprocess_c_cmd;
    # Remove -ignore XX parameter pairs from preprocess_c_cmd
    my $index = 1;
    while ($index < $#preprocess_c_cmd) {
        if ($preprocess_c_cmd[$index] eq "-ignore" and $index + 1 < $#preprocess_c_cmd) {
            splice(@preprocess_c_cmd, $index, 2);
            next;
        }
        $index++;
    }
    # Dependency generation only needs the preprocessor; run it and stop.
    if (grep /^-MM$/, @preprocess_c_cmd) {
        system(@preprocess_c_cmd) == 0 or die "Error running preprocessor";
        exit 0;
    }
}

# if compiling, avoid creating an output file named '-.o'
if ((grep /^-c$/, @gcc_cmd) && !(grep /^-o/, @gcc_cmd)) {
    foreach my $i (@gcc_cmd) {
        if ($i =~ /\.[csS]$/) {
            my $outputfile = $i;
            $outputfile =~ s/\.[csS]$/.o/;
            push(@gcc_cmd, "-o");
            push(@gcc_cmd, $outputfile);
            last;
        }
    }
}
# replace only the '-o' argument with '-', avoids rewriting the make dependency
# target specified with -MT to '-'
my $index = 1;
while ($index < $#preprocess_c_cmd) {
    if ($preprocess_c_cmd[$index] eq "-o") {
        $index++;
        $preprocess_c_cmd[$index] = "-";
    }
    $index++;
}

my $tempfile;
if ($as_type ne "armasm") {
    # The assembler reads the converted source from stdin.
    @gcc_cmd = map { /\.[csS]$/ ? qw(-x assembler -) : $_ } @gcc_cmd;
} else {
    # Drop compiler-only switches that cpp does not understand.
    @preprocess_c_cmd = grep ! /^-c$/, @preprocess_c_cmd;
    @preprocess_c_cmd = grep ! /^-m/, @preprocess_c_cmd;

    @preprocess_c_cmd = grep ! /^-G/, @preprocess_c_cmd;
    @preprocess_c_cmd = grep ! /^-W/, @preprocess_c_cmd;
    @preprocess_c_cmd = grep ! /^-Z/, @preprocess_c_cmd;
    @preprocess_c_cmd = grep ! /^-fp/, @preprocess_c_cmd;
    @preprocess_c_cmd = grep ! /^-EHsc$/, @preprocess_c_cmd;
    @preprocess_c_cmd = grep ! /^-O/, @preprocess_c_cmd;

    @gcc_cmd = grep ! /^-G/, @gcc_cmd;
    @gcc_cmd = grep ! /^-W/, @gcc_cmd;
    @gcc_cmd = grep ! /^-Z/, @gcc_cmd;
    @gcc_cmd = grep ! /^-fp/, @gcc_cmd;
    @gcc_cmd = grep ! /^-EHsc$/, @gcc_cmd;
    @gcc_cmd = grep ! /^-O/, @gcc_cmd;

    # armasm reads from a file: name a temporary one after the first object.
    my @outfiles = grep /\.(o|obj)$/, @gcc_cmd;
    $tempfile = $outfiles[0].".asm";

    # Remove most parameters from gcc_cmd, which actually is the armasm command,
    # which doesn't support any of the common compiler/preprocessor options.
    @gcc_cmd = grep ! /^-D/, @gcc_cmd;
    @gcc_cmd = grep ! /^-U/, @gcc_cmd;
    @gcc_cmd = grep ! /^-m/, @gcc_cmd;
    @gcc_cmd = grep ! /^-M/, @gcc_cmd;
    @gcc_cmd = grep ! /^-c$/, @gcc_cmd;
    @gcc_cmd = grep ! /^-I/, @gcc_cmd;
    @gcc_cmd = map { /\.S$/ ? $tempfile : $_ } @gcc_cmd;
}
176
177
# detect architecture from gcc binary name
if (!$arch) {
    if ($gcc_cmd[0] =~ /(arm64|aarch64|arm|powerpc|ppc)/) {
        $arch = $1;
    } else {
        # look for -arch flag
        foreach my $i (1 .. $#gcc_cmd-1) {
            if ($gcc_cmd[$i] eq "-arch" and
                $gcc_cmd[$i+1] =~ /(arm64|aarch64|arm|powerpc|ppc)/) {
                $arch = $1;
            }
        }
    }
}

# assume we're not cross-compiling if no -arch or the binary doesn't have the arch name
$arch = qx/arch/ if (!$arch);
# qx// keeps the command's trailing newline, which would make the hash lookup
# below fail; strip all whitespace from the name.
$arch = '' if not defined $arch;
$arch =~ s/\s+//g;

die "Unknown target architecture '$arch'" if not exists $canonical_arch{$arch};

$arch = $canonical_arch{$arch};
$comm = $comments{$arch};
# Comment introducer of the input; the output uses ';' when targeting armasm.
my $inputcomm = $comm;
$comm = ";" if $as_type =~ /armasm/;

# PowerPC special purpose register numbers used to rewrite mt<spr>/mf<spr>.
my %ppc_spr = (ctr    => 9,
               vrsave => 256);
204
205
# INPUT: output of the C preprocessor run over the source file.
open(INPUT, "-|", @preprocess_c_cmd) || die "Error running preprocessor";

# ASMFILE: where the converted assembly goes - stdout when GASPP_DEBUG is set,
# otherwise a pipe into the assembler, or a temporary file for armasm.
if ($ENV{GASPP_DEBUG}) {
    open(ASMFILE, ">&STDOUT") or die "Error duplicating stdout: $!";
} else {
    if ($as_type ne "armasm") {
        open(ASMFILE, "|-", @gcc_cmd) or die "Error running assembler";
    } else {
        open(ASMFILE, ">", $tempfile) or die "Error opening '$tempfile': $!";
    }
}
216
217
# --- macro definition / expansion state ---
my $current_macro = '';       # name of the macro currently being recorded
my $macro_level = 0;          # nesting depth of .macro/.endm
my $rept_level = 0;           # nesting depth of .rept/.irp/.endr
my %macro_lines;              # macro name => array of body lines
my %macro_args;               # macro name => array of argument names
my %macro_args_default;       # macro name => { argument => default value }
my $macro_count = 0;          # counter substituted for \@
my $altmacro = 0;             # set by .altmacro, cleared by .noaltmacro
my $in_irp = 0;

# --- .rept / .irp recording state ---
my $num_repts;
my @rept_lines;

my @irp_args;
my $irp_param;

# One entry per open .if block.
my @ifstack;

# Values assigned with .set.
my %symbols;

# Section directives seen so far, used to resolve .previous.
my @sections;

my %literal_labels;     # for ldr <reg>, =<expr>
my $literal_num = 0;
my $literal_expr = ".word";
$literal_expr = ".quad" if $arch eq "aarch64";

my $thumb = 0;

my %thumb_labels;
my %call_targets;
my %mov32_targets;

my %neon_alias_reg;
my %neon_alias_type;

# --- numeric local label bookkeeping (armasm output) ---
my $temp_label_next = 0;
my %last_temp_labels;
my %next_temp_labels;

my %labels_seen;

my %aarch64_req_alias;

if ($force_thumb) {
    parse_line(".thumb\n");
}
264
265
# pass 1: parse .macro
# note that the handling of arguments is probably overly permissive vs. gas
# but it should be the same for valid cases
while (<INPUT>) {
    # remove lines starting with '#', preprocessing is done, '#' at start of
    # the line indicates a comment for all supported archs (aarch64, arm, ppc
    # and x86). Also strips line number comments but since they are off anyway
    # it is no loss.
    s/^#.*$//;
    # remove all comments (to avoid interfering with evaluating directives)
    # NOTE(review): with /x an interpolated '#' (the powerpc comment character)
    # appears to start a regex comment, which would make this a no-op for
    # powerpc input - confirm before changing.
    s/(?<!\\)$inputcomm.*//x;
    # Strip out windows linefeeds
    s/\r$//;

    # ';' separates several statements on one physical line.
    foreach my $subline (split(";", $_)) {
        # Add newlines at the end of lines that don't already have one
        chomp $subline;
        $subline .= "\n";
        parse_line($subline);
    }
}
286
287
# Evaluate an assembler expression and return its value.
#
# Every identifier that has been given a value with .set (see %symbols) is
# replaced by that value in parentheses, then the resulting text is evaluated
# as a Perl expression. Returns whatever the evaluation yields (undef if the
# text is not a valid Perl expression).
sub eval_expr {
    my $expr = $_[0];
    # Substitute whole identifiers only and in a single pass: the lookbehind
    # keeps us from rewriting the tail of a longer token (the "x10" in "0x10")
    # and matching the full identifier keeps symbol "a" from touching "abc".
    $expr =~ s{(?<![A-Za-z0-9._])([A-Za-z._][A-Za-z0-9._]*)}
              {defined $symbols{$1} ? "($symbols{$1})" : $1}ge;
    # NOTE(review): string eval of text that comes from the assembly source.
    eval $expr;
}
295
296
# Recognise a conditional directive and push its truth value onto @ifstack.
#
# Handles .if, .ifb/.ifnb, .ifc/.ifnc, .ifeq, .ifne, .iflt, .ifge, .ifgt and
# .ifle. Returns 1 if $line was a conditional directive (and has been
# consumed), 0 otherwise. Dies on variants it does not know.
sub handle_if {
    my $line = $_[0];
    # handle .if directives; apple's assembler doesn't support important non-basic ones
    # evaluating them is also needed to handle recursive macros
    if ($line =~ /\.if(n?)([a-z]*)\s+(.*)/) {
        my $result = $1 eq "n";
        my $type   = $2;
        my $expr   = $3;

        # The pattern splits ".ifne" into the negation prefix plus type "e";
        # put it back together so it is not rejected as unknown.
        if ($result and $type eq "e") {
            $type   = "ne";
            $result = 0;
        }

        if ($type eq "b") {
            $expr =~ s/\s//g;
            $result ^= $expr eq "";
        } elsif ($type eq "c") {
            if ($expr =~ /(.*)\s*,\s*(.*)/) {
                $result ^= $1 eq $2;
            } else {
                die "argument to .ifc not recognized";
            }
        } elsif ($type eq "") {
            $result ^= eval_expr($expr) != 0;
        } elsif ($type eq "eq") {
            $result = eval_expr($expr) == 0;
        } elsif ($type eq "ne") {
            $result = eval_expr($expr) != 0;
        } elsif ($type eq "lt") {
            $result = eval_expr($expr) < 0;
        } elsif ($type eq "ge") {
            $result = eval_expr($expr) >= 0;
        } elsif ($type eq "gt") {
            $result = eval_expr($expr) > 0;
        } elsif ($type eq "le") {
            $result = eval_expr($expr) <= 0;
        } else {
            chomp($line);
            die "unhandled .if varient. \"$line\"";
        }
        push (@ifstack, $result);
        return 1;
    } else {
        return 0;
    }
}
330
331
# Apply the state of the open .if blocks to one line.
#
# Returns 1 when the line has been consumed - it was an .endif/.elseif/.else
# or a nested conditional, or it sits inside a block whose condition is not
# currently true - and 0 when the caller should process it normally.
#
# @ifstack values: positive = branch active, 0 = no branch taken yet,
# negative = an earlier branch of this block was already taken.
sub parse_if_line {
    my $line = $_[0];

    # Nothing to do unless at least one .if block is open.
    return 0 unless scalar(@ifstack);

    # Don't evaluate any new if statements if we're within
    # a repetition or macro - they will be evaluated once
    # the repetition is unrolled or the macro is expanded.
    if (scalar(@rept_lines) == 0 and $macro_level == 0) {
        if ($line =~ /\.endif/) {
            pop(@ifstack);
            return 1;
        } elsif ($line =~ /\.elseif\s+(.*)/) {
            if ($ifstack[-1] == 0) {
                $ifstack[-1] = !!eval_expr($1);
            } elsif ($ifstack[-1] > 0) {
                # A previous branch was taken; remember that so neither
                # this nor any later branch becomes active.
                $ifstack[-1] = -$ifstack[-1];
            }
            return 1;
        } elsif ($line =~ /\.else/) {
            $ifstack[-1] = !$ifstack[-1];
            return 1;
        } elsif (handle_if($line)) {
            return 1;
        }
    }

    # discard lines in false .if blocks
    foreach my $state (@ifstack) {
        return 1 if $state <= 0;
    }
    return 0;
}
367
368
# Entry point for every (sub)line: applies conditionals, records the bodies of
# .macro and .rept/.irp blocks, and hands everything else to expand_macros().
sub parse_line {
    my $line = $_[0];

    return if (parse_if_line($line));

    # Track .macro/.endm nesting. The tests must look at $line (the statement
    # being parsed), not at $_, which merely holds whatever the caller's loop
    # last aliased.
    if (scalar(@rept_lines) == 0) {
        if ($line =~ /\.macro/) {
            $macro_level++;
            if ($macro_level > 1 && !$current_macro) {
                die "nested macros but we don't have master macro";
            }
        } elsif ($line =~ /\.endm/) {
            $macro_level--;
            if ($macro_level < 0) {
                die "unmatched .endm";
            } elsif ($macro_level == 0) {
                $current_macro = '';
                return;
            }
        }
    }

    # Track .rept/.irp nesting, but only outside of macro definitions.
    if ($macro_level == 0) {
        if ($line =~ /\.(rept|irp)/) {
            $rept_level++;
        } elsif ($line =~ /\.endr/) {
            $rept_level--;
        }
    }

    if ($macro_level > 1) {
        # Inside a nested macro definition: store verbatim in the outer macro.
        push(@{$macro_lines{$current_macro}}, $line);
    } elsif (scalar(@rept_lines) and $rept_level >= 1) {
        # Inside a repetition that is still being recorded.
        push(@rept_lines, $line);
    } elsif ($macro_level == 0) {
        expand_macros($line);
    } else {
        if ($line =~ /\.macro\s+([\d\w\.]+)\s*,?\s*(.*)/) {
            $current_macro = $1;

            # commas in the argument list are optional, so only use whitespace as the separator
            my $arglist = $2;
            $arglist =~ s/,/ /g;

            my @args = split(/\s+/, $arglist);
            foreach my $i (0 .. $#args) {
                # Each argument is "name", "name=default" or "name:vararg".
                my @argpair = split(/=/, $args[$i]);
                $macro_args{$current_macro}[$i] = $argpair[0];
                $argpair[0] =~ s/:vararg$//;
                $macro_args_default{$current_macro}{$argpair[0]} = $argpair[1];
            }
            # ensure %macro_lines has the macro name added as a key
            $macro_lines{$current_macro} = [];

        } elsif ($current_macro) {
            push(@{$macro_lines{$current_macro}}, $line);
        } else {
            die "macro level without a macro name";
        }
    }
}
429
430
# Record the value assigned by a ".set name, expr" directive in %symbols.
# Returns 1 if $line was a .set directive, 0 otherwise.
sub handle_set {
    my $line = $_[0];
    if ($line =~ /\.set\s+(.*),\s*(.*)/) {
        $symbols{$1} = eval_expr($2);
        return 1;
    }
    return 0;
}
438
439
# Process one line outside of any macro definition: evaluate conditionals,
# .purgem, .altmacro/.noaltmacro and .set, record and unroll .rept/.irp/.irpc
# blocks, expand macro invocations, and pass every other line on to
# handle_serialized_line().
sub expand_macros {
    my $line = $_[0];

    # handle .if directives; apple's assembler doesn't support important non-basic ones
    # evaluating them is also needed to handle recursive macros
    if (handle_if($line)) {
        return;
    }

    # Must test $line, not $_: $_ is whatever the caller's loop aliased.
    if ($line =~ /\.purgem\s+([\d\w\.]+)/) {
        delete $macro_lines{$1};
        delete $macro_args{$1};
        delete $macro_args_default{$1};
        return;
    }

    if ($line =~ /\.altmacro/) {
        $altmacro = 1;
        return;
    }

    if ($line =~ /\.noaltmacro/) {
        $altmacro = 0;
        return;
    }

    # In altmacro mode %expr is replaced by the value of expr.
    $line =~ s/\%([^,]*)/eval_expr($1)/eg if $altmacro;

    # Strip out the .set lines from the armasm output
    return if (handle_set($line) and $as_type eq "armasm");

    if ($line =~ /\.rept\s+(.*)/) {
        $num_repts = $1;
        @rept_lines = ("\n");

        # handle the possibility of repeating another directive on the same line
        # .endr on the same line is not valid, I don't know if a non-directive is
        if ($num_repts =~ s/(\.\w+.*)//) {
            push(@rept_lines, "$1\n");
        }
        $num_repts = eval_expr($num_repts);
    } elsif ($line =~ /\.irp\s+([\d\w\.]+)\s*(.*)/) {
        $in_irp = 1;
        $num_repts = 1;
        @rept_lines = ("\n");
        $irp_param = $1;

        # only use whitespace as the separator
        my $irp_arglist = $2;
        $irp_arglist =~ s/,/ /g;
        $irp_arglist =~ s/^\s+//;
        @irp_args = split(/\s+/, $irp_arglist);
    } elsif ($line =~ /\.irpc\s+([\d\w\.]+)\s*(.*)/) {
        $in_irp = 1;
        $num_repts = 1;
        @rept_lines = ("\n");
        $irp_param = $1;

        # .irpc iterates over the individual characters of its argument.
        my $irp_arglist = $2;
        $irp_arglist =~ s/,/ /g;
        $irp_arglist =~ s/^\s+//;
        @irp_args = split(//, $irp_arglist);
    } elsif ($line =~ /\.endr/) {
        # Snapshot and reset the recording state first: the recorded lines are
        # fed back through parse_line() and may start repetitions themselves.
        my @prev_rept_lines = @rept_lines;
        my $prev_in_irp = $in_irp;
        my @prev_irp_args = @irp_args;
        my $prev_irp_param = $irp_param;
        my $prev_num_repts = $num_repts;
        @rept_lines = ();
        $in_irp = 0;
        @irp_args = '';

        if ($prev_in_irp != 0) {
            foreach my $i (@prev_irp_args) {
                foreach my $origline (@prev_rept_lines) {
                    my $line = $origline;
                    $line =~ s/\\$prev_irp_param/$i/g;
                    $line =~ s/\\\(\)//g;     # remove \()
                    parse_line($line);
                }
            }
        } else {
            for (1 .. $prev_num_repts) {
                foreach my $origline (@prev_rept_lines) {
                    my $line = $origline;
                    parse_line($line);
                }
            }
        }
    } elsif ($line =~ /(\S+:|)\s*([\w\d\.]+)\s*(.*)/ && exists $macro_lines{$2}) {
        # Macro invocation, optionally preceded by a label.
        my ($label, $macro, $rest) = ($1, $2, $3);
        handle_serialized_line($label);

        # commas are optional here too, but are syntactically important because
        # parameters can be blank
        my @arglist = split(/,/, $rest);
        my @args;
        my @args_seperator;

        my $comma_sep_required = 0;
        foreach my $chunk (@arglist) {
            # allow arithmetic/shift operators in macro arguments
            $chunk =~ s/\s*(\+|-|\*|\/|<<|>>|<|>)\s*/$1/g;

            my @whitespace_split = split(/\s+/, $chunk);
            if (!@whitespace_split) {
                push(@args, '');
                push(@args_seperator, '');
            } else {
                foreach my $word (@whitespace_split) {
                    if (length($word)) {
                        push(@args, $word);
                        # Remember whether a comma or only whitespace preceded
                        # this argument, for rebuilding vararg lists.
                        my $sep = $comma_sep_required ? "," : " ";
                        push(@args_seperator, $sep);
                        $comma_sep_required = 0;
                    }
                }
            }

            $comma_sep_required = 1;
        }

        # Start from the declared defaults, then overlay the given arguments.
        my %replacements;
        if ($macro_args_default{$macro}){
            %replacements = %{$macro_args_default{$macro}};
        }

        # construct hashtable of text to replace
        foreach my $i (0 .. $#args) {
            my $argname = $macro_args{$macro}[$i];
            if ($args[$i] =~ m/=/) {
                # arg=val references the argument name
                # XXX: I'm not sure what the expected behaviour if a lot of
                # these are mixed with unnamed args
                my @named_arg = split(/=/, $args[$i]);
                $replacements{$named_arg[0]} = $named_arg[1];
            } elsif ($i > $#{$macro_args{$macro}}) {
                # more args given than the macro has named args
                # XXX: is vararg allowed on arguments before the last?
                $argname = $macro_args{$macro}[-1];
                if ($argname =~ s/:vararg$//) {
                    $replacements{$argname} .= "$args_seperator[$i] $args[$i]";
                } else {
                    die "Too many arguments to macro $macro";
                }
            } else {
                $argname =~ s/:vararg$//;
                $replacements{$argname} = $args[$i];
            }
        }

        my $count = $macro_count++;

        # do replacements by longest first, this avoids wrong replacement
        # when argument names are subsets of each other
        my @names = reverse sort {length $a <=> length $b} keys %replacements;

        # apply replacements as regex
        # ($_ is intentionally left aliased to the stored macro line while
        # parse_line() runs.)
        foreach (@{$macro_lines{$macro}}) {
            my $macro_line = $_;
            foreach my $name (@names) {
                $macro_line =~ s/\\$name/$replacements{$name}/g;
            }
            if ($altmacro) {
                # altmacro also allows arguments to be referenced without '\'.
                foreach my $name (@names) {
                    $macro_line =~ s/\b$name\b/$replacements{$name}/g;
                }
            }
            $macro_line =~ s/\\\@/$count/g;
            $macro_line =~ s/\\\(\)//g;     # remove \()
            parse_line($macro_line);
        }
    } else {
        handle_serialized_line($line);
    }
}
618
619
# Return 1 if $name is "lr", "ip", or one of r<N>, a<N>, v<N>; 0 otherwise.
sub is_arm_register {
    my $name = $_[0];
    return 1 if $name eq "lr";
    return 1 if $name eq "ip";
    return 1 if $name =~ /^[rav]\d+$/;
    return 0;
}
628
629
# Replace references to the numeric local label "$num$dir" in $line by a
# unique label name and return the modified line.
#
#   $line - the line containing the reference
#   $num  - the label number
#   $dir  - "b" for a backward reference, anything else is treated as forward
#
# Backward references resolve to the name stored in %last_temp_labels.
# Forward references get a fresh name that is queued in %next_temp_labels.
sub handle_local_label {
    my $line = $_[0];
    my $num  = $_[1];
    my $dir  = $_[2];
    my $target = "$num$dir";
    # Word boundaries keep the replacement from hitting the same characters
    # inside a longer token, e.g. the "1b" in the constant 0x1b.
    if ($dir eq "b") {
        $line =~ s/\b$target\b/$last_temp_labels{$num}/g;
    } else {
        my $name = "temp_label_$temp_label_next";
        $temp_label_next++;
        push(@{$next_temp_labels{$num}}, $name);
        $line =~ s/\b$target\b/$name/g;
    }
    return $line;
}
644
645
# Convert one fully expanded line (macros, repetitions and conditionals are
# already resolved) to the syntax of the targeted assembler and print it to
# ASMFILE. Some lines are consumed without output (register aliases, already
# declared labels).
sub handle_serialized_line {
    my $line = $_[0];

    # handle .previous (only with regard to .section not .subsection)
    if ($line =~ /\.(section|text|const_data)/) {
        push(@sections, $line);
    } elsif ($line =~ /\.previous/) {
        if (!$sections[-2]) {
            die ".previous without a previous section";
        }
        $line = $sections[-2];
        push(@sections, $line);
    }

    $thumb = 1 if $line =~ /\.code\s+16|\.thumb/;
    $thumb = 0 if $line =~ /\.code\s+32|\.arm/;

    # handle ldr <reg>, =<expr>
    if ($line =~ /(.*)\s*ldr([\w\s\d]+)\s*,\s*=(.*)/ and $as_type ne "armasm") {
        my $label = $literal_labels{$3};
        if (!$label) {
            $label = "Literal_$literal_num";
            $literal_num++;
            $literal_labels{$3} = $label;
        }
        $line = "$1 ldr$2, $label\n";
    } elsif ($line =~ /\.ltorg/ and $as_type ne "armasm") {
        # Emit the literals collected so far at this point.
        $line .= ".align 2\n";
        foreach my $literal (keys %literal_labels) {
            $line .= "$literal_labels{$literal}:\n $literal_expr $literal\n";
        }
        %literal_labels = ();
    }

    # handle GNU as pc-relative relocations for adrp/add
    if ($line =~ /(.*)\s*adrp([\w\s\d]+)\s*,\s*#?:pg_hi21:([^\s]+)/) {
        $line = "$1 adrp$2, ${3}\@PAGE\n";
    } elsif ($line =~ /(.*)\s*add([\w\s\d]+)\s*,([\w\s\d]+)\s*,\s*#?:lo12:([^\s]+)/) {
        $line = "$1 add$2, $3, ${4}\@PAGEOFF\n";
    }

    # thumb add with large immediate needs explicit add.w
    if ($thumb and $line =~ /add\s+.*#([^@]+)/) {
        $line =~ s/add/add.w/ if eval_expr($1) > 255;
    }

    # mach-o local symbol names start with L (no dot)
    $line =~ s/(?<!\w)\.(L\w+)/$1/g;

    # recycle the '.func' directive for '.thumb_func'
    if ($thumb and $as_type =~ /^apple-/) {
        $line =~ s/\.func/.thumb_func/x;
    }

    if ($thumb and $line =~ /^\s*(\w+)\s*:/) {
        $thumb_labels{$1}++;
    }

    if ($as_type =~ /^apple-/ and
        $line =~ /^\s*((\w+\s*:\s*)?bl?x?(..)?(?:\.w)?|\.global)\s+(\w+)/) {
        my $cond = $3;
        my $label = $4;
        $cond = "" if not defined $cond;
        # Don't interpret e.g. bic as b<cc> with ic as conditional code.
        # The pattern must be anchored and grouped: a bare leading '|' is an
        # empty alternative that matches every string.
        if ($cond =~ /^(?:|$arm_cond_codes)$/) {
            if (exists $thumb_labels{$label}) {
                print ASMFILE ".thumb_func $label\n";
            } else {
                $call_targets{$label}++;
            }
        }
    }

    # @l -> lo16()  @ha -> ha16()
    $line =~ s/,\s+([^,]+)\@l\b/, lo16($1)/g;
    $line =~ s/,\s+([^,]+)\@ha\b/, ha16($1)/g;

    # move to/from SPR
    if ($line =~ /(\s+)(m[ft])([a-z]+)\s+(\w+)/ and exists $ppc_spr{$3}) {
        if ($2 eq 'mt') {
            $line = "$1${2}spr $ppc_spr{$3}, $4\n";
        } else {
            $line = "$1${2}spr $4, $ppc_spr{$3}\n";
        }
    }

    # Aliases that this script resolves itself are dropped on .unreq.
    if ($line =~ /\.unreq\s+(.*)/) {
        if (defined $neon_alias_reg{$1}) {
            delete $neon_alias_reg{$1};
            delete $neon_alias_type{$1};
            return;
        } elsif (defined $aarch64_req_alias{$1}) {
            delete $aarch64_req_alias{$1};
            return;
        }
    }
    # old gas versions store upper and lower case names on .req,
    # but they remove only one on .unreq
    if ($fix_unreq) {
        if ($line =~ /\.unreq\s+(.*)/) {
            $line = ".unreq " . lc($1) . "\n";
            $line .= ".unreq " . uc($1) . "\n";
        }
    }

    # Record .dn/.qn register aliases instead of emitting them.
    if ($line =~ /(\w+)\s+\.(dn|qn)\s+(\w+)(?:\.(\w+))?(\[\d+\])?/) {
        $neon_alias_reg{$1} = "$3$5";
        $neon_alias_type{$1} = $4;
        return;
    }
    if (scalar keys %neon_alias_reg > 0 && $line =~ /^\s+v\w+/) {
        # This line seems to possibly have a neon instruction
        foreach my $alias (keys %neon_alias_reg) {
            # Require the register alias to match as an individual word, not as a substring
            # of a larger word-token.
            if ($line =~ /\b$alias\b/) {
                $line =~ s/\b$alias\b/$neon_alias_reg{$alias}/g;
                # Add the type suffix. If multiple aliases match on the same line,
                # only do this replacement the first time (a vfoo.bar string won't match v\w+).
                $line =~ s/^(\s+)(v\w+)(\s+)/$1$2.$neon_alias_type{$alias}$3/;
            }
        }
    }

    if ($arch eq "aarch64" or $as_type eq "armasm") {
        # clang's integrated aarch64 assembler in Xcode 5 does not support .req/.unreq
        if ($line =~ /\b(\w+)\s+\.req\s+(\w+)\b/) {
            $aarch64_req_alias{$1} = $2;
            return;
        }
        foreach my $alias (keys %aarch64_req_alias) {
            # recursively resolve aliases
            my $resolved = $aarch64_req_alias{$alias};
            while (defined $aarch64_req_alias{$resolved}) {
                $resolved = $aarch64_req_alias{$resolved};
            }
            $line =~ s/\b$alias\b/$resolved/g;
        }
    }
    if ($arch eq "aarch64") {
        # fix missing aarch64 instructions in Xcode 5.1 (beta3)
        # mov with vector arguments is not supported, use alias orr instead
        if ($line =~ /^\s*mov\s+(v\d[\.{}\[\]\w]+),\s*(v\d[\.{}\[\]\w]+)\b\s*$/) {
            $line = " orr $1, $2, $2\n";
        }
        # movi 16, 32 bit shifted variant, shift is optional
        if ($line =~ /^\s*movi\s+(v[0-3]?\d\.(?:2|4|8)[hsHS])\s*,\s*(#\w+)\b\s*$/) {
            $line = " movi $1, $2, lsl #0\n";
        }
        # Xcode 5 misses the alias uxtl. Replace it with the more general ushll.
        # Clang 3.4 misses the alias sxtl too. Replace it with the more general sshll.
        if ($line =~ /^\s*(s|u)xtl(2)?\s+(v[0-3]?\d\.[248][hsdHSD])\s*,\s*(v[0-3]?\d\.(?:2|4|8|16)[bhsBHS])\b\s*$/) {
            $line = " $1shll$2 $3, $4, #0\n";
        }
        # clang 3.4 does not automatically use shifted immediates in add/sub
        if ($as_type eq "clang" and
            $line =~ /^(\s*(?:add|sub)s?) ([^#l]+)#([\d\+\-\*\/ <>]+)\s*$/) {
            my $imm = eval $3;
            if ($imm > 4095 and not ($imm & 4095)) {
                $line = "$1 $2#" . ($imm >> 12) . ", lsl #12\n";
            }
        }
        if ($ENV{GASPP_FIX_XCODE5}) {
            if ($line =~ /^\s*bsl\b/) {
                $line =~ s/\b(bsl)(\s+v[0-3]?\d\.(\w+))\b/$1.$3$2/;
                # $3 below is the type captured by the substitution above.
                $line =~ s/\b(v[0-3]?\d)\.$3\b/$1/g;
            }
            if ($line =~ /^\s*saddl2?\b/) {
                $line =~ s/\b(saddl2?)(\s+v[0-3]?\d\.(\w+))\b/$1.$3$2/;
                $line =~ s/\b(v[0-3]?\d)\.\w+\b/$1/g;
            }
            if ($line =~ /^\s*dup\b.*\]$/) {
                $line =~ s/\bdup(\s+v[0-3]?\d)\.(\w+)\b/dup.$2$1/g;
                $line =~ s/\b(v[0-3]?\d)\.[bhsdBHSD](\[\d\])$/$1$2/g;
            }
        }
    }

    if ($as_type eq "armasm") {
        # Also replace variables set by .set
        foreach my $sym (keys %symbols) {
            $line =~ s/\b$sym\b/$symbols{$sym}/g;
        }

        # Handle function declarations and keep track of the declared labels
        if ($line =~ s/^\s*\.func\s+(\w+)/$1 PROC/) {
            $labels_seen{$1} = 1;
        }

        if ($line =~ s/^\s*(\d+)://) {
            # Convert local labels into unique labels. armasm (at least in
            # RVCT) has something similar, but still different enough.
            # By converting to unique labels we avoid any possible
            # incompatibilities.

            my $num = $1;
            # Define every name handed out to earlier forward references.
            foreach my $pending (@{$next_temp_labels{$num}}) {
                $line = "$pending\n" . $line;
            }
            $next_temp_labels{$num} = [];
            my $name = "temp_label_$temp_label_next";
            $temp_label_next++;
            # The matching regexp above removes the label from the start of
            # the line (which might contain an instruction as well), readd
            # it on a separate line above it.
            $line = "$name:\n" . $line;
            $last_temp_labels{$num} = $name;
        }

        if ($line =~ s/^(\w+):/$1/) {
            # Skip labels that have already been declared with a PROC,
            # labels must not be declared multiple times.
            return if (defined $labels_seen{$1});
            $labels_seen{$1} = 1;
        } elsif ($line !~ /(\w+) PROC/) {
            # If not a label, make sure the line starts with whitespace,
            # otherwise ms armasm interprets it incorrectly.
            $line =~ s/^[\.\w]/\t$&/;
        }


        # Check branch instructions
        if ($line =~ /(?:^|\n)\s*(\w+\s*:\s*)?(bl?x?(..)?(\.w)?)\s+(\w+)/) {
            my $instr = $2;
            my $cond = $3;
            my $width = $4;
            my $target = $5;
            $cond = "" if not defined $cond;
            # Don't interpret e.g. bic as b<cc> with ic as conditional code.
            # Anchored and grouped so that the empty alternative only accepts
            # an absent condition code instead of matching everything.
            if ($cond !~ /^(?:|$arm_cond_codes)$/) {
                # Not actually a branch
            } elsif ($target =~ /(\d+)([bf])/) {
                # The target is a local label
                $line = handle_local_label($line, $1, $2);
                $line =~ s/\b$instr\b/$&.w/ if $width eq "";
            } elsif (!is_arm_register($target)) {
                $call_targets{$target}++;
            }
        } elsif ($line =~ /^\s*.h?word.*\b\d+[bf]\b/) {
            while ($line =~ /\b(\d+)([bf])\b/g) {
                $line = handle_local_label($line, $1, $2);
            }
        }

        # ALIGN in armasm syntax is the actual number of bytes
        if ($line =~ /\.align\s+(\d+)/) {
            my $align = 1 << $1;
            $line =~ s/\.align\s(\d+)/ALIGN $align/;
        }
        # Convert gas style [r0, :128] into armasm [r0@128] alignment specification
        $line =~ s/\[([^\[]+),\s*:(\d+)\]/[$1\@$2]/g;

        # armasm treats logical values {TRUE} and {FALSE} separately from
        # numeric values - logical operators and values can't be intermixed
        # with numerical values. Evaluate !<number> and (a <> b) into numbers,
        # let the assembler evaluate the rest of the expressions. This current
        # only works for cases when ! and <> are used with actual constant numbers,
        # we don't evaluate subexpressions here.

        # Evaluate !<number>; each occurrence is replaced by its own value,
        # with or without whitespace after the '!'.
        $line =~ s/!\s*(\d+)/($1 != 0) ? 0 : 1/ge;
        # Evaluate (a > b)
        while ($line =~ /\(\s*(\d+)\s*([<>])\s*(\d+)\s*\)/) {
            my $val;
            if ($2 eq "<") {
                $val = ($1 < $3) ? 1 : 0;
            } else {
                $val = ($1 > $3) ? 1 : 0;
            }
            $line =~ s/\(\s*(\d+)\s*([<>])\s*(\d+)\s*\)/$val/;
        }

        # Change a movw... #:lower16: into a mov32 pseudoinstruction
        $line =~ s/^(\s*)movw(\s+\w+\s*,\s*)\#:lower16:(.*)$/$1mov32$2$3/;
        # and remove the following, matching movt completely
        $line =~ s/^\s*movt\s+\w+\s*,\s*\#:upper16:.*$//;

        if ($line =~ /^\s*mov32\s+\w+,\s*([a-zA-Z]\w*)/) {
            $mov32_targets{$1}++;
        }

        # Misc bugs/deficiencies:
        # armasm seems unable to parse e.g. "vmov s0, s1" without a type
        # qualifier, thus add .f32.
        $line =~ s/^(\s+(?:vmov|vadd))(\s+s)/$1.f32$2/;
        # armasm is unable to parse &0x - add spacing
        $line =~ s/&0x/& 0x/g;
    }

    if ($force_thumb) {
        # Convert register post indexing to a separate add instruction.
        # This converts e.g. "ldr r0, [r1], r2" into "ldr r0, [r1]",
        # "add r1, r1, r2".
        $line =~ s/(ldr|str)\s+(\w+),\s*\[(\w+)\],\s*(\w+)/$1 $2, [$3]\n\tadd $3, $3, $4/g;

        # Convert "mov pc, lr" into "bx lr", since the former only works
        # for switching from arm to thumb (and only in armv7), but not
        # from thumb to arm.
        # (Applied to $line; operating on $_ would miss the line being emitted.)
        $line =~ s/mov\s*pc\s*,\s*lr/bx lr/g;

        # Convert stmdb/ldmia with only one register into a plain str/ldr with post-increment/decrement
        $line =~ s/stmdb\s+sp!\s*,\s*\{([^,-]+)\}/str $1, [sp, #-4]!/g;
        $line =~ s/ldmia\s+sp!\s*,\s*\{([^,-]+)\}/ldr $1, [sp], #4/g;

        $line =~ s/\.arm/.thumb/x;
    }

    # comment out unsupported directives
    $line =~ s/\.type/$comm$&/x        if $as_type =~ /^(apple-|armasm)/;
    $line =~ s/\.func/$comm$&/x        if $as_type =~ /^(apple-|clang)/;
    $line =~ s/\.endfunc/$comm$&/x     if $as_type =~ /^(apple-|clang)/;
    $line =~ s/\.endfunc/ENDP/x        if $as_type =~ /armasm/;
    $line =~ s/\.ltorg/$comm$&/x       if $as_type =~ /^(apple-|clang)/;
    $line =~ s/\.ltorg/LTORG/x         if $as_type eq "armasm";
    $line =~ s/\.size/$comm$&/x        if $as_type =~ /^(apple-|armasm)/;
    $line =~ s/\.fpu/$comm$&/x         if $as_type =~ /^(apple-|armasm)/;
    $line =~ s/\.arch/$comm$&/x        if $as_type =~ /^(apple-|clang|armasm)/;
    $line =~ s/\.object_arch/$comm$&/x if $as_type =~ /^(apple-|armasm)/;
    $line =~ s/.section\s+.note.GNU-stack.*/$comm$&/x if $as_type =~ /^(apple-|armasm)/;

    $line =~ s/\.syntax/$comm$&/x      if $as_type =~ /armasm/;

    $line =~ s/\.hword/.short/x;

    if ($as_type =~ /^apple-/) {
        # the syntax for these is a little different
        $line =~ s/\.global/.globl/x;
        # also catch .section .rodata since the equivalent to .const_data is .section __DATA,__const
        $line =~ s/(.*)\.rodata/.const_data/x;
        $line =~ s/\.int/.long/x;
        $line =~ s/\.float/.single/x;
    }
    if ($as_type eq "armasm") {
        $line =~ s/\.global/EXPORT/x;
        $line =~ s/\.int/dcd/x;
        $line =~ s/\.long/dcd/x;
        $line =~ s/\.float/dcfs/x;
        $line =~ s/\.word/dcd/x;
        $line =~ s/\.short/dcw/x;
        $line =~ s/\.byte/dcb/x;
        $line =~ s/\.thumb/THUMB/x;
        $line =~ s/\.arm/ARM/x;
        # The alignment in AREA is the power of two, just as .align in gas
        $line =~ s/\.text/AREA |.text|, CODE, READONLY, ALIGN=2, CODEALIGN/;
        $line =~ s/(\s*)(.*)\.rodata/$1AREA |.rodata|, DATA, READONLY, ALIGN=5/;

        $line =~ s/fmxr/vmsr/;
        $line =~ s/fmrx/vmrs/;
        $line =~ s/fadds/vadd.f32/;
    }

    # catch unknown section names that aren't mach-o style (with a comma)
    if ($as_type =~ /apple-/ and $line =~ /.section ([^,]*)$/) {
        die ".section $1 unsupported; figure out the mach-o section name and add it";
    }

    print ASMFILE $line;
}
1007
1008
# Trailer: flush the literals that were never emitted by a .ltorg and declare
# the remaining symbols the assembler needs to know about.
if ($as_type ne "armasm") {
    print ASMFILE ".text\n";
    print ASMFILE ".align 2\n";
    foreach my $literal (keys %literal_labels) {
        print ASMFILE "$literal_labels{$literal}:\n $literal_expr $literal\n";
    }

    # Branch targets that turned out to be thumb labels.
    foreach my $target (grep { exists $thumb_labels{$_} } keys %call_targets) {
        print ASMFILE ".thumb_func $target\n";
    }
} else {
    # Referenced symbols that were never defined in this file are external.
    foreach my $target (grep { ! exists $labels_seen{$_} }
                        (keys %call_targets, keys %mov32_targets)) {
        print ASMFILE "\tIMPORT $target\n";
    }

    print ASMFILE "\tEND\n";
}

close(INPUT) or exit 1;
close(ASMFILE) or exit 1;
# armasm reads the temporary file instead of a pipe, so run it only now.
if ($as_type eq "armasm" and ! defined $ENV{GASPP_DEBUG}) {
    system(@gcc_cmd) == 0 or die "Error running assembler";
}

# Remove the temporary armasm input on every exit path.
END {
    unlink($tempfile) if defined $tempfile;
}
#exit 1
1033
#exit 1
1034
1035