-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcompiler.ml
2696 lines (2209 loc) · 109 KB
/
compiler.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
(* compiler.ml -- combined type checking and code generation pass *)
(* Use Emacs' outline-minor-mode to navigate *)
(* "egrep -nH -e '\(\* \*|^let|^and' compiler.ml" for a rough index *)
(* The "See section n.n.n" comments refer to 'Algol W Language Description, June 1972'. *)
(* * GNU bafflegab. ----------------------------------------------------------------*)
(* This file is part of Awe. Copyright 2012 Glyn Webster.
Awe is free software: you can redistribute it and/or modify it
under the terms of the GNU General Public License as published
by the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Awe is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public
License along with Awe. If not, see <http://www.gnu.org/licenses/>.
*)
open Printf
(* * Support types ---------------------------------------------------------------- *)
open Type (* for ClassSet module, and all sorts of types and variants *)
open Table (* for Id module *)
(* Algol expressions and designators are translated into scraps of C code. *)
(* There will be lots of these typed_code_t values flying around.*)
type typed_code_t = {
c : Code.t; (* The C code translation of the expression or designator. *)
t : Type.simple_t (* The Algol "simple type" of the expression or designator. *)
}
(* Procedure actual parameters may be either designators or expressions.
How they are used depends on the corresponding formal parameter. *)
type designator_or_expression_t =
| Designator of typed_code_t
| Expression of typed_code_t
(* Designators need to be translated into C pointer lvalues in some places,
C rvalues in others. (See the function 'designator_or_expression' for details.) *)
type designator_t =
| Pointer
| Lvalue
(* These types are used to accumulate data and C code for the declarations in block bodies: *)
(* The Algol scope and C code fragments for constructing a block. See the 'block_expression'. *)
type block_t = {
scope : Scope.t; (* the scope introduced by the block. *)
outsidescope : Code.t; (* code to be executed outside the block (array bounds expressions.) *)
labels : Code.t; (* __label__ definitions, which must appear first in the C block *)
prototypes : Code.t; (* function prototypes, which must all appear before any function definitions *)
structs : Code.t; (* struct declarations for record classes *)
variables : Code.t; (* simple variable declarations *)
functions : Code.t; (* function definitions *)
initialization : Code.t; (* assignment statements to initialize simple variables and arrays *)
procedures : procedure_header_t list (* see below *)
}
(* An Algol procedure declaration, and its C function header. The translation of a procedure's
body is delayed until all definitions in its surrounding block are known.
See 'add_procedure_functions' and 'add_procedure_declaration' *)
and procedure_header_t = {
returntype : Type.simple_t;
proc_id : Id.t;
proc_loc : Location.t;
parameters : formal_parameters_t;
header : Code.t; (* C-code header for a procedure's C function and prototype *)
body : Tree.t (* the parse tree of the body of a procedure, saved for later. *)
}
and formal_parameters_t = {
procedure_locals : Scope.Local.t; (* definitions of the formal parameters, valid in a procedure's body *)
formal_types : Type.formal_t list; (* the formal types of the parameters, in order *)
arguments : Code.t; (* the C function arguments for the parameters *)
}
(* Pieces of C code gathered while translating procedure actual parameters:
*)
and actual_parameters_t = {
decls : Code.t; (* temporary variable and thunk function declarations *)
precall : Code.t; (* assignments to VALUE RESULT temporary variables *)
args : Code.t; (* C function call arguments *)
postcall: Code.t (* assignments to RESULT parameter variables *)
}
let empty_block =
{ scope = Scope.empty;
outsidescope = Code.empty;
prototypes = Code.empty;
labels = Code.empty;
variables = Code.empty;
structs = Code.empty;
functions = Code.empty;
initialization = Code.empty;
procedures = []
}
let empty_formal_parameters =
{ procedure_locals = Scope.Local.empty;
formal_types = [];
arguments = Code.empty }
let empty_actual_parameters =
{ decls = Code.empty;
precall = Code.empty;
args = Code.empty;
postcall = Code.empty }
(* * Support functions ---------------------------------------------------------------- *)
(* Shorthand for constucting C code scraps. *)
let ($$) template args = Code.template template args
let (@$) a b = Code.add a b
let (@$.) a b = Code.add_with_comma a b
(* The Error exception is for reporting errors in the Algol code.
These exceptions are caught at the outer level of the compiler and printed.
There is no error recovery (but Awe can compile an Algol 68 compiler
fast as you can blink.)
Note: assertion failures and Failure exceptions indicate bugs in the compiler,
they are never used to signal compilation error messages. *)
exception Error of Location.t * string
(* 'error loc "message" arg1 arg2 ...' is a printf-like function for raising Error.
'warning' gives a compiler warning, directed to stderr. (E.g. notes about the use of Name parameters.)
'output' prints information to stdout. (E.g. the C prototypes of functions that must be supplied externally.) *)
let error loc = Printf.ksprintf (fun message -> (raise (Error(loc, message))))
let warning loc = Printf.kprintf (fun message -> prerr_endline (Location.to_string loc ^ " " ^ message))
let output = Printf.printf
(* map a binary function over a sequence of integers and a list. *)
let mapi (start : int) (f : int -> 'a -> 'b) (xs : 'a list) : 'b list =
let rec loop i =
function
| [] -> []
| x :: xs' -> f i x :: loop (i + 1) xs'
in
loop start xs
(* map a unary function over a range of integers. *)
let mapn (first : int) (last : int) (f : int -> 'a) : 'a list =
assert (first <= last);
let rec loop i xs =
if i >= first then
loop (i - 1) (f i :: xs)
else
xs
in
loop last []
(* 'snip_last' seperates the last element from the rest of a list *)
let snip_last (xs : 'a list) : ('a list * 'a) =
match List.rev xs with
| [] -> failwith "snip_last"
| [x] -> ([], x)
| x :: xs' -> (List.rev xs', x)
(* * Types and scopes ---------------------------------------------------------------- *)
(* 'set loc scope id defn' sets 'id' to 'defn' in 'scope' and returns the modified scope.
It reports an error at source location 'loc' if there is already a definition for
'id' in the innermost block. *)
let set (loc : Location.t) (scope : Scope.t) (id : Id.t) (defn : Type.definition_t) =
try
Scope.set scope id defn
with Scope.Redefined (_, defn) ->
error loc "'%s' is already defined here, as %s" (Id.to_string id) (describe_definition defn)
(* 'set_local' is the same as 'set', but works on a single-level scope.
This is used when accumulating the formal parameters of a procedure. *)
let set_local (loc : Location.t) (scope : Scope.Local.t) (id : Id.t) (defn : Type.definition_t) : Scope.Local.t =
try
Scope.Local.set scope id defn
with Scope.Redefined (_, defn) ->
error loc "'%s' is already defined here, as %s" (Id.to_string id) (describe_definition defn)
(* 'get loc scope id' gets the definition of 'id' in 'scope'.
It reports an error at source location 'loc' if 'id' is not defined in 'scope'. *)
let get (loc : Location.t) (scope : Scope.t) (id : Id.t) : Type.definition_t =
try
Scope.get scope id
with Scope.Undefined _ ->
error loc "'%s' is undefined here" (Id.to_string id)
(* 'simple scope tree' returns the Algol simple type represented by a parse tree.
The 'scope' parameter is used to look up reference class identifiers. *)
let simple (scope : Scope.t) (tree : Tree.t) : Type.simple_t =
match tree with
| Tree.INTEGER -> Number (Short, Integer)
| Tree.REAL -> Number (Short, Real)
| Tree.LONG_REAL -> Number (Long, Real)
| Tree.COMPLEX -> Number (Short, Complex)
| Tree.LONG_COMPLEX -> Number (Long, Complex)
| Tree.LOGICAL -> Logical
| Tree.STRING (Some length) -> String length
| Tree.STRING (None) -> String 16
| Tree.BITS None -> Bits 32
| Tree.BITS (Some width) -> Bits width
| Tree.REFERENCE (loc, []) ->
failwith "Compiler.simple: tree is Tree.REFERENCE with no ids."
| Tree.REFERENCE (loc, [id]) ->
( match get loc scope id with
| Record (n, _) -> Reference (ClassSet.singleton n)
| defn -> error loc "expected %s to be a record class, it is %s" (Id.to_string id) (describe_definition defn) )
| Tree.REFERENCE (loc, ids) ->
let add_class classes id =
match get loc scope id with
| Record (n, _) -> ClassSet.add n classes
| defn -> error loc "expected %s to be a record class, it is %s" (Id.to_string id) (describe_definition defn)
in
Reference (List.fold_left add_class ClassSet.empty ids)
| _ ->
failwith "Compiler.simple: tree does not describe a simple type."
(* * C code generation ---------------------------------------------------------------- *)
(* This is the header for all C file generated by Awe. (I've seen people distributing
the AWE generated C files in place of Algol W sources. Code is going lost. *)
let notice : Code.t = Code.string
"/* ****************************************************************************
This is a TEMPORARY file generated by the Awe Algol W compiler
from an Algol W source file. You should not edit this file or store it
in a version control system; delete it freely as you would any other
temporary object file. You should NOT distribute this file,
instead distribute your Algol W source file and the Awe compiler.
**************************************************************************** */
"
let is_valid_c_identifier s = (* true if 's' is a valid GCC C identifier. *)
let first c = (c = '_' || c = '$' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) in
let rest c = (first c || (c >= '0' && c <= '9')) in
try
assert (String.length s > 0);
assert (first s.[0]);
for i = 1 to String.length s - 1 do
assert (rest s.[i])
done;
true
with _ ->
false
(* 'blank_string n' return a C string constant containing n spaces *)
let blank_string (n : int) : Code.t =
Code.string ("(_awe_str)\"" ^ String.make n ' ' ^ "\"")
let code_of_int (i : int) : Code.t = Code.string (string_of_int i)
(* 'code_of_loc' returns an C code function argument denoting an Algol source location.
This is used in function calls that might raise runtime errors. *)
let code_of_loc (loc : Location.t) : Code.t =
Code.string ( sprintf "_awe_at(%i,%i,%i)"
(Location.file_number loc)
(Location.line loc)
(Location.column loc) )
let c_char_const (character : string) : Code.t =
assert (String.length character = 1);
Code.string
( match character.[0] with
| '\x00' -> "'\\0'"
| '\x07' -> "'\\a'"
| '\x08' -> "'\\b'"
| '\x09' -> "'\\t'"
| '\x0A' -> "'\\n'"
| '\x0B' -> "'\\v'"
| '\x0C' -> "'\\f'"
| '\x0D' -> "'\\r'"
| '\'' -> "'\\''"
| '"' -> "'\"'"
| '\\' -> "'\\\\'"
| c when (c >= ' ' && c <= '~') -> sprintf "'%c'" c
| c -> sprintf "'\\x%02X'" (int_of_char c) )
let c_str_const (s : string) : Code.t =
let b = Buffer.create 32 in
Buffer.add_char b '"';
for i = 0 to String.length s - 1 do
match s.[i] with
| '\x00' -> Buffer.add_string b "\\0"
| '\x07' -> Buffer.add_string b "\\a"
| '\x08' -> Buffer.add_string b "\\b"
| '\x09' -> Buffer.add_string b "\\t"
| '\x0A' -> Buffer.add_string b "\\n"
| '\x0B' -> Buffer.add_string b "\\v"
| '\x0C' -> Buffer.add_string b "\\f"
| '\x0D' -> Buffer.add_string b "\\r"
| '\'' -> Buffer.add_string b "\\'"
| '"' -> Buffer.add_string b "\\\""
| '\\' -> Buffer.add_string b "\\\\"
| c when (c >= ' ' && c <= '~') -> Buffer.add_char b c
| c -> bprintf b "\" \"\\x%02X\" \"" (int_of_char c)
(* Isolate hexadecimal escape codes in their own C string constants,
they will confuse GCC if followed by digits. *)
done;
Buffer.add_char b '"';
Code.string (Buffer.contents b)
(* 'cast loc t e' returns the C code that casts a typed_code_t expression 'e' to Algol simple type 't'.
It raises a type error if the cast is not allowed by Algol's assignment compatibility
rules (See section 7.7.2.) In most cases C's type casting rules do the right thing already.
'loc' is used in reference class casts that can fail at runtime. See sections 5.4 and 6.1.2. *)
let cast (loc : Location.t) (t : simple_t) (e : typed_code_t) : Code.t =
if assignment_compatible t e.t then
match t, e.t with
| String 1, String 1 ->
e.c
| String n, String 1 ->
"_awe_str_cast_c($, $)" $$ [e.c; code_of_int n]
| String desired_length, String length when desired_length <> length ->
"_awe_str_cast($, $, $)" $$ [e.c; code_of_int length; code_of_int desired_length]
| Reference d_set, Reference e_set when not (ClassSet.subset e_set d_set) ->
let append_class_id c l = (Code.id (Class.to_id c)) :: l in
let class_list = Code.separate ", " (ClassSet.fold append_class_id d_set []) in
"_awe_ref_cast($, $, $)" $$ [code_of_loc loc; e.c; class_list]
| _, _ ->
e.c
else
error loc "%s is not compatible with %s" (describe_simple e.t) (describe_simple t)
(* 'default t' return the C code value for initializing variable of simple type 't'.
Record reference variables must be initalized to "_awe_uninitialized_reference" so that the runtime
can catch stray pointer errors. Most of the rest are rarely used. *)
let default (t : simple_t) : Code.t =
match t with
| Number (Long, Integer) -> failwith "default: Number(Long, Integer) shouldn't exist"
| Statement -> failwith "default: Statements cannot be initialized"
| Null -> failwith "default: NULL cannot be initialized"
| Number (Short, Integer) -> Code.string "0"
| Number (Short, Real) -> Code.string "0.0"
| Number (Long, Real) -> Code.string "0.0"
| Number (Short, Complex) -> Code.string "0.0"
| Number (Long, Complex) -> Code.string "0.0"
| Logical -> Code.string "0"
| Bits _ -> Code.string "0"
| String 1 -> Code.string "' '"
| String n -> blank_string n
| Reference(_) -> Code.string "_awe_uninitialized_reference"
(* STRING(1) is represented by C characters, which are passed and stored as
ordinary C rvalues.
Longer strings (which will be referred to as "STRING(s)")
are represented by the C type "_awe_str", which is a pointer to an
array of characters, because of this indirection STRING(n) has be
treated specially all through the compiler, but the special treatment
is mostly wrapped up in these next few functions. *)
(* This returns the C type used to pass and return values of an Algol simple type. *)
let ctype (t : simple_t) : Code.t =
match t with
| Number (Long, Integer) -> failwith "ctype: Number(Long, Integer) shouldn't exist"
| Number (Short, Integer) -> Code.string "int"
| Number (Short, Real) -> Code.string "double"
| Number (Long, Real) -> Code.string "double"
| Number (Short, Complex) -> Code.string "_Complex double" (* <complex.h> is not included *)
| Number (Long, Complex) -> Code.string "_Complex double"
| Statement -> Code.string "void" (* i.e. returns nothing *)
| Logical -> Code.string "int"
| String 1 -> Code.string "unsigned char"
| String _ -> Code.string "_awe_str"
| Reference(_) -> Code.string "void *"
| Null -> Code.string "void *"
| Bits w when w < 1 -> failwith (sprintf "ctype: BITS(%i) is impossible" w)
| Bits w when w <= 8 -> Code.string "unsigned char"
| Bits w when w <= 16 -> Code.string "unsigned short int"
| Bits w when w <= 32 -> Code.string "unsigned int"
| Bits w when w <= 64 -> Code.string "unsigned long long int"
| Bits w -> failwith (sprintf "ctype: BITS(%i) has no C representation" w)
(* A C bit mask constant to limit a word's width *)
let bit_width_mask (width : int) : Code.t =
Code.string (
if width = 1 then "1"
else if width <= 31 then sprintf "0x%x" (Int.shift_left 1 width - 1)
else if width = 32 then "0xffffffff"
else if width <= 63 then sprintf "0x%LxL" (Int64.sub (Int64.shift_left 1L width) 1L)
else "0xffffffffffffffffL" )
(* The size of the C type for an Algol simple type. *)
let sizeof_ctype (t : simple_t) : Code.t =
match t with
| String n -> code_of_int n (* characters are one byte wide *)
| _ -> "sizeof($)" $$ [ctype t]
(* The C type that points to a value of the simple type 't'. *)
let c_pointer_type (t : simple_t) : Code.t =
match t with
| String n when n > 1 -> Code.string "_awe_str "
| _ -> "$ *" $$ [ctype t]
(* Obtain a pointer to the 't' typed value stored in C variable 'var'. *)
let address_of (t : simple_t) (var : Code.t) : Code.t =
match t with
| String n when n > 1 -> var
| _ -> "&$" $$ [var]
(* Declare 'var' as a simple type 't' variable. *)
let declare_simple (t : simple_t) (var : Code.t) : Code.t =
match t with
| String n when n > 1 -> "unsigned char $[$];\n" $$ [var; code_of_int n] (* character array *)
| t -> "$ $;\n" $$ [ctype t; var]
(* The assignment expression that assigns expression 'expr' to C variable 'lvalue'. *)
let assignment_expression (loc : Location.t) (lvalue : typed_code_t) (expr : typed_code_t) : typed_code_t =
if assignment_compatible lvalue.t expr.t then
match lvalue.t, expr.t with
| String 1, String 1 -> (* character to character *)
{t = expr.t; c = "$ = $" $$ [lvalue.c; expr.c]}
| String dstlen, String 1 -> (* character to array *)
{t = expr.t; c = "_awe_str_cpy_sc($, $, $)" $$ [lvalue.c; code_of_int dstlen; expr.c]}
| String dstlen, String srclen -> (* array to array *)
{t = expr.t; c = "_awe_str_cpy($, $, $, $)" $$ [lvalue.c; code_of_int dstlen; expr.c; code_of_int srclen]}
| _, _ ->
{t = expr.t; c = "$ = $" $$ [lvalue.c; cast loc lvalue.t expr]}
else
error loc "%s cannot be assigned to %s variable" (describe_simple expr.t) (describe_simple lvalue.t)
let assignment_statement (loc : Location.t) (d : typed_code_t) (e : typed_code_t) : Code.t =
"$;\n" $$ [(assignment_expression loc d e).c]
(* Returns C code for the statement that initializes a variable of simple type 't',
(Initialization is usually only done on reference variables, to prevent pointers going stray.
This is used in variable, array element and RESULT parameter initializations. *)
let optionally_initialize_simple (t : simple_t) (var : Code.t) : Code.t =
match t with
| String n when n > 1 && !Options.initialize_all ->
"_awe_str_cpy($, $, $, $);\n" $$ [var; code_of_int n; blank_string n; code_of_int n]
| Reference _ ->
"$ = $;\n" $$ [var; default t]
| _ when !Options.initialize_all ->
"$ = $;\n" $$ [var; default t]
| _ ->
Code.empty
(* Algol blocks translate to GNU C "Statement Expressions", which are
blocks containing declarations and which return a value
(see http://gcc.gnu.org/onlinedocs/gcc/Statement-Exprs.html).
({ decl;... statement;... value; })
If the value is a string pointer then it is possible that it points into an array
inside the block's declarations, which will go out of scope, invalidating the
pointer. The function '_awe_string' copies string arrays into a temporary array
('_awe_return_string') and returns a pointer to that.
Algol W appears to have been designed so that only one such temporary array
is necessary. *)
let copy_if_string (expr : typed_code_t) : Code.t =
match expr.t with
| String n when n > 1 -> "_awe_string($, $)" $$ [expr.c; code_of_int n]
| _ -> expr.c
(* * Programs ---------------------------------------------------------------------------- *)
(* The combined type checking and code generation pass is one huge recursive function
that starts here: the parse tree goes in one end and C code pops out the other. *)
let rec program (tree : Tree.t) : Code.t =
let program_expr = expression Predeclared.scope tree in
let loc = code_of_loc (Tree.to_loc tree) in
if program_expr.t <> Statement then
error (Tree.to_loc tree) "a program should be a statement, this returns %s" (describe_simple program_expr.t)
else
"$
$
extern int _awe_argc;
extern char **_awe_argv;
int main (int argc, char **argv) {
_awe_argc = argc;
_awe_argv = argv;
_awe_init($);
$
_awe_finalize($);
return 0;
}
\n" $$ [ notice; c_program_headers tree; loc; program_expr.c; loc ]
(* A separately compiled Algol procedure contains just headers and a C function. *)
and separate_procedure (procedure : Tree.t) : Code.t =
match procedure with
| Tree.PROCEDURE (_, _, _, _, _) ->
let block = {empty_block with scope = Predeclared.scope} in
let block = add_procedure_declaration procedure block in
let block = add_procedure_functions block in
"$\n$\n$\n" $$ [notice; c_program_headers procedure; block.functions]
| _ ->
failwith "separate_procedure"
(* Three things head a C code output file:
* An #include <awe.h> directive, for the prototypes and macros of the a2wc runtime library;
* A list of constant string declarations for each Algol source file, pointers to these
are used by the "_awe_at" arguments of functions that can raise run-time errors
(see the 'Runtime messages' section of "awe.h" for details);
* A list of string declarations for each record class in the program, pointers to these
are used to identify records' classes at runtime (see the 'References' section of "awe.h") *)
and c_program_headers (tree : Tree.t) : Code.t =
let class_name_code =
let decls =
List.map
(fun (id, name) -> "static const char * const $ = $;" $$ [Code.id id; c_str_const name])
(List.tl (Class.contents ()))
in
Code.separate "\n" decls
in
let source_name_code =
let decls =
mapi 0
(fun number name -> "static const char * const _awe_src_$ = $;" $$ [code_of_int number; c_str_const name])
(Location.source_files ())
in
Code.separate "\n" decls
in
"\n#include <awe.h>\n$\n$\n" $$ [source_name_code; class_name_code]
(* * Blocks -------------------------------------------------------------------------------- *)
(* 'block_expression' produces a C code block that corresponds to an Algol block.
Most of the work is done in other functions called from here.
Typically an Algol program is a block.
Algol declarations are interdependent and don't necessarily appear in order in the block,
so there needs to be several scans to collect them all:
1. record classes (for simple types and record fields);
2. procedure headers (for mutually recursive procedure calls),
also record fields, arrays and simple variables (order doesn't matter here);
3. labels (for non-local gotos in procedures);
4. procedure bodies (now that we have their global variables, labels, etc.)
The following are the declarations that will appear in a C block representing an Algol block,
see 'block_t'. Each group of declarations is optional, but they will always be declared
in the order shown:
block.outsidescope:
If there are array declarartions there will be two nested C blocks.
The outer one is for array bounds calculations (which must be
executed outside the block's scope, see section 5.2.2) and
array bounds checking macros.
block.labels:
Label declarations. Gnu C "Locally Declared Labels" declarations must appear first
in the C block. These declarations allow Gnu C nested functions to execute
non-local gotos. See http://gcc.gnu.org/onlinedocs/gcc/Local-Labels.html
block.structs:
"struct" declarations for record classes.
block.prototypes:
Prototypes for all C functions. If these appear at the top then the order of the
actual function definitions later on becomes unimportant. Algol procedures,
record designators and record fields are represented by C functions.
block.variables:
Declarations for C variables: simple variables, arrays of data for array designator
functions and runtime temporary variables.
block.functions:
C function declarations.
block.initialization:
Initialization assignment statements for REFERENCE arrays and simple variables.
(It is important to initialize reference variables to _awe_uninitialized_reference,
this prevents prevents stray pointer errors going uncaught. Other initializations
are optional.)
The statements
If the Algol block ends in an expression then the block is an expression
itself, and that final expression is its value. (See section 6)
Algol expression blocks translate to GNU C "Statement Expressions".
See: http://gcc.gnu.org/onlinedocs/gcc/Statement-Exprs.html *)
and block_expression (scope : Scope.t)
(block_head : Tree.t list) (* declarations *)
(block_body : Tree.t list) (* statements, labels *)
: typed_code_t =
let block = {empty_block with scope = Scope.push scope} in
(* Compose the declaration scans: *)
let block = add_record_headers block block_head in
let block = List.fold_left add_declaration block block_head in
let block = add_label_declarations block block_body in
let block = add_procedure_functions block in
if block_body = [] then failwith "Compiler.block_expression: no block body" ;
let statement_items, return_expression = snip_last block_body in
let c_statements =
List.map
( function
| Tree.Label (_, id) -> "$:\n" $$ [Code.id id]
| statement -> expression_expect Statement block.scope statement )
statement_items
in
let body = Code.concat [ block.labels;
block.structs;
block.prototypes;
block.variables;
block.functions;
block.initialization;
Code.concat c_statements ]
in
let return_value = expression block.scope return_expression in
let inside_block =
if return_value.t = Statement then
"{\n$$}" $$ [body; return_value.c]
else
"({ $ $; })" $$ [ body; copy_if_string return_value]
in
let outside_block =
if block.outsidescope = Code.empty then
inside_block
else
(if return_value.t = Statement then "{\n$$\n}\n" else "({\n$$;\n})") $$ [ block.outsidescope; inside_block ]
in
{t = return_value.t; c = outside_block}
(* * Expressions -------------------------------------------------------------------------------- *)
(* Almost everything in Algol that is not a declaration is an expression.
The compiler treats statements as expressions of the type "Statement" (similar to C's "void" type).
The 'expression' function takes a parse tree and a scope, and returns C code and a simple type.
(Note: this function is very long, with a branch for each kind of expression.) *)
and expression (scope : Scope.t) (tree : Tree.t) : typed_code_t =
match tree with
| Tree.BEGIN (_, block_head, block_body, _) ->
block_expression scope block_head block_body
(* ** Constants - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - *)
| Tree.TRUE _ -> { t = Logical; c = Code.string "1" }
| Tree.FALSE _ -> { t = Logical; c = Code.string "0" }
| Tree.NULL _ -> { t = Null; c = Code.string "(void *)0" }
(* INTEGER and BITS constants must fit into 32-bit words. *)
| Tree.Integer (loc, s) ->
( try
let i = Int64.of_string s in (* might fail *)
if i >= -2147483648L && i <= 2147483647L then
{t = Number(Short, Integer); c = Code.string s}
else
raise (Failure "")
with Failure _ ->
error loc "integer %s will not fit in a 32 bit word" s )
| Tree.Bits (loc, s) ->
( try
let hex = "0x" ^ s in
let i = Int64.of_string hex in (* range check, might fail *)
if i <= 0xffffffffL then
{t = Bits 32; c = Code.string hex}
else
raise (Failure "")
(* {t = Bits 64; c = Code.string hex} *)
with Failure _ ->
error loc "BITS constant #%s will not fit in a 32 bit word" s )
(* The real and exponent parts of an Algol real number are reassembled into a C floating point constants.*)
| Tree.Real (loc, r, "") -> {t = Number(Short, Real); c = Code.string r}
| Tree.Real (loc, r, e) -> {t = Number(Short, Real); c = Code.string (sprintf "%se%s" r e)}
| Tree.LongReal (loc, r, "") -> {t = Number(Long, Real); c = Code.string r}
| Tree.LongReal (loc, r, e) -> {t = Number(Long, Real); c = Code.string (sprintf "%se%s" r e)}
| Tree.Imaginary (loc, r, "") -> {t = Number(Short, Complex); c = Code.string (r ^ "i")}
| Tree.Imaginary (loc, r, e) -> {t = Number(Short, Complex); c = Code.string (sprintf "%se%si" r e)}
| Tree.LongImaginary (loc, r, "") -> {t = Number(Long, Complex); c = Code.string (r ^ "i")}
| Tree.LongImaginary (loc, r, e) -> {t = Number(Long, Complex); c = Code.string (sprintf "%se%si" r e)}
(* STRING constants. The type "_awe_str" is "char *" but C string constants are "const char *".
In some places (only conditional expressions, it appears) that would cause a type mismatch,
hence the need for a cast. String constants are Algol "expressions", not "designators",
so they are safe from modification regardless. *)
| Tree.String (loc, s) ->
( match String.length s with
| 0 -> error loc "Empty strings are not allowed in Algol W"
| 1 -> {t = String 1; c = c_char_const s}
| n -> {t = String n; c = "(_awe_str)$" $$ [c_str_const s]}
)
(* ** Operators: IS, unary and binary - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - *)
(* IS is a special case among the binary operators because its right operand is not an expression.
See section 6.4. *)
| Tree.Binary (loc, reference, Tree.IS, record) ->
let record_id, record_class =
match record with
| Tree.Identifier (loc, id) ->
( match get loc scope id with
| Record (c, _) -> id, c
| defn -> error loc "expected a record class identifier here, this is %s" (describe_definition defn) )
| _ ->
error (Tree.to_loc record) "expected a record class identifier here"
in
let rc = expression scope reference in
( match rc.t with
| Null ->
{t = Logical; c = Code.string "0"} (* NULL never refers to a record *)
| Reference class_set ->
if ClassSet.mem record_class class_set then (* test at runtime *)
{t = Logical; c = "_awe_is($, $)" $$ [rc.c; Code.id (Class.to_id record_class)]}
else
error loc "%s will never refer to a RECORD %s" (describe_simple rc.t) (Id.to_string record_id)
| _ ->
error loc "expected a reference, this is %s" (describe_simple rc.t) )
| Tree.Unary (loc, o, a) ->
unary_expression loc scope o a
| Tree.Binary (loc, a, op, b) ->
binary_expression loc scope a op b
(* ** IF/THEN - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - *)
(* IF-THEN-ELSE is a Statement if both its branches are Statements, otherwise it is an expression.
An IF-THEN without an ELSE is always a statement.
The THEN branch of an IF THEN ELSE statement should be a <simple statement>
(see section 7.5.1), but Hendrik Boom's A68H code does not obey that rule,
suggesting that the eariler compiler did not insist on it - so Awe does not either.
*)
| Tree.IF (loc, condition, then_clause) ->
let cc = expression_expect Logical scope condition in
let ct = expression_expect Statement scope then_clause in
{ t = Statement;
c = "if ($)\n $" $$ [cc; ct] }
| Tree.IF_else (loc, condition, then_clause, else_clause) ->
let cc = expression_expect Logical scope condition in
let ce = expression scope else_clause in
let ct = expression scope then_clause in
let rtype =
try
Type.triplet_rule ct.t ce.t
with Incompatible ->
error loc "incompatible types: the THEN clause is %s and the ELSE clause is %s"
(describe_simple ct.t)
(describe_simple ce.t)
in
{ t = rtype;
c = match rtype with
| Statement ->
"if ($)\n $ \n else \n $ \n" $$ [cc; ct.c; ce.c]
| _ ->
"($ ? $ : $)" $$ [cc; cast loc rtype ct; cast loc rtype ce] }
(* ** CASE - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - *)
(* The CASE-OF-BEGIN-END statement. The selector expression is
stored in a temprory variable so that it does not get called a
second time if there is a range error. *)
| Tree.CASE (loc, selector, branches) ->
let cselector = expression_expect integer scope selector in
let make_branch index branch =
"case $: $; break;\n" $$ [ code_of_int index; (expression_expect Statement scope branch) ]
in
let cbranches = Code.concat (mapi 1 make_branch branches) in
{ t = Statement;
c =
"{
const int _selector = $;
switch (_selector) {
$
default: _awe_case_range_error($, _selector);
}
}
" $$ [cselector; cbranches; code_of_loc loc] }
| Tree.CASE_expr (loc, selector, branches) ->
let cselector = expression_expect integer scope selector in
let bs = List.map (expression scope) branches in
let rt =
try
List.fold_left
(fun t c -> Type.triplet_rule t c.t)
(List.hd bs).t
(List.tl bs)
with Incompatible ->
error loc "This CASE expression's branch expressions have incompatible types."
in
( match rt with
| String n when n > 1 ->
let value = {t = rt; c = Code.string "_awe_return_string"} in
let make_branch index branch =
"case $: $ break;\n" $$ [ code_of_int index; assignment_statement loc value branch ]
in
let cbranches = Code.concat (mapi 1 make_branch bs) in
{ t = rt;
c = "({ int _selector = $;
switch (_selector) {
$
default: _awe_case_range_error($, _selector);
}
_awe_return_string; })" $$ [cselector; cbranches; code_of_loc loc] }
| _ ->
let value = {t = rt; c = Code.string "_value"} in
let make_branch index branch =
"case $: $ break;\n" $$ [ code_of_int index; assignment_statement loc value branch ]
in
let cbranches = Code.concat (mapi 1 make_branch bs) in
{ t = rt;
c = "({ $ _value;
int _selector = $;
switch (_selector) {
$
default: _awe_case_range_error($, _selector);
}
_value; })" $$ [ctype rt; cselector; cbranches; code_of_loc loc] }
)
(* ** Iterative statements: WHILE, FOR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - *)
| Tree.WHILE (loc, condition, then_clause) ->
let cc = expression_expect Logical scope condition in
let ct = expression_expect Statement scope then_clause in
{ t = Statement;
c = "while ($)\n $ \n" $$ [cc; ct] }
(* The limit and step expression of FOR statments are placed in
temporary variables to prevent them from being called more than once. *)
| Tree.FOR (loc, control, first, last, body) ->
let ccontrol = Code.id control in
let for_body_scope = set loc (Scope.push scope) control Control in
{ t = Statement;
c =
"{
const int _start = $;
const int _limit = $;
int $ = _start;
while ($ <= _limit) {
$
++$;
}
}
" $$ [ expression_expect integer scope first;
expression_expect integer scope last;
ccontrol;
ccontrol;
expression_expect Statement for_body_scope body;
ccontrol ] }
| Tree.FOR_step (loc, control, first, step, last, body) ->
let ccontrol = Code.id control in
let for_body_scope = set loc (Scope.push scope) control Control in
{ t = Statement;
c =
"{
const int _start = $;
const int _step = $;
const int _limit = $;
int $ = _start;
_awe_check_for_step($, _step);
while (_step > 0 ? $ <= _limit : $ >= _limit) {
$
$ += _step;
}
}
" $$ [ expression_expect integer scope first;
expression_expect integer scope step;
expression_expect integer scope last;
ccontrol;
code_of_loc loc;
ccontrol; ccontrol;
expression_expect Statement for_body_scope body;
ccontrol ] }
(* As far as I can tell, all of the expressions in the list form of
the FOR statement must be evaluated before the body is executed,
that is why they are being stored in a temporary array here. *)
| Tree.FOR_list (loc, control, expressions, body) ->
let ccontrol = Code.id control in
let nexpressions = code_of_int (List.length expressions) in
let es = List.map (expression_expect integer scope) expressions in
let cassignments = Code.concat (mapi 0 (fun i e -> "_a[$] = $;\n" $$ [code_of_int i; e]) es) in
let for_body_scope = set loc (Scope.push scope) control Control in
let cbody = expression_expect Statement for_body_scope body in
{ t = Statement;
c =
"{
int $, _i, _a[$];
$
for (_i = 0; _i < $; ++_i) {
$ = _a[_i];
$
}
}
" $$ [ ccontrol; nexpressions;
cassignments;
nexpressions;
ccontrol;
cbody ] }
(* ** Other statements: GOTO, ASSERT, empty - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - *)
| Tree.GOTO (loc, label) ->
( match (get loc scope label) with
| Label ->