-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathinfer.ml
641 lines (610 loc) · 19.8 KB
/
infer.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
(**
* This code is public domain, and
* a full code listing can be found at
* github.com/bynect/constraint-inference
*
* This is an indipendent implementation of the
* bottom-up type inference algorithm
* described in the following paper
*
* Bastiaan Heeren, Jurriaan Hage, Doaitse Swierstra
* Generalizing Hindley-Milner Type Inference Algorithms
* Institute of Information and Computing Sciences,
* Utrecht University, 2002
* [UU-CS-2002-031]
*
* A copy of the original paper can be found at
* www.cs.uu.nl/research/techreps/repo/CS-2002/2002-031.pdf
*
* NOTE: Some sentences are either taken verbatim
* or rewording of excerpt from the paper
*
* NOTE: While the main algorithm described in
* the paper is unchanged, there may be small
* divergences between this implementation and
* the paper pseudo-implementation
*
* NOTE: This document contains additions to the
* original described in the paper above.
* A version of this document more faithful to the paper
* can be found at
* github.com/bynect/constraint-inference/tree/original
*)
(**
* An expression is denoted by E (uppercase e)
*
* E ::=
* | x
* | E1 E2
* | λx -> E
* | let x = E1 in E2
* | rec x = E1 in E2
* | if E1 then E2 else E3
* | E1, ..., En
* | lit
*)
type expr =
| Var of string
| App of expr * expr
| Abs of string * expr
| Let of string * expr * expr
| Rec of string * expr * expr
| If of expr * expr * expr
| Tup of expr list
| Lit of lit
and lit = Int of int | Bool of bool
(**
* A type variable is denoted by α (lowercase alpha)
* A fresh type variable is denoted by β (lowercase beta)
*
* While there should be infinitely many fresh
* type variables available, this implementation
* uses a reasonable limit
*)
type var = string
(**
* Generate an unique fresh type variable β
*
* NOTE: This function is stateful
*)
let var_fresh : unit -> var =
let state = ref 1 in
fun () ->
let n = !state in
state := n + 1;
Printf.sprintf "t%d" n
(**
* A type is denoted by τ (lowercase tau)
*
* τ ::=
* | α
* | C τ1...τn
* | τ1 -> τ2
*
* NOTE: The function constructor can be represented
* by a type constructor.
*)
type ty =
| TVar of var
| TCon of string * ty list
| TFun of ty * ty
(**
* A type scheme is denoted by σ (lowercase sigma)
*
* In a type scheme a set of type variables α1...αn,
* representing the polymorphic type variables, are
* bound to an universal quantifier
*
* σ ::= ∀α.τ
*)
type scheme = Scheme of var list * ty
(**
* An assumption is denoted by A (uppercase a)
*
* An assumption set is used to record the type variables
* that are assigned to the occurrences of free variables
* instead of a type environment Γ used in Algorithm W
*
* Unlike the common type environment Γ, in an assumption set
* there can be different assumptions for a given variable
*)
type assump = Assumption of string * ty
(**
* A constraint is denoted by C (uppercase c)
*
* A set of constraints is collected instead of constructing
* a substitution by postponing the unification process
*
* The equality constraint (τ1 ≡ τ2) reflects that type τ1 and
* type τ2 should be unified
*
* The implicit instance constraint (τ1 ≤M τ2) expresses that
* type τ1 should be an instance of the type scheme obtained
* by generalizing type τ2 with the set of monomorphic
* type variables M
*
* The explicit instance constraint (τ ≤ σ) states that
* type τ has to be a generic instance of the type scheme σ
*
* C ::=
* | τ1 ≡ τ2
* | τ1 ≤M τ2
* | τ ≤ σ
*)
type constr =
| Equality of ty * ty
| ImplInstance of ty * ty * var list
| ExplInstance of ty * scheme
(**
* A monomorphic set is denoted by M (uppercase m)
*
* Unlike the rest of the algorithm, which is bottom-up,
* to compute a set of monomorphic type variables M,
* a single top-down computation is used
*
* Unlike a type environment Γ, a monomorphic set M
* contains only type variables, not a mapping from
* name to type, and only the type variables introduced
* by lambdas at higher levels in the abstract
* syntax tree will be added to the set
*)
module Set = Set.Make (String)
type mono = Set.t
(**
* A substitution is denoted by S (uppercase s)
*
* Substitutions are mapping of type variables to types
* A substitution for a set of type variables {α1, ..., αn}
* is [α1 := τ1, ..., αn := τn]
*
* Substitutions are idempotent, so that S(Sτ) = Sτ
*)
module Map = Map.Make (String)
type subst = ty Map.t
(**
* Substitution application for a type
**)
let rec apply (s : subst) : ty -> ty = function
| TVar var -> begin
match Map.find_opt var s with
| Some ty -> ty
| None -> TVar var
end
| TCon (name, ts) -> TCon (name, (List.map (apply s) ts))
| TFun (t1, t2) -> TFun (apply s t1, apply s t2)
(**
* Substitution application for a type scheme
*)
let apply' (s : subst) : scheme -> scheme = function
| Scheme ([], ty) -> Scheme ([], apply s ty)
| Scheme (vars, ty) ->
let s' = List.fold_right (fun v acc -> Map.remove v acc) vars s in
Scheme (vars, apply s' ty)
(**
* Composing substitution S1 and substitution S2 is
* written as (S2 ◦ S1) and results in another substitution
*
* NOTE: Substitution composition is left biased
*)
let compose (s1 : subst) (s2 : subst) : subst =
Map.map (fun ty -> apply s1 ty) s2 |> Map.union (fun _ ty _ -> Some ty) s1
(**
* Substitutions that do not pass the
* occur check are defined to be equal
* to the error substitution, denoted by T
*
* The composition of any substitution S
* with the error substitution T results
* in T as well
*
* NOTE: This implementation uses
* exceptions as error substitution
*)
type err = UnboundVar of string | OccurFail of var * ty | UnifyFail of ty * ty
exception Error of err
(**
* Constraints collection
*
* The algorithm for gathering constraint and
* assumptions is bottom-up, and the only
* process which is top-down is the creation
* of the monomorphic set M
*
* Every time a variable is used, a fresh type
* variable β is generated and appended to the
* assumption set, and there can be multiple
* different assumption regarding the same
* variable name
*
* Applications create a fresh type
* variable β, merge and propagate the assumptions
* and constraints collected by the subexpressions
* while adding an equality constraint to ensure
* that the type of the first expression matches
* with the type of the second expression and β
*
* Lambda abstractions create a fresh type
* variable β representing the bound variable
* and add it to the monomorphic set M before
* the collection of the subexpression
*
* An equality constraint is generated for each
* type variable β in the assumption set that is
* associated with the variable bound by the lambda,
* and the assumptions concerning this variable
* are removed from the set before being propagated
*
* Let expressions introduce polymorphism by
* generating an implicit instance constraint,
* using the monomorphic set M, for each type
* variable β in the assumption set that is
* associated with the variable bound by the let,
* while removing the assumptions concerning this
* variable from the set before being propagated
*
* Literals return their type without any
* new constraint or assumption
*
* NOTE: Although the set of constrains is
* not ordered, an implicit instance constraint
* requires some constraints to be solved before
* it becomes solvable
*
* NOTE: In this implementation the monomorphic set M
* is propagated alongside the abstract syntax tree
* traversal by the collect function, but it could be
* also easily implemented as a separate function
*)
let rec collect (m : mono) : expr -> ty * assump list * constr list = function
| Var x ->
let ty = TVar (var_fresh ()) in
(ty, [ Assumption (x, ty) ], [])
| App (e1, e2) ->
let beta = var_fresh ()
and t1, a1, c1 = collect m e1
and t2, a2, c2 = collect m e2 in
(TVar beta, a1 @ a2, Equality (t1, TFun (t2, TVar beta)) :: (c1 @ c2))
| Abs (x, e) ->
let beta = var_fresh () in
let t, a, c = collect (Set.add beta m) e in
let a', c' =
List.fold_left
(fun (acc, acc') a' ->
let (Assumption (x', t')) = a' in
if x' = x then (acc, Equality (t', TVar beta) :: acc')
else (Assumption (x', t') :: acc, acc'))
([], c) a
in
(TFun (TVar beta, t), a', c')
| Let (x, e1, e2) ->
let t1, a1, c1 = collect m e1 and t2, a2, c2 = collect m e2 in
let m' = Set.to_seq m |> List.of_seq in
let a', c' =
List.fold_left
(fun (acc, acc') a' ->
let (Assumption (x', t')) = a' in
if x' = x then (acc, ImplInstance (t', t1, m') :: acc')
else (Assumption (x', t') :: acc, acc'))
([], []) a2
in
(t2, a1 @ a', c1 @ c2 @ c')
| Rec (x, e1, e2) ->
let tv = TVar (var_fresh ()) in
let t1, a1, c1 = collect m (Abs (x, e1)) and t2, a2, c2 = collect m e2 in
let m' = Set.to_seq m |> List.of_seq in
let a', c' =
List.fold_left
(fun (acc, acc') a' ->
let (Assumption (x', t')) = a' in
if x' = x then (acc, ImplInstance (t', tv, m') :: acc')
else (Assumption (x', t') :: acc, acc'))
([], []) a2
in
(t2, a1 @ a', Equality (TFun (tv, tv), t1) :: c1 @ c2 @ c')
| If (e1, e2, e3) ->
let t1, a1, c1 = collect m e1
and t2, a2, c2 = collect m e2
and t3, a3, c3 = collect m e3
in
(t2, a1 @ a2 @ a3, Equality (t1, TCon ("Bool", [])) :: Equality (t2, t3) :: (c1 @ c2 @ c3))
| Tup es ->
let ts, a, c = List.fold_left (fun (acc, a, c) e ->
let t', a', c' = collect m e in
t' :: acc, a @ a', c @ c')
([], [], []) es
in
(TCon (String.make (List.length ts) ',', ts), a, c)
| Lit (Int _) -> (TCon ("Int", []), [], [])
| Lit (Bool _) -> (TCon ("Bool", []), [], [])
(**
* The set of free type variables of a type τ is
* denoted by freevars(τ) and consinst of all
* the type variables in τ
*)
let rec freevars : ty -> Set.t = function
| TVar var -> Set.singleton var
| TCon (_, ts) -> List.fold_left (fun acc ty -> Set.union (freevars ty) acc) Set.empty ts
| TFun (t1, t2) -> Set.union (freevars t1) (freevars t2)
(**
* The set of free type variables of a type scheme σ
* is denoted by freevars(∀α.τ) and is equal to
* freevars(τ) - α
*)
let freevars' : scheme -> Set.t = function
| Scheme ([], ty) -> freevars ty
| Scheme (vars, ty) -> Set.diff (freevars ty) (Set.of_list vars)
(**
* For implicit instance constraints, the
* substitution S is also applied to the set
* of monomorphic type variables M
*)
let apply_constr (s : subst) : constr -> constr = function
| Equality (t1, t2) -> Equality (apply s t1, apply s t2)
| ImplInstance (t1, t2, m) ->
let m' =
List.fold_left
(fun acc var ->
match Map.find_opt var s with
| Some ty ->
let vars = freevars (apply s ty) |> Set.to_seq |> List.of_seq in
vars @ acc
| None -> var :: acc)
[] m
in
ImplInstance (apply s t1, apply s t2, m')
| ExplInstance (t, sigma) -> ExplInstance (apply s t, apply' s sigma)
(**
* A substitution S applied to a constraint set
* is simply applied to all the types and
* type schemes contained therein
*)
let apply_constr' (s : subst) (c : constr list) : constr list =
List.map (apply_constr s) c
(**
* The active type variables in a constraint are
* defined as follows
*
* activevars (τ1 ≡ τ2) = freevars(τ1) ∪ freevars(τ2)
* activevars (τ1 ≤M τ2) = freevars(τ1) ∪ (freevars(M) ∩ freevars(τ2))
* activevars (τ ≤ σ) = freevars(τ) ∪ freevars(σ)
*
* NOTE: freevars(M) is M
*)
let activevars : constr -> Set.t = function
| Equality (t1, t2) -> Set.union (freevars t1) (freevars t2)
| ImplInstance (t1, t2, m) ->
let m' = Set.of_list m in
Set.union (freevars t1) (Set.inter m' (freevars t2))
| ExplInstance (t, sigma) -> Set.union (freevars t) (freevars' sigma)
let activevars' : constr list -> Set.t = function
| [] -> Set.empty
| c ->
List.fold_left (fun acc c' -> Set.union (activevars c') acc) Set.empty c
(**
* Instantiate the type scheme σ by subsituting
* all the bound type β variables α with fresh
* type variables in the type τ
*
* instantiate(∀α1, ..., αn.τ) = [α1 := β1, ..., αn := βn]τ
* where β1, ..., βn are fresh
*)
let instantiate : scheme -> ty = function
| Scheme ([], ty) -> ty
| Scheme (vars, ty) ->
let s =
List.fold_left
(fun acc var -> Map.add var (TVar (var_fresh ())) acc)
Map.empty vars
in
apply s ty
(**
* Generalize a type τ to create a type
* scheme σ with respect to the set of
* type variables M
*
* generalize(M, τ) = ∀α.τ
* where α1...αn = freevars(τ) − M
*)
let generalize (m : var list) (ty : ty) : scheme =
let m = Set.diff (freevars ty) (Set.of_list m) in
Scheme (Set.to_seq m |> List.of_seq, ty)
(**
* Most General Unification
*
* Given types τ1 and τ2, mgu(τ1,τ2) returns
* either the most general unifier, which is
* a substitution S, or an error if unification
* is impossible
*
* For the resulting substitution S it holds
* by definition that Sτ1 = Sτ2
*)
let rec mgu (t1 : ty) (t2 : ty) : subst =
let bind var ty =
match ty with
| TVar var' when var = var' -> Map.empty
| _ when Set.mem var (freevars ty) -> Error (OccurFail (var, ty)) |> raise
| _ -> Map.singleton var ty
in
match (t1, t2) with
| TVar var, ty | ty, TVar var -> bind var ty
| TCon (name, ts), TCon (name', ts') when name = name' ->
if List.length ts != List.length ts' then
Error (UnifyFail (t1, t2)) |> raise
else
List.fold_left2 (fun acc t1 t2 ->
compose (mgu (apply acc t1) (apply acc t2)) acc)
Map.empty ts ts'
| TFun (t1, t2), TFun (t1', t2') ->
let s1 = mgu t1 t1' in
let s2 = mgu (apply s1 t2) (apply s1 t2') in
compose s2 s1
| _ -> Error (UnifyFail (t1, t2)) |> raise
(**
* Constraints solving
*
* After the generation of a constraint set,
* a substitution is constructed that satisfies
* each constraint in the set
*
* Satisfaction of a constraint C by a
* substitution S is defined as
*
* S satisfies (τ1 ≡ τ2) = Sτ1 = Sτ2
* S satisfies (τ1 ≤M τ2) = Sτ1 ≤ generalize(SM, Sτ2)
* S satisfies (τ ≤ σ) = Sτ ≤ Sσ
*
* After substitution, the two types of an
* equality constraint should be syntactically equal
*
* The equality constraint can be satified by the
* most general unifier of types τ1 and τ2
*
* The implicit instance constraint applies the
* substitution not only to both types τ1 and τ2,
* but also to the monomorphic type variables set M
*
* For explicit instance constraint the substitution
* is applied only to the type τ and type scheme σ,
* leaving untouched the quantified type variables
*
* Due to the fact that in general generalize(SM, Sτ)
* is not equal to S(generalize(M, τ)),
* implicit and explicit instance constraints
* really have different semantics
*
* Note that for two types τ1 and τ2 the following
* properties are valid
*
* S satisfies τ1 ≡ τ2 ⇐⇒ S satisfies τ1 ≤ τ2
* S satisfies τ1 ≡ τ2 ⇐⇒ S satisfies τ1 ≤M=freevars(τ2) τ2
* S satisfies τ1 ≤M τ2 ⇐⇒ S satisfies τ1 ≤ generalize(SM, Sτ2)
*
* The first two properties show that every equality
* constraint can be expressed as an instance constraint
* of either type, and the justification of these properties
* is that the only generic instance of a type is the
* type itself, and because generalize(freevars(τ), τ)
* equals τ for all types τ
*
* The third property is justified by the fact that
* substitution S is idempotent, from which follows that
* S(generalize(SM, Sτ)) is equal to generalize(SM, Sτ).
*
* The algorithm used in the solve function assumes
* that the constraint set always contains a constraint
* which can be solved, and in particular, if it contains
* only implicit instance constraints, then there is one
* for which its condition is fulfilled
*)
let rec solve : constr list -> subst = function
| [] -> Map.empty
| Equality (t1, t2) :: c ->
(* Find the substitution S for the most general unifier
* and apply it to the constraint set C *)
let s = mgu t1 t2 in
compose (apply_constr' s c |> solve) s
| ImplInstance (t1, t2, m) :: c ->
(* If the constraint is solvable then
* create a type scheme σ by generalizing
* the type τ2 with the monomorphic set M
* and transform the implicit instance constraint
* to an explicit instance constraint *)
let ftv = Set.diff (freevars t2) (Set.of_list m) in
if Set.inter ftv (activevars' c) |> Set.is_empty then
solve (ExplInstance (t1, generalize m t2) :: c)
(* TODO: Check if this is correct and terminates for
* all inputs *)
else solve (c @ [ ImplInstance (t1, t2, m) ])
| ExplInstance (t, sigma) :: c ->
(* Instantiate the type scheme σ and convert
* the explicit instance constraint to an
* equality constraint between the type τ and
* the newly instantiated type *)
solve (Equality (t, instantiate sigma) :: c)
(**
* A type environment is denoted by Γ (uppercase gamma)
*
* In a type environment a type scheme σ
* is paired with a variable name x
*
* Unlike algorithm W and M, which use the
* type environment with top-down rules,
* this algorithm the type environment Γ
* is combined with the assumption set
* to create an extra set of constraints
*
* A ≤ Γ = { τ ≤ σ | x:τ ∈ A, x:σ ∈ Γ }
*
* The following properties can be derived
* from the definition of ≤
*
* (A1 ∪ A2) ≤ Γ = (A1 ≤ Γ) ∪ (A2 ≤ Γ)
* A ≤ (Γ1 ∪ Γ2) = (A ≤ Γ1) ∪ (A ≤ Γ2)
* A ≤ Γ\x = A\x ≤ Γ
* A ≤ {x: τ} = {τ' ≡ τ | x:τ' ∈ A}
*)
type env = scheme Map.t
(**
* This function creates a constraint set C
* given a type environment Γ and an
* assumption set A
*
* In a correct program dom(A) ⊆ dom(Γ),
* meaning that all the variable names
* present in the assumption set A must
* be bound in the type environment Γ
*
* TODO: Find a better name
*)
let env_aux (gamma : env) (a : assump list) : constr list =
match a with
| [] -> []
| _ ->
List.fold_left
(fun acc a' ->
let (Assumption (var, ty)) = a' in
match Map.find_opt var gamma with
| Some sigma -> ExplInstance (ty, sigma) :: acc
(* If domain(A) not a subset of domain(Γ) then
* an undefined variable was referenced *)
| None -> Error (UnboundVar var) |> raise)
[] a
(**
* This algorithm computes a type τ
* for an expression E given a type
* environment Γ, also returning a
* substitution S, giving it the same
* signature of algorithm W
*
* Because there is no distinction
* between the type variables introduced
* by applying the inference rules,
* and those (monomorphic) type variables
* that occur in the initial type environment Γ,
* a substitution can change the types in Γ
*
* infer(Γ, E) =
* A, C |- E:τ
* if dom(A) ⊄ dom(Γ)
* then report unbound variables exist
* else S = solve(C ∪ A ≤ Γ)
* return (S, Sτ)
*
* (S, Sτ)
*
* NOTE: Algorithm W is a deterministic
* instance of infer
*
* NOTE: This algorithm is nondeterministic
* as the order in which the constraints are
* solved determines the location where a type
* error is detected and reported
* In the process of type inferencing,
* this dependency can result in a bias
*)
let infer (gamma : env) (e : expr) : subst * ty =
let ty, a, c = collect Set.empty e in
let c' = env_aux gamma a in
let s = solve (c @ c') in
(s, apply s ty)