-
Notifications
You must be signed in to change notification settings - Fork 15
/
Copy pathjamie-examples.df
211 lines (175 loc) · 7.21 KB
/
jamie-examples.df
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
# Examples for Jamie:
# DONE - transitive closure
# - flow analysis, liveness analysis, constant analysis
# DONE - parsing
# DONE - DFA equivalence
# DONE - shortest path
# DONE - treeP from semmle
# - find those examples from the physicist on my gist
## Builtin types
# op A = A where (x ≤ y : op A) iff (y ≤ x : A)
#
# data Maybe A where
# just : A -> Maybe A
# none : Maybe A
# ordered
# none ≤ just x ∀x
# just x ≤ just y if x ≤ y
## Builtin functions
# 1. len : string -> nat
# 2. substring : string -> nat,nat -> string
# 3. _+_ : nat => nat => nat
# 4. not : op bool => bool
#
# For any first-order type A:
# 5. _=_ : A -> A -> bool
# 6. _in?_ : A -> {A} => bool set membership
# 7. _\_ : {A} => op {A} => {A} set difference
#
# A type is first-order if it doesn't contain functions. I think restricting
# first-order types not to contain sets is also fine for these examples.
# ---------- Utility functions ----------
# `range` wouldn't pass Datafun's termination checker.
def range: (op nat, nat) => {nat}
fn (lo,hi) => fix self as
{x | x in {lo} or {x+1 | x in self}, x <= hi}
# All substrings, including empty ones, indexed by bounds. Computing this
# directly is stupid, but inlining it and optimising could probably turn most
# invocations into calls to `substring`.
def substrings: string -> {nat,nat: string}
fn s => { i,j: substring s (i,j)
| i in range (0, len string)
, j in range (i, len string)}
# Pretend A stands for any type. This would be better implemented as a
# primitive, of course.
def empty?: op {A} => bool
fn s => not (for x in s do true)
# Not particularly useful. Also wouldn't pass the termination checker.
def subsets: {nat} => {{nat}}
fn univ => self as
or {{}}
or {{x} | univ[x]}
or {x or y | self[x], self[y]}
# NB:
#
# {... | foo[x,y]} == {... | (x,y) in foo}
# (for foo[x,y] do ...) == (for x,y in foo do ...)
#
# I sometimes prefer this notation for its brevity and similarity to
# {Pro,Data}log's "foo(x,y)".
# ---------- Transitive closure, naive and semi-naive ----------
# reachable-naive is faster under naive evaluation, because it takes
# log2(max path length) iterations. reachable-seminaive is faster under
# seminaive evaluation, because it's "linear" (it uses itself recursively "at
# most once") and so has a derivative that doesn't involve recomputation.
# This is all exactly analogous to Datalog.
def reachable-naive : {nat,nat} => {nat,nat}
fn edges => paths as edges or {(x,z) | paths[x,y], paths[y,z]}
def reachable-seminaive : {nat,nat} => {nat,nat}
fn edges => paths as edges or {(x,z) | edges[x,y], paths[y,z]}
# ---------- Parsing, the CYK algorithm ----------
type symbol = string
type rule = String(string) | Cat(symbol, symbol)
type grammar = {symbol, rule}
def parse: grammar -> string -> {symbol, nat, nat}
fn grammar text => chart as
{(i,j) | grammar[x, String(s)], substrings text [i,j]: s}
{(i,k) | grammar[x, Cat(y,z)], chart[x,i,j], chart[y,j,k]}
# ---------- Shortest path ----------
# This is not Dijkstra's algorithm, unless interpreted very intelligently.
# This depends on nat being able to take fixed points whose values are
# dictionaries into a non-powerset semilattice (namely, distances under min). In
# principle, distances are 0 ≤ 1 ≤ 2 ≤ ... ≤ ∞, where ∞ means "disconnected".
# However, we need to invert this order; ∞ ≤ ... ≤ 2 ≤ 1 ≤ 0. This is because
# our fixed point computations start from the least value and move upward. We
# should start by assuming everything is disconnected and move in the direction
# of shorter paths.
#
# Put another way, Datafun computes _least_ fixed points. Saying "everything is
# distance 0 to everything else" is clearly a fixed point (nothing can get any
# closer), so if 0 were the least value, it would be the least fixed point!
#
# Finally, we don't need an explicit ∞ because we can represent
# "disconnectedness" by _absence_ (from a set or dictionary). So our distance
# type is simply (op nat).
type dist = op nat
type node = nat
def shortest-paths: {node, node, dist} => {node, node: dist}
fn edges => self as
or {u,v: d | edges[x,y,d]}
or {u,w: (d1 + d2) | self[u,v]: d1, self[v,w]: d2}
# ---------- DFA (in)equivalence ----------
# A DFA is a tuple (states, final?, transitions).
type dfa = ({node}, (node -> bool), {node,char: node})
# Given a single DFA, finds all state-pairs (u,v) such that u is NOT equivalent
# to v. Works by assuming all states are equivalent and ruling out equivalences.
# Probably very inefficient. Is there an inverse way to do this - start out
# assuming all states are distinct and proving equivalences?
def distinct : dfa -> {node, node}
fn (states, final?, trans) => distinct as
# two states are distinct if one is final & the other not
or {u,v | states[u], states[v], final? u /= final? v}
# or if they transition to distinct states under the same character
or {u,v | distinct[w1,w2], trans[u,c]: w1, trans[v,c]: w2}
# If you want to find out if two DFAs are equivalent, just disjoint-union them
# into one DFA, and ask whether their start states are distinct.
# ---------- treeP ----------
# I got this example from the folks at Semmle.
#
# Given a digraph and a predicate on nodes, find all nodes such that the
# subgraph reachable from them is a DAG all of whose nodes satisfy P.
#
# Note that this fixed point is monotone, but only by double negation. empty? is
# antitone, and not is antitone, thus (empty? {y | child[x,y], not (y in? self)}) is
# monotone in `self`.
def treeP: {node, node} -> {node} => {node}
fn child p => self as
{x | p[x], empty? {y | child[x,y], not (y in? self)}}
# ---------- Simple static analyses ----------
type var = string
type line = nat
type op = Add | Sub | Mul | Div | Eq | Neq
type expr = Var(var) | Op(op, var, var)
type stmt = Skip
| Assign(var, expr)
| Return(var)
| If(var, line, line)
| Goto(line)
type prog = {line: stmt}
# ----- 1-step control flow -----
type flow = {line, line}
def flow: prog -> flow
fn prog =>
for prog[i]: stmt do
case stmt
| Goto(j) -> {(i,k)}
| If(_,j,k) -> {(i,j), (i,k)}
| Skip | Assign(_) -> {(i,i+1)}
| Return -> {}
# ----- Liveness analysis -----
def uses: expr -> {var}
fn e => case e | Var(v) => {v} | Op(_,v1,v2) => {v1,v2}
# Is some info about a variable flowing INTO a line, or OUT OF a line?
type how = In | Out
# How a given statement changes what variables are live. Given what variables
# are live after it, returns what variables are live before it.
def needs: stmt -> {var} => {var}
fn stmt after =>
case stmt
# Skip & goto do nothing.
| Skip | Goto(_) -> after
# Return statements and conditionals always need their variables.
| Return(v) | If(v,_,_) -> {v} or after
| Assign(v, e) ->
# We supply the value of v.
or (after \ {v})
# We need our sources, if our destination is live.
or (if v in? after do uses e)
def live: prog -> flow -> {line, how, var}
fn prog flows => self as
# Propagate liveness info backwards.
or {(i, Out, v) | flows[i,j], self[j, In, v]}
# Analyse statements.
or {(i, In, v) | prog[i]: stmt, v in needs stmt {v | self[i, Out, v]} }
# ----- TODO: Reaching assignment analysis -----
# ----- TODO: Constant variable analysis -----