jamie-examples.df

# Examples for Jamie:
# DONE - transitive closure
# - flow analysis, liveness analysis, constant analysis
# DONE - parsing
# DONE - DFA equivalence
# DONE - shortest path
# DONE - treeP from semmle
# - find those examples from the physicist on my gist

## Builtin types
# op A = A where (x ≤ y : op A) iff (y ≤ x : A)
#
# data Maybe A where
#   just : A -> Maybe A
#   none : Maybe A
# ordered
#   none ≤ just x       ∀x
#   just x ≤ just y     if x ≤ y

## Builtin functions
# 1. len : string -> nat
# 2. substring : string -> nat,nat -> string
# 3. _+_ : nat => nat => nat
# 4. not : op bool => bool
#
# For any first-order type A:
# 5. _=_ : A -> A -> bool
# 6. _in?_ : A -> {A} => bool           set membership
# 7. _\_ : {A} => op {A} => {A}         set difference
#
# A type is first-order if it doesn't contain functions. I think restricting
# first-order types not to contain sets is also fine for these examples.


# ---------- Utility functions ----------
# `range` wouldn't pass Datafun's termination checker.
def range: (op nat, nat) => {nat}
fn (lo,hi) => fix self as
  {x | x in {lo} or {x+1 | x in self}, x <= hi}

# All substrings, including empty ones, indexed by bounds. Computing this
# directly is stupid, but inlining it and optimising could probably turn most
# invocations into calls to `substring`.
def substrings: string -> {nat,nat: string}
fn s => { i,j: substring s (i,j)
        | i in range (0, len string)
        , j in range (i, len string)}

# Pretend A stands for any type. This would be better implemented as a
# primitive, of course.
def empty?: op {A} => bool
fn s => not (for x in s do true)

# Not particularly useful. Also wouldn't pass the termination checker.
def subsets: {nat} => {{nat}}
fn univ => self as
  or {{}}
  or {{x} | univ[x]}
  or {x or y | self[x], self[y]}

# NB:
#
#             {... | foo[x,y]}    ==  {... | (x,y) in foo}
#           (for foo[x,y] do ...) == (for x,y in foo do ...)
#
# I sometimes prefer this notation for its brevity and similarity to
# {Pro,Data}log's "foo(x,y)".


# ---------- Transitive closure, naive and semi-naive ----------

# reachable-naive is faster under naive evaluation, because it takes
# log2(max path length) iterations. reachable-seminaive is faster under
# seminaive evaluation, because it's "linear" (it uses itself recursively "at
# most once") and so has a derivative that doesn't involve recomputation.
# This is all exactly analogous to Datalog.

def reachable-naive : {nat,nat} => {nat,nat}
fn edges => paths as edges or {(x,z) | paths[x,y], paths[y,z]}

def reachable-seminaive : {nat,nat} => {nat,nat}
fn edges => paths as edges or {(x,z) | edges[x,y], paths[y,z]}


# ---------- Parsing, the CYK algorithm ----------
type symbol = string
type rule = String(string) | Cat(symbol, symbol)
type grammar = {symbol, rule}

def parse: grammar -> string -> {symbol, nat, nat}
fn grammar text => chart as
  {(i,j) | grammar[x, String(s)], substrings text [i,j]: s}
  {(i,k) | grammar[x, Cat(y,z)], chart[x,i,j], chart[y,j,k]}


# ---------- Shortest path ----------
# This is not Dijkstra's algorithm, unless interpreted very intelligently.

# This depends on nat being able to take fixed points whose values are
# dictionaries into a non-powerset semilattice (namely, distances under min). In
# principle, distances are 0 ≤ 1 ≤ 2 ≤ ... ≤ ∞, where ∞ means "disconnected".
# However, we need to invert this order; ∞ ≤ ... ≤ 2 ≤ 1 ≤ 0. This is because
# our fixed point computations start from the least value and move upward. We
# should start by assuming everything is disconnected and move in the direction
# of shorter paths.
#
# Put another way, Datafun computes _least_ fixed points. Saying "everything is
# distance 0 to everything else" is clearly a fixed point (nothing can get any
# closer), so if 0 were the least value, it would be the least fixed point!
#
# Finally, we don't need an explicit ∞ because we can represent
# "disconnectedness" by _absence_ (from a set or dictionary). So our distance
# type is simply (op nat).
type dist = op nat
type node = nat

def shortest-paths: {node, node, dist} => {node, node: dist}
fn edges => self as
  or {u,v: d | edges[x,y,d]}
  or {u,w: (d1 + d2) | self[u,v]: d1, self[v,w]: d2}


# ---------- DFA (in)equivalence ----------
# A DFA is a tuple (states, final?, transitions).
type dfa = ({node}, (node -> bool), {node,char: node})

# Given a single DFA, finds all state-pairs (u,v) such that u is NOT equivalent
# to v. Works by assuming all states are equivalent and ruling out equivalences.
# Probably very inefficient. Is there an inverse way to do this - start out
# assuming all states are distinct and proving equivalences?
def distinct : dfa -> {node, node}
fn (states, final?, trans) => distinct as
  # two states are distinct if one is final & the other not
  or {u,v | states[u], states[v], final? u /= final? v}
  # or if they transition to distinct states under the same character
  or {u,v | distinct[w1,w2], trans[u,c]: w1, trans[v,c]: w2}

# If you want to find out if two DFAs are equivalent, just disjoint-union them
# into one DFA, and ask whether their start states are distinct.


# ---------- treeP ----------
# I got this example from the folks at Semmle.
#
# Given a digraph and a predicate on nodes, find all nodes such that the
# subgraph reachable from them is a DAG all of whose nodes satisfy P.
#
# Note that this fixed point is monotone, but only by double negation. empty? is
# antitone, and not is antitone, thus (empty? {y | child[x,y], not (y in? self)}) is
# monotone in `self`.

def treeP: {node, node} -> {node} => {node}
fn child p => self as
  {x | p[x], empty? {y | child[x,y], not (y in? self)}}


# ---------- Simple static analyses ----------
type var = string
type line = nat
type op = Add | Sub | Mul | Div | Eq | Neq
type expr = Var(var) | Op(op, var, var)
type stmt = Skip
          | Assign(var, expr)
          | Return(var)
          | If(var, line, line)
          | Goto(line)
type prog = {line: stmt}

# ----- 1-step control flow -----
type flow = {line, line}

def flow: prog -> flow
fn prog =>
  for prog[i]: stmt do
  case stmt
  | Goto(j) -> {(i,k)}
  | If(_,j,k) -> {(i,j), (i,k)}
  | Skip | Assign(_) -> {(i,i+1)}
  | Return -> {}

# ----- Liveness analysis -----
def uses: expr -> {var}
fn e => case e | Var(v) => {v} | Op(_,v1,v2) => {v1,v2}

# Is some info about a variable flowing INTO a line, or OUT OF a line?
type how = In | Out

# How a given statement changes what variables are live. Given what variables
# are live after it, returns what variables are live before it.
def needs: stmt -> {var} => {var}
fn stmt after =>
  case stmt
  # Skip & goto do nothing.
  | Skip | Goto(_) -> after
  # Return statements and conditionals always need their variables.
  | Return(v) | If(v,_,_) -> {v} or after
  | Assign(v, e) ->
    # We supply the value of v.
    or (after \ {v})
    # We need our sources, if our destination is live.
    or (if v in? after do uses e)

def live: prog -> flow -> {line, how, var}
fn prog flows => self as
  # Propagate liveness info backwards.
  or {(i, Out, v) | flows[i,j], self[j, In, v]}
  # Analyse statements.
  or {(i, In, v) | prog[i]: stmt, v in needs stmt {v | self[i, Out, v]} }

# ----- TODO: Reaching assignment analysis -----
# ----- TODO: Constant variable analysis -----