build graph outside of Tensor; encapsulate graph type (#369)
joelberkeley authored Dec 10, 2023
1 parent 5d31e9c commit 5ceff1f
Showing 25 changed files with 542 additions and 472 deletions.
6 changes: 4 additions & 2 deletions .github/workflows/checks.yml
@@ -24,13 +24,15 @@ jobs:
container: ghcr.io/stefan-hoeck/idris2-pack
steps:
- uses: actions/checkout@v3
- name: Install Bazel
- name: Build backend
working-directory: backend
run: |
./install_bazel.sh
./build.sh cpu
- name: Run tests
run: pack --no-prompt run test.ipkg
run: |
pack --no-prompt build test.ipkg
pack run test.ipkg
env:
LD_LIBRARY_PATH: $LD_LIBRARY_PATH:backend/bazel-bin
readme:
8 changes: 4 additions & 4 deletions README.md
@@ -28,21 +28,21 @@ import Literal
import Tensor
-->
```idris
x : Ref $ Tensor [3] S32
x : Graph $ Tensor [3] S32
x = tensor [1, 2, 3]
y : Ref $ Tensor [3] S32
y : Graph $ Tensor [3] S32
y = x + tensor [0, 1, 2]
```
but this won't
```idris
failing "elaboration"
z : Ref $ Tensor [3] S32
z : Graph $ Tensor [3] S32
z = x + tensor [0, 1]
```
because you can't add a vector of length two to a vector of length three. Shape manipulation extends beyond comparing literal dimension sizes to arbitrary symbolic manipulation
```idris
append : Tensor [m, p] F64 -> Tensor [n, p] F64 -> Ref $ Tensor [m + n, p] F64
append : Tensor [m, p] F64 -> Tensor [n, p] F64 -> Graph $ Tensor [m + n, p] F64
append x y = concat 0 x y
```
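For instance, `append` composes with `tensor` like any other graph computation. A minimal sketch, assuming only the imports above:
```idris
appended : Graph $ Tensor [5, 1] F64
appended = do
  x <- tensor [[1.0], [2.0], [3.0]]
  y <- tensor [[4.0], [5.0]]
  append x y
```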
As a bonus, spidr programs are reproducible. Any one graph will always produce the same result when run on the same hardware.
20 changes: 10 additions & 10 deletions src/BayesianOptimization.idr
@@ -27,20 +27,20 @@ import Data
import Model
import Tensor

||| A `Stream`-like collection where each successive element is wrapped in an additional `Ref`.
||| A `Stream`-like collection where each successive element extends the `Graph`.
public export
data RefStream : Type -> Type where
(::) : a -> Inf (Ref (RefStream a)) -> RefStream a
data GraphStream : Type -> Type where
(::) : a -> Inf (Graph (GraphStream a)) -> GraphStream a

||| Take `n` values from a `RefStream`, sequencing the `Ref` effects.
||| Take `n` values from a `GraphStream`, sequencing the `Graph` effects.
public export
take : (n : Nat) -> RefStream a -> Ref $ Vect n a
take : (n : Nat) -> GraphStream a -> Graph $ Vect n a
take Z _ = pure Nil
take (S k) (x :: xs) = pure (x :: !(take k !xs))

||| Create an infinite stream of values from a generator function and a starting value.
export covering
iterate : (a -> Ref a) -> a -> Ref $ RefStream a
iterate : (a -> Graph a) -> a -> Graph $ GraphStream a
iterate f x = do
x' <- f x
pure (x' :: iterate f x')
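As a rough usage sketch (hypothetical, not part of this diff), `iterate` and `take` combine to unroll the first few values of a recurrence; this assumes the `Num` instance on `Graph`-wrapped tensors seen in the README:
```idris
covering
firstThree : Graph $ Vect 3 (Tensor [] F64)
firstThree = do
  start <- fromDouble 0.0
  -- each step adds one to the previous value
  stream <- iterate (\x => pure x + fromDouble 1.0) start
  take 3 stream
```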
@@ -52,12 +52,12 @@ iterate f x = do
||| @tactic The tactic, such as an optimized acquisition function, to find a new point from the
||| data and model
export
step : (objective : forall n . Tensor (n :: features) F64 -> Ref $ Tensor (n :: targets) F64) ->
step : (objective : forall n . Tensor (n :: features) F64 -> Graph $ Tensor (n :: targets) F64) ->
(probabilisticModel : ProbabilisticModel features targets marginal model) =>
(train : Dataset features targets -> model -> Ref $ model) ->
(tactic : Reader (DataModel {probabilisticModel} model) (Ref $ Tensor (1 :: features) F64)) ->
(train : Dataset features targets -> model -> Graph $ model) ->
(tactic : Reader (DataModel {probabilisticModel} model) (Graph $ Tensor (1 :: features) F64)) ->
DataModel {probabilisticModel} model ->
Ref $ DataModel {probabilisticModel} model
Graph $ DataModel {probabilisticModel} model
step objective train tactic env = do
newPoint <- runReader env tactic
dataset <- concat env.dataset $ MkDataset newPoint !(objective newPoint)
2 changes: 1 addition & 1 deletion src/BayesianOptimization/Acquisition.idr
@@ -47,7 +47,7 @@ record DataModel modelType {auto probabilisticModel : ProbabilisticModel f t mar
||| @features The shape of the feature domain.
public export 0
Acquisition : (0 batchSize : Nat) -> {auto 0 _ : GT batchSize 0} -> (0 features : Shape) -> Type
Acquisition batchSize features = Tensor (batchSize :: features) F64 -> Ref $ Tensor [] F64
Acquisition batchSize features = Tensor (batchSize :: features) F64 -> Graph $ Tensor [] F64
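Any function from a batch of feature points to a scalar graph fits this type. A hedged sketch (not from this commit, and assuming `reduce` accepts multiple axes, as its list argument suggests):
```idris
-- sum every element of a batch of one two-dimensional point
sumOfBatch : Acquisition 1 [2]
sumOfBatch points = reduce @{Sum} [0, 1] points
```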

||| Construct the acquisition function that estimates the absolute improvement in the best
||| observation if we were to evaluate the objective at a given point.
32 changes: 22 additions & 10 deletions src/Compiler/Eval.idr
@@ -57,32 +57,44 @@ export
Show Err where
show (IndexErr msg) = "IndexErr: \{msg}"

Computation : Type -> Type
0 Computation : Type -> Type
Computation = StateT (SortedMap Nat XlaOp) (EitherT Err IO)

lookup : Nat -> Computation XlaOp
||| Look up the `XlaOp` at `position` in the graph.
lookup : (position : Nat) -> Computation XlaOp
lookup n = do
case lookup n !get of
cache <- get
case lookup n cache of
Nothing =>
lift $ left (IndexErr "Tried to look up value at index \{show n} but none was found.")
lift $ left (IndexErr "Tried to look up value at index \{show n} but found keys \{show $ toList (keys cache)}")
Just op => pure op

interpret : XlaBuilder -> Nat -> Env -> Computation XlaOp

buildSub : XlaBuilder -> String -> Fn arity -> Computation XlaComputation
buildSub builder name (MkFn params i env) = do
||| Build a computation from an inner function.
|||
||| @xlaBuilder The enclosing XLA builder in which this function is built.
||| This is not the XLA builder used to build the computation itself.
||| @computationName The name of the computation.
||| @arity The function arity.
||| @f The function to build.
buildSub : (xlaBuilder : XlaBuilder) ->
(computationName : String) ->
(f : Fn arity) ->
Computation XlaComputation
buildSub builder name (MkFn params result env) = do
subBuilder <- createSubBuilder builder name
traverse_ (interpretParameter subBuilder) (enumerate params)
root <- assert_total $ interpret subBuilder i env
root <- assert_total $ interpret subBuilder result env
build subBuilder root

where

interpretParameter : XlaBuilder -> (Nat, Nat, ShapeAndType) -> Computation ()
interpretParameter builder (position, i, MkShapeAndType shape dtype) = do
interpretParameter builder (positionInFnParams, positionInGraph, MkShapeAndType shape dtype) = do
xlaShape <- mkShape {dtype} shape
param <- parameter builder position xlaShape name
put $ insert i param !get
param <- parameter builder positionInFnParams xlaShape name
put $ insert positionInGraph param !get

covering
enqueue : XlaBuilder -> Expr -> Computation XlaOp
68 changes: 63 additions & 5 deletions src/Compiler/Expr.idr
@@ -18,6 +18,7 @@ module Compiler.Expr
import Data.SortedMap
import Decidable.Equality

import Control.Monad.State
import Compiler.LiteralRW
import Compiler.Xla.TensorFlow.Compiler.Xla.XlaData
import Literal
@@ -29,15 +30,39 @@
data ShapeAndType : Type where
MkShapeAndType : Shape -> (0 dtype : Type) -> Primitive dtype => ShapeAndType

public export
data Expr : Type where

public export 0
Env : Type
Env = SortedMap Nat Expr
export
data Env = MkEnv Nat (SortedMap Nat Expr)

export
empty : Env
empty = MkEnv 0 empty

export
addNode : Expr -> State Env Nat
addNode expr = do
MkEnv next env <- get
put (MkEnv (S next) (insert next expr env))
pure next

export
toList : Env -> List (Nat, Expr)
toList (MkEnv _ env) = toList env
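A sketch of how `empty` and `addNode` thread the position counter (hypothetical; `Arg` is assumed here to be the parameter constructor of `Expr` used in `addFn` below):
```idris
twoNodes : (Nat, Nat)
twoNodes = evalState empty $ do
  i <- addNode (Arg 0)
  j <- addNode (Arg 1)
  pure (i, j)  -- (0, 1): positions are allocated in insertion order
```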

public export
data Fn : Nat -> Type where
MkFn : {arity : _} -> Vect arity (Nat, ShapeAndType) -> Nat -> Env -> Fn arity

||| @arity The function arity.
||| @params The graph position of each function parameter, along with its shape and dtype.
||| @result The position of the function result in the graph.
||| @env The function graph. Includes only nodes in this scope, not those in outer or inner scopes.
MkFn : {arity : _} ->
(params : Vect arity (Nat, ShapeAndType)) ->
(result : Nat) ->
(env : Env) ->
Fn arity

public export
data BinaryOp =
@@ -105,7 +130,13 @@ data Expr : Type where
Transpose : List Nat -> Nat -> Expr
Identity : Primitive dtype => Nat -> Expr
Broadcast : Primitive dtype => Shape -> Shape -> Nat -> Expr
Map : Fn n -> Vect n Nat -> Shape -> Expr

||| Apply the function `f` of the given `arity` to the arguments `args`.
|||
||| @f The function to apply.
||| @args The arguments to apply `f` to.
Map : (f : Fn arity) -> (args : Vect arity Nat) -> Shape -> Expr

Reduce : Fn 2 -> Nat -> List Nat -> Nat -> Expr
Sort : Fn 2 -> Nat -> Bool -> List Nat -> Expr
Reverse : List Nat -> Nat -> Expr
@@ -120,3 +151,30 @@ data Expr : Type where
TriangularSolve : Nat -> Nat -> Bool -> Expr
UniformFloatingPoint : Nat -> Nat -> Nat -> Nat -> Shape -> Expr
NormalFloatingPoint : Nat -> Nat -> Shape -> Expr

public export 0
FnExpr : Nat -> Type
FnExpr 0 = State Env Nat
FnExpr (S k) = Nat -> FnExpr k

applyN : FnExpr arity -> Vect arity Nat -> State Env Nat
applyN f [] = f
applyN f (x :: xs) = applyN (f x) xs

export
addFn : {arity : _} -> Vect arity ShapeAndType -> FnExpr arity -> State Env (Fn arity)
addFn params f = do
MkEnv next env <- get
let (subEnv@(MkEnv next _), params, result) = runState (MkEnv next empty) $ do
xs <- traverse addArg params
result <- applyN f xs
pure (zip xs params, result)
put (MkEnv next env)
pure (MkFn params result subEnv)

where
addArg : ShapeAndType -> State Env Nat
addArg st = do
MkEnv next env <- get
put (MkEnv (S next) (insert next (Arg next) env))
pure next
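As an illustration (a hedged sketch, not in this diff), `addFn` can register a one-argument function whose body is built with `addNode`, here using the `Identity` constructor shown above:
```idris
-- a Fn of arity one that simply passes its F64 vector argument through
idF64Vector : State Env (Fn 1)
idF64Vector = addFn [MkShapeAndType [3] F64] $ \x => addNode (Identity {dtype = F64} x)
```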
2 changes: 1 addition & 1 deletion src/Constants.idr
@@ -18,5 +18,5 @@ module Constants
import Tensor

export
pi : Ref $ Tensor [] F64
pi : Graph $ Tensor [] F64
pi = fromDouble pi
2 changes: 1 addition & 1 deletion src/Data.idr
@@ -40,6 +40,6 @@ record Dataset (0 featureShape, targetShape : Shape) where

||| Concatenate two datasets along their leading axis.
export
concat : Dataset features targets -> Dataset features targets -> Ref $ Dataset features targets
concat : Dataset features targets -> Dataset features targets -> Graph $ Dataset features targets
concat (MkDataset {s=s} x y) (MkDataset {s=s'} x' y') =
[| MkDataset {s=s + S s'} (concat 0 x x') (concat 0 y y') |]
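A usage sketch (hypothetical, with the constructor applied as in this diff): two single-point datasets over a two-dimensional feature space combine into one with two points.
```idris
twoPoints : Graph $ Dataset [2] [1]
twoPoints = do
  x  <- tensor [[0.0, 0.0]]
  y  <- tensor [[1.0]]
  x' <- tensor [[1.0, 1.0]]
  y' <- tensor [[2.0]]
  concat (MkDataset x y) (MkDataset x' y')
```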
10 changes: 5 additions & 5 deletions src/Distribution.idr
@@ -30,14 +30,14 @@ import Constants
public export
interface Distribution (0 dist : (0 event : Shape) -> (0 dim : Nat) -> Type) where
||| The mean of the distribution.
mean : dist event dim -> Ref $ Tensor (dim :: event) F64
mean : dist event dim -> Graph $ Tensor (dim :: event) F64

||| The covariance, or correlation, between sub-events.
cov : dist event dim -> Ref $ Tensor (dim :: dim :: event) F64
cov : dist event dim -> Graph $ Tensor (dim :: dim :: event) F64

||| The variance of a single random variable.
export
variance : {event : _} -> Distribution dist => dist event 1 -> Ref $ Tensor (1 :: event) F64
variance : {event : _} -> Distribution dist => dist event 1 -> Graph $ Tensor (1 :: event) F64
variance dist = squeeze {from=(1 :: 1 :: event)} =<< cov dist
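For a single random variable, the variance is just the covariance of the variable with itself, $\mathrm{Var}[X] = \mathrm{Cov}[X, X]$, hence the implementation squeezes `cov` from shape `[1, 1, event]` down to `[1, event]`.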

||| A joint, or multivariate distribution over a tensor of floating point values, where the density
@@ -52,11 +52,11 @@ interface Distribution dist =>
ClosedFormDistribution (0 event : Shape)
(0 dist : (0 event : Shape) -> (0 dim : Nat) -> Type) where
||| The probability density function of the distribution at the specified point.
pdf : dist event (S d) -> Tensor (S d :: event) F64 -> Ref $ Tensor [] F64
pdf : dist event (S d) -> Tensor (S d :: event) F64 -> Graph $ Tensor [] F64

||| The cumulative distribution function of the distribution at the specified point (that is,
||| the probability the random variable takes a value less than or equal to the given point).
cdf : dist event (S d) -> Tensor (S d :: event) F64 -> Ref $ Tensor [] F64
cdf : dist event (S d) -> Tensor (S d :: event) F64 -> Graph $ Tensor [] F64

||| A joint Gaussian distribution.
|||
2 changes: 1 addition & 1 deletion src/Model.idr
@@ -32,4 +32,4 @@ interface Distribution marginal => ProbabilisticModel
model | model
where
||| Return the marginal distribution over the target domain at the specified feature values.
marginalise : model -> {n : _} -> Tensor (S n :: features) F64 -> Ref $ marginal targets (S n)
marginalise : model -> {n : _} -> Tensor (S n :: features) F64 -> Graph $ marginal targets (S n)
12 changes: 6 additions & 6 deletions src/Model/GaussianProcess.idr
@@ -43,7 +43,7 @@ posterior :
GaussianProcess features ->
Tensor [] F64 ->
{s : _} -> (Tensor ((S s) :: features) F64, Tensor [S s] F64) ->
Ref $ GaussianProcess features
Graph $ GaussianProcess features
posterior (MkGP priorMeanf priorKernel) noise (xTrain, yTrain) = do
l <- cholesky !(priorKernel xTrain xTrain + pure noise * identity)
let alpha = (pure l).T \| (pure l |\ pure yTrain)
@@ -61,7 +61,7 @@ logMarginalLikelihood :
GaussianProcess features ->
Tensor [] F64 ->
{s : _} -> (Tensor ((S s) :: features) F64, Tensor [S s] F64) ->
Ref $ Tensor [] F64
Graph $ Tensor [] F64
logMarginalLikelihood (MkGP _ kernel) noise (x, y) = do
l <- cholesky !(kernel x x + pure noise * identity)
let alpha = (pure l).T \| (pure l |\ pure y)
@@ -86,7 +86,7 @@ data ConjugateGPRegression : (0 features : Shape) -> Type where
||| @noise The likelihood amplitude, or observation noise.
MkConjugateGPR :
{p : _} ->
(gpFromHyperparameters : Tensor [p] F64 -> Ref $ GaussianProcess features) ->
(gpFromHyperparameters : Tensor [p] F64 -> Graph $ GaussianProcess features) ->
(hyperparameters : Tensor [p] F64) ->
(noise : Tensor [] F64) ->
ConjugateGPRegression features
@@ -111,16 +111,16 @@ export
fit : (forall n . Tensor [n] F64 -> Optimizer $ Tensor [n] F64)
-> Dataset features [1]
-> ConjugateGPRegression features
-> Ref $ ConjugateGPRegression features
-> Graph $ ConjugateGPRegression features
fit optimizer (MkDataset x y) (MkConjugateGPR {p} mkPrior gpParams noise) = do
let objective : Tensor [S p] F64 -> Ref $ Tensor [] F64
let objective : Tensor [S p] F64 -> Graph $ Tensor [] F64
objective params = do
priorParams <- slice [1.to (S p)] params
logMarginalLikelihood !(mkPrior priorParams) !(slice [at 0] params) (x, !(squeeze y))

params <- optimizer !(concat 0 !(expand 0 noise) gpParams) objective

let mkPosterior : Tensor [p] F64 -> Ref $ GaussianProcess features
let mkPosterior : Tensor [p] F64 -> Graph $ GaussianProcess features
mkPosterior params' = posterior !(mkPrior params') !(squeeze noise) (x, !(squeeze y))

pure $ MkConjugateGPR mkPosterior !(slice [1.to (S p)] params) !(slice [at 0] params)
4 changes: 2 additions & 2 deletions src/Model/Kernel.idr
@@ -30,14 +30,14 @@ Kernel features =
{sk, sk' : _} ->
Tensor (sk :: features) F64 ->
Tensor (sk' :: features) F64 ->
Ref $ Tensor [sk, sk'] F64
Graph $ Tensor [sk, sk'] F64

scaledL2Norm :
Tensor [] F64 ->
{d, n, n' : _} ->
Tensor [n, S d] F64 ->
Tensor [n', S d] F64 ->
Ref $ Tensor [n, n'] F64
Graph $ Tensor [n, n'] F64
scaledL2Norm len x x' =
let xs = broadcast {to=[n, n', S d]} =<< expand 1 x
in reduce @{Sum} [2] =<< ((xs - broadcast !(expand 0 x')) / pure len) ^ fill 2.0
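Elementwise, this computes the length-scaled squared Euclidean distance between each pair of rows, roughly:

$$\left[\texttt{scaledL2Norm}\ \ell\ x\ x'\right]_{ij} = \sum_{k=0}^{d} \left(\frac{x_{ik} - x'_{jk}}{\ell}\right)^{2}$$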
2 changes: 1 addition & 1 deletion src/Model/MeanFunction.idr
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ import Tensor
||| @features The shape of the feature domain.
public export 0
MeanFunction : (0 features : Shape) -> Type
MeanFunction features = {sm : _} -> Tensor (sm :: features) F64 -> Ref $ Tensor [sm] F64
MeanFunction features = {sm : _} -> Tensor (sm :: features) F64 -> Graph $ Tensor [sm] F64

||| A mean function where the mean is zero in all target dimensions.
export
2 changes: 1 addition & 1 deletion src/Optimize.idr
@@ -24,7 +24,7 @@ import Tensor
||| @domain The type of the domain over which to find the optimal value.
public export 0
Optimizer : (0 domain : Type) -> Type
Optimizer a = (a -> Ref $ Tensor [] F64) -> Ref a
Optimizer a = (a -> Graph $ Tensor [] F64) -> Graph a
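Any function of this shape is an optimizer. For instance, a contrived sketch (not part of this diff) that ignores its objective entirely:
```idris
-- always "optimal" at the given point, whatever the objective says
constant : a -> Optimizer a
constant x _ = pure x
```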

||| Construct an `Optimizer` that implements grid search over a scalar feature space. Grid search
||| approximates the optimum by evaluating the objective over a finite, evenly-spaced grid.