Skip to content

Commit

Permalink
feat: add text option for buildFile* utilities (#5924)
Browse files Browse the repository at this point in the history
Adds an optional `text` argument to the `fetchFile*` and `buildFile*`
definitions that can be used to hash built files as text files (with
normalized line endings) instead of as binary files (the previous
default).

Separately, this change also significantly expands the documentation in
the `Lake.Build.Trace` module and preforms minor touchups of some build
job signatures.
  • Loading branch information
tydeu authored Nov 3, 2024
1 parent 3c15ab3 commit 7942882
Show file tree
Hide file tree
Showing 3 changed files with 119 additions and 70 deletions.
65 changes: 43 additions & 22 deletions src/lake/Lake/Build/Common.lean
Original file line number Diff line number Diff line change
Expand Up @@ -161,25 +161,29 @@ def clearFileHash (file : FilePath) : IO Unit := do

/--
Fetches the hash of a file that may already be cached in a `.hash` file.
If the `.hash` file does not exist or hash files are not trusted
(e.g., with `--rehash`), creates it with a newly computed hash.
If hash files are not trusted (e.g., with `--rehash`) or the `.hash` file does
not exist, it will be created with a newly computed hash.
If `text := true`, `file` is hashed as a text file rather than a binary file.
-/
def fetchFileHash (file : FilePath) : JobM Hash := do
def fetchFileHash (file : FilePath) (text := false) : JobM Hash := do
let hashFile := FilePath.mk <| file.toString ++ ".hash"
if (← getTrustHash) then
if let some hash ← Hash.load? hashFile then
return hash
let hash ← computeHash file
let hash ← computeFileHash file text
createParentDirs hashFile
IO.FS.writeFile hashFile hash.toString
return hash

/--
Fetches the trace of a file that may have already have its hash cached
in a `.hash` file. If no such `.hash` file exists, recomputes and creates it.
If `text := true`, `file` is hashed as text file rather than a binary file.
-/
def fetchFileTrace (file : FilePath) : JobM BuildTrace := do
return .mk (← fetchFileHash file) (← getMTime file)
def fetchFileTrace (file : FilePath) (text := false) : JobM BuildTrace := do
return .mk (← fetchFileHash file text) (← getMTime file)

/--
Builds `file` using `build` unless it already exists and `depTrace` matches
Expand All @@ -191,66 +195,83 @@ a `.log.json` file and replayed from there if the build is skipped.
For example, given `file := "foo.c"`, compares `depTrace` with that in
`foo.c.trace` with the hash cached in `foo.c.hash` and the log cached in
`foo.c.trace`.
If `text := true`, `file` is hashed as a text file rather than a binary file.
-/
def buildFileUnlessUpToDate
(file : FilePath) (depTrace : BuildTrace) (build : JobM PUnit)
(file : FilePath) (depTrace : BuildTrace) (build : JobM PUnit) (text := false)
: JobM BuildTrace := do
let traceFile := FilePath.mk <| file.toString ++ ".trace"
buildUnlessUpToDate file depTrace traceFile do
build
clearFileHash file
fetchFileTrace file
fetchFileTrace file text

/--
Build `file` using `build` after `dep` completes if the dependency's
trace (and/or `extraDepTrace`) has changed.
If `text := true`, `file` is handled as a text file rather than a binary file.
-/
@[inline] def buildFileAfterDep
(file : FilePath) (dep : BuildJob α) (build : α → JobM PUnit)
(extraDepTrace : JobM _ := pure BuildTrace.nil)
(extraDepTrace : JobM _ := pure BuildTrace.nil) (text := false)
: SpawnM (BuildJob FilePath) :=
dep.bindSync fun depInfo depTrace => do
let depTrace := depTrace.mix (← extraDepTrace)
let trace ← buildFileUnlessUpToDate file depTrace <| build depInfo
let trace ← buildFileUnlessUpToDate file depTrace (build depInfo) text
return (file, trace)

/-- Build `file` using `build` after `deps` have built if any of their traces change. -/
/--
Build `file` using `build` after `deps` have built if any of their traces change.
If `text := true`, `file` is handled as a text file rather than a binary file.
-/
@[inline] def buildFileAfterDepList
(file : FilePath) (deps : List (BuildJob α)) (build : List α → JobM PUnit)
(extraDepTrace : JobM _ := pure BuildTrace.nil)
(extraDepTrace : JobM _ := pure BuildTrace.nil) (text := false)
: SpawnM (BuildJob FilePath) := do
buildFileAfterDep file (← BuildJob.collectList deps) build extraDepTrace
buildFileAfterDep file (.collectList deps) build extraDepTrace text

/--
Build `file` using `build` after `deps` have built if any of their traces change.
/-- Build `file` using `build` after `deps` have built if any of their traces change. -/
If `text := true`, `file` is handled as a text file rather than a binary file.
-/
@[inline] def buildFileAfterDepArray
(file : FilePath) (deps : Array (BuildJob α)) (build : Array α → JobM PUnit)
(extraDepTrace : JobM _ := pure BuildTrace.nil)
(extraDepTrace : JobM _ := pure BuildTrace.nil) (text := false)
: SpawnM (BuildJob FilePath) := do
buildFileAfterDep file (← BuildJob.collectArray deps) build extraDepTrace
buildFileAfterDep file (.collectArray deps) build extraDepTrace text

/-! ## Common Builds -/

/--
A build job for binary file that is expected to already exist (e.g., a data blob).
Any byte difference in the file will trigger a rebuild of dependents.
Any byte difference in a binary file will trigger a rebuild of its dependents.
-/
def inputBinFile (path : FilePath) : SpawnM (BuildJob FilePath) :=
Job.async <| (path, ·) <$> computeTrace path

/--
A build job for text file that is expected to already exist (e.g., a source file).
Normalizes line endings (converts CRLF to LF) to produce platform-independent traces.
Text file traces have normalized line endings to avoid unnecessary rebuilds across platforms.
-/
def inputTextFile (path : FilePath) : SpawnM (BuildJob FilePath) :=
Job.async <| (path, ·) <$> computeTrace (TextFilePath.mk path)

/--
A build job for file that is expected to already exist.
A build job for file that is expected to already exist (e.g., a data blob or source file).
**Deprecated:** Use either `inputTextFile` or `inputBinFile`.
`inputTextFile` normalizes line endings to produce platform-independent traces.
If `text := true`, the file is handled as a text file rather than a binary file.
Any byte difference in a binary file will trigger a rebuild of its dependents.
In contrast, text file traces have normalized line endings to avoid unnecessary
rebuilds across platforms.
-/
@[deprecated (since := "2024-06-08")] abbrev inputFile := @inputBinFile
@[inline] def inputFile (path : FilePath) (text : Bool) : SpawnM (BuildJob FilePath) :=
if text then inputTextFile path else inputBinFile path

/--
Build an object file from a source file job using `compiler`. The invocation is:
Expand Down
14 changes: 7 additions & 7 deletions src/lake/Lake/Build/Job.lean
Original file line number Diff line number Diff line change
Expand Up @@ -298,11 +298,11 @@ instance : Functor BuildJob where
@[inline] protected def wait? (self : BuildJob α) : BaseIO (Option α) :=
(·.map (·.1)) <$> self.toJob.wait?

def add (t1 : BuildJob α) (t2 : BuildJob β) : BuildJob α :=
mk <| t1.toJob.zipWith (fun a _ => a) t2.toJob
def add (self : BuildJob α) (other : BuildJob β) : BuildJob α :=
mk <| self.toJob.zipWith (fun a _ => a) other.toJob

def mix (t1 : BuildJob α) (t2 : BuildJob β) : BuildJob Unit :=
mk <| t1.toJob.zipWith (fun (_,t) (_,t') => ((), mixTrace t t')) t2.toJob
def mix (self : BuildJob α) (other : BuildJob β) : BuildJob Unit :=
mk <| self.toJob.zipWith (fun (_,t) (_,t') => ((), mixTrace t t')) other.toJob

def mixList (jobs : List (BuildJob α)) : Id (BuildJob Unit) := ofJob $
jobs.foldr (·.toJob.zipWith (fun (_,t') t => mixTrace t t') ·) (pure nilTrace)
Expand All @@ -311,12 +311,12 @@ def mixArray (jobs : Array (BuildJob α)) : Id (BuildJob Unit) := ofJob $
jobs.foldl (·.zipWith (fun t (_,t') => mixTrace t t') ·.toJob) (pure nilTrace)

def zipWith
(f : α → β → γ) (t1 : BuildJob α) (t2 : BuildJob β)
(f : α → β → γ) (self : BuildJob α) (other : BuildJob β)
: BuildJob γ :=
mk <| t1.toJob.zipWith (fun (a,t) (b,t') => (f a b, mixTrace t t')) t2.toJob
mk <| self.toJob.zipWith (fun (a,t) (b,t') => (f a b, mixTrace t t')) other.toJob

def collectList (jobs : List (BuildJob α)) : Id (BuildJob (List α)) :=
return jobs.foldr (zipWith List.cons) (pure [])

def collectArray (jobs : Array (BuildJob α)) : Id (BuildJob (Array α)) :=
return jobs.foldl (zipWith Array.push) (pure #[])
return jobs.foldl (zipWith Array.push) (pure (Array.mkEmpty jobs.size))
110 changes: 69 additions & 41 deletions src/lake/Lake/Build/Trace.lean
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,20 @@ Authors: Mac Malone
import Lake.Util.IO
import Lean.Data.Json

/-! # Lake Traces
This module defines Lake traces and associated utilities.
Traces are used to determine whether a Lake build artifact is *dirty*
(needs to be rebuilt) or is already *up-to-date*.
The primary type is `Lake.BuildTrace`.
-/

open System Lean

namespace Lake

--------------------------------------------------------------------------------
/-! # Utilities -/
/-! ## Utilities -/
--------------------------------------------------------------------------------

class CheckExists.{u} (i : Type u) where
Expand All @@ -24,60 +32,63 @@ instance : CheckExists FilePath where
checkExists := FilePath.pathExists

--------------------------------------------------------------------------------
/-! # Trace Abstraction -/
/-! ## Trace Abstraction -/
--------------------------------------------------------------------------------

class ComputeTrace.{u,v,w} (i : Type u) (m : outParam $ Type v → Type w) (t : Type v) where
/-- Compute the trace of some target info using information from the monadic context. -/
computeTrace : i → m t
class ComputeTrace : Type u) (m : outParam $ Type v → Type w) (τ : Type v) where
/-- Compute the trace of an object in its preferred monad. -/
computeTrace : α → m τ

@[inline] def computeTrace [ComputeTrace i m t] [MonadLiftT m n] (info : i) : n t :=
liftM <| ComputeTrace.computeTrace info
/-- Compute the trace of an object in a supporting monad. -/
@[inline] def computeTrace [ComputeTrace α m τ] [MonadLiftT m n] (a : α) : n τ :=
liftM <| ComputeTrace.computeTrace a

class NilTrace.{u} (t : Type u) where
class NilTrace.{u} (α : Type u) where
/-- The nil trace. Should not unduly clash with a proper trace. -/
nilTrace : t
nilTrace : α

export NilTrace (nilTrace)

instance inhabitedOfNilTrace [NilTrace t] : Inhabited t := ⟨nilTrace⟩
instance inhabitedOfNilTrace [NilTrace α] : Inhabited α := ⟨nilTrace⟩

class MixTrace.{u} (t : Type u) where
class MixTrace.{u} (α : Type u) where
/-- Combine two traces. The result should be dirty if either of the inputs is dirty. -/
mixTrace : ttt
mixTrace : ααα

export MixTrace (mixTrace)

section
variable [MixTrace t] [NilTrace t]
variable [MixTrace τ] [NilTrace τ]

def mixTraceList (traces : List t) : t :=
/- Combine a `List` of traces (left-to-right). -/
def mixTraceList (traces : List τ) : τ :=
traces.foldl mixTrace nilTrace

def mixTraceArray (traces : Array t) : t :=
/- Combine an `Array` of traces (left-to-right). -/
def mixTraceArray (traces : Array τ) : τ :=
traces.foldl mixTrace nilTrace

variable [ComputeTrace i m t]
variable [ComputeTrace α m τ]

@[specialize] def computeListTrace [MonadLiftT m n] [Monad n] (artifacts : List i) : n t :=
artifacts.foldlM (fun ts t => return mixTrace ts (← computeTrace t)) nilTrace
/- Compute the trace of each element of a `List` and combine them (left-to-right). -/
@[inline] def computeListTrace [MonadLiftT m n] [Monad n] (as : List α) : n τ :=
as.foldlM (fun ts t => return mixTrace ts (← computeTrace t)) nilTrace

instance [Monad m] : ComputeTrace (List i) m t := ⟨computeListTrace⟩
instance [Monad m] : ComputeTrace (List α) m τ := ⟨computeListTrace⟩

@[specialize] def computeArrayTrace [MonadLiftT m n] [Monad n] (artifacts : Array i) : n t :=
artifacts.foldlM (fun ts t => return mixTrace ts (← computeTrace t)) nilTrace
/- Compute the trace of each element of an `Array` and combine them (left-to-right). -/
@[inline] def computeArrayTrace [MonadLiftT m n] [Monad n] (as : Array α) : n τ :=
as.foldlM (fun ts t => return mixTrace ts (← computeTrace t)) nilTrace

instance [Monad m] : ComputeTrace (Array i) m t := ⟨computeArrayTrace⟩
instance [Monad m] : ComputeTrace (Array α) m τ := ⟨computeArrayTrace⟩
end

--------------------------------------------------------------------------------
/-! # Hash Trace -/
/-! ## Hash Trace -/
--------------------------------------------------------------------------------

/--
A content hash.
TODO: Use a secure hash rather than the builtin Lean hash function.
-/
/-- A content hash. -/
-- TODO: Use a secure hash rather than the builtin Lean hash function.
structure Hash where
val : UInt64
deriving BEq, DecidableEq, Repr
Expand Down Expand Up @@ -127,51 +138,66 @@ instance : FromJson Hash := ⟨Hash.fromJson?⟩
end Hash

class ComputeHash (α : Type u) (m : outParam $ TypeType v) where
/-- Compute the hash of an object in its preferred monad. -/
computeHash : α → m Hash

instance [ComputeHash α m] : ComputeTrace α m Hash := ⟨ComputeHash.computeHash⟩

/-- Compute the hash of object `a` in a pure context. -/
@[inline] def pureHash [ComputeHash α Id] (a : α) : Hash :=
ComputeHash.computeHash a

/-- Compute the hash an object in an supporting monad. -/
@[inline] def computeHash [ComputeHash α m] [MonadLiftT m n] (a : α) : n Hash :=
liftM <| ComputeHash.computeHash a

instance : ComputeHash String Id := ⟨Hash.ofString⟩

def computeFileHash (file : FilePath) : IO Hash :=
/--
Compute the hash of a binary file.
Binary files are equivalent only if they are byte identical.
-/
def computeBinFileHash (file : FilePath) : IO Hash :=
Hash.ofByteArray <$> IO.FS.readBinFile file

instance : ComputeHash FilePath IO := ⟨computeFileHash
instance : ComputeHash FilePath IO := ⟨computeBinFileHash

/--
Compute the hash of a text file.
Normalizes `\r\n` sequences to `\n` for cross-platform compatibility.
-/
def computeTextFileHash (file : FilePath) : IO Hash := do
let text ← IO.FS.readFile file
let text := text.crlfToLf
return Hash.ofString text

/--
A wrapper around `FilePath` that adjusts its `ComputeHash` implementation
to normalize `\r\n` sequences to `\n` for cross-platform compatibility. -/
A wrapper around `FilePath` that adjusts its `ComputeHash` implementation
to normalize `\r\n` sequences to `\n` for cross-platform compatibility.
-/
structure TextFilePath where
path : FilePath

instance : Coe TextFilePath FilePath := ⟨(·.path)⟩
instance : ComputeHash TextFilePath IO := ⟨(computeTextFileHash ·)⟩

instance : ComputeHash TextFilePath IO where
computeHash file := computeTextFileHash file
/-- Compute the hash of a file. If `text := true`, normalize line endings. -/
@[inline] def computeFileHash (file : FilePath) (text := false) : IO Hash :=
if text then computeTextFileHash file else computeBinFileHash file

@[specialize] def computeArrayHash [ComputeHash α m] [Monad m] (xs : Array α) : m Hash :=
xs.foldlM (fun h a => return h.mix (← computeHash a)) .nil
/-- Compute the hash of each element of an array and combine them (left-to-right). -/
@[inline] def computeArrayHash [ComputeHash α m] [Monad m] (as : Array α) : m Hash :=
computeArrayTrace as

instance [ComputeHash α m] [Monad m] : ComputeHash (Array α) m := ⟨computeArrayHash⟩

--------------------------------------------------------------------------------
/-! # Modification Time (MTime) Trace -/
/-! ## Modification Time (MTime) Trace -/
--------------------------------------------------------------------------------

open IO.FS (SystemTime)

/-- A modification time. -/
/-- A modification time (e.g., of a file). -/
def MTime := SystemTime

namespace MTime
Expand All @@ -192,12 +218,14 @@ instance : MixTrace MTime := ⟨max⟩

end MTime

class GetMTime (α) where
class GetMTime (α : Type u) where
/-- Return the modification time of an object. -/
getMTime : α → IO MTime

export GetMTime (getMTime)
instance [GetMTime α] : ComputeTrace α IO MTime := ⟨getMTime⟩

/-- Return the modification time of a file recorded by the OS. -/
@[inline] def getFileMTime (file : FilePath) : IO MTime :=
return (← file.metadata).modified

Expand All @@ -216,7 +244,7 @@ That is, check if the info is newer than `self`.
| .error _ => return false

--------------------------------------------------------------------------------
/-! # Lake Build Trace (Hash + MTIme) -/
/-! ## Lake Build Trace -/
--------------------------------------------------------------------------------

/-- Trace used for common Lake targets. Combines `Hash` and `MTime`. -/
Expand All @@ -242,10 +270,10 @@ def nil : BuildTrace :=

instance : NilTrace BuildTrace := ⟨nil⟩

@[specialize] def compute [ComputeHash i m] [MonadLiftT m IO] [GetMTime i] (info : i) : IO BuildTrace :=
@[specialize] def compute [ComputeHash α m] [MonadLiftT m IO] [GetMTime α] (info : α) : IO BuildTrace :=
return mk (← computeHash info) (← getMTime info)

instance [ComputeHash i m] [MonadLiftT m IO] [GetMTime i] : ComputeTrace i IO BuildTrace := ⟨compute⟩
instance [ComputeHash α m] [MonadLiftT m IO] [GetMTime α] : ComputeTrace α IO BuildTrace := ⟨compute⟩

def mix (t1 t2 : BuildTrace) : BuildTrace :=
mk (Hash.mix t1.hash t2.hash) (max t1.mtime t2.mtime)
Expand Down

0 comments on commit 7942882

Please sign in to comment.