Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions src/lake/Lake/Build/Context.lean
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ public structure BuildConfig extends LogConfig where
trustHash : Bool := true
/-- Early exit if a target has to be rebuilt. -/
noBuild : Bool := false
/-- Stop the build monitor after the first required target failure is detected. -/
stopOnFirstError : Bool := false
/-- Verbosity level (`-q`, `-v`, or neither). -/
verbosity : Verbosity := .normal
/-- Whether to print a message when the build finishes successfully (if not quiet). -/
Expand Down Expand Up @@ -54,6 +56,11 @@ public structure BuildContext extends BuildConfig, Context where
If `none`, tracking outputs is disabled for this build.
-/
outputsRef? : Option CacheRef := none
/--
When set to `true`, stops new build jobs from being scheduled.
Already-running tasks complete normally; no new work is dispatched.
-/
cancelling? : Option IO.CancelToken := none

/-- A transformer to equip a monad with a `BuildContext`. -/
public abbrev BuildT := ReaderT BuildContext
Expand Down
4 changes: 4 additions & 0 deletions src/lake/Lake/Build/Index.lean
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ namespace Lake
private def recBuildWithIndex (info : BuildInfo) : FetchM (Job (BuildData info.key)) :=
match info with
| .target pkg target => do
if let some tk := (← getBuildContext).cancelling? then
if ← tk.isSet then return .cancelled -- cancelled: skip scheduling new work
if let some decl := pkg.findTargetDecl? target then
if h : decl.kind.isAnonymous then
let key := BuildKey.packageTarget pkg.keyName target
Expand All @@ -40,6 +42,8 @@ private def recBuildWithIndex (info : BuildInfo) : FetchM (Job (BuildData info.k
else
error s!"invalid target '{info}': target not found in package"
| .facet target kind data facet => do
if let some tk := (← getBuildContext).cancelling? then
if ← tk.isSet then return .cancelled -- cancelled: skip scheduling new work
if let some config := (← getWorkspace).findFacetConfig? facet then
if h : config.kind = kind then
let recFetch := config.fetchFn <| cast (by simp [h]) data
Expand Down
27 changes: 24 additions & 3 deletions src/lake/Lake/Build/Job/Basic.lean
Original file line number Diff line number Diff line change
Expand Up @@ -86,14 +86,32 @@ public def JobState.merge (a b : JobState) : JobState where

/-! ## JobTask -/

/--
The exceptional (non-success) outcome kind of a Lake job result.
Distinguishes a genuine build failure (which carries a position into the
job's log) from a cancellation, so the build monitor can skip cancelled
jobs instead of reporting them as failures.
-/
public inductive JobException
/-- A build failure; log entries from `pos` onward describe the error. -/
| errorLogged : Log.Pos → JobException
/-- The job was cancelled before it could complete. -/
| cancelled
deriving Inhabited

/-- The result of a Lake job. -/
public abbrev JobResult α := EResult Log.Pos JobState α
Copy link
Contributor

@eric-wieser eric-wieser Mar 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You could do this with less work with

inductive JobError
| log : Log.Pos → JobError
| cancelled

public abbrev JobResult α := EResult JobError JobState α

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is pretty nice, although now that I bit the bullet I don't know if I prefer cancellation being an "error" rather than a distinct state. Does keeping the abbrev have some more benefits except for avoiding all those new adaptation functions?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The main advantage is you get to reuse all the EResult machinery, and the code for error propagation (in bind) can pass the JobError object along unchanged without taking it apart and putting it back together each time, which is likely to be faster at runtime.

You could use JobException or JobStatus instead, I'm not attached to the name. But for reference, Python uses asyncio.CancelledError for such a concept, so I don't think treating cancellations as an error is really that strange.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sold. Some pattern matching is a bit more awkward, but still pretty okay.

public abbrev JobResult α := EResult JobException JobState α

namespace JobResult

/--
Convert an `EResult Log.Pos` (as produced by running a `JobM` action)
to a `JobResult`: successes pass through unchanged, and a raw log-position
error is wrapped in `JobException.errorLogged`. A cancellation outcome can
never arise here, since `EResult Log.Pos` has no way to express one.
-/
@[inline] public def ofLogResult : EResult Log.Pos JobState α → JobResult α
| .ok a s => .ok a s
| .error e s => .error (.errorLogged e) s

end JobResult

/-- Add log entries to the beginning of the job's log. -/
public def JobResult.prependLog (log : Log) (self : JobResult α) : JobResult α :=
match self with
| .ok a s => .ok a <| s.modifyLog (log ++ ·)
| .error e s => .error ⟨log.size + e.val⟩ <| s.modifyLog (log ++ ·)
| .error (.errorLogged e) s => .error (.errorLogged ⟨log.size + e.val⟩) <| s.modifyLog (log ++ ·)
| .error .cancelled s => .error .cancelled <| s.modifyLog (log ++ ·)

/-- The `Task` of a Lake job. -/
public abbrev JobTask α := BaseIOTask (JobResult α)
Expand Down Expand Up @@ -132,7 +150,10 @@ public protected def cast (self : Job α) (h : ¬ self.kind.isAnonymous) : Job (
{task, caption}

@[inline] public protected def error [OptDataKind α] (log : Log := {}) (caption := "") : Job α :=
.ofTask (Task.pure (.error 0 {log})) caption
.ofTask (Task.pure (.error (.errorLogged 0) {log})) caption

@[inline] public protected def cancelled [OptDataKind α] (caption := "") : Job α :=
.ofTask (Task.pure (.error .cancelled {})) caption

@[inline] public protected def pure [kind : OptDataKind α] (a : α) (log : Log := {}) (caption := "") : Job α :=
.ofTask (Task.pure (.ok a {log})) caption
Expand Down
24 changes: 15 additions & 9 deletions src/lake/Lake/Build/Job/Monad.lean
Original file line number Diff line number Diff line change
Expand Up @@ -163,14 +163,14 @@ namespace Job
[OptDataKind α] (act : JobM α) (caption := "")
: SpawnM (Job α) := .ofFn fun fetch pkg? stack store ctx _ =>
.ofTask (caption := caption) <$> Task.pure <$>
(withLoggedIO act).toFn fetch pkg? stack store ctx {}
(JobResult.ofLogResult <$> (withLoggedIO act).toFn fetch pkg? stack store ctx {})

/-- Spawn a job that asynchronously performs `act`. -/
@[nospecialize] public protected def async
[OptDataKind α] (act : JobM α) (prio := Task.Priority.default) (caption := "")
: SpawnM (Job α) := .ofFn fun fetch pkg? stack store ctx _ =>
.ofTask (caption := caption) <$> BaseIO.asTask (prio := prio) do
(withLoggedIO act).toFn fetch pkg? stack store ctx {}
JobResult.ofLogResult <$> (withLoggedIO act).toFn fetch pkg? stack store ctx {}

/-- Wait for a job to complete and return the result. -/
@[inline] public protected def wait (self : Job α) : BaseIO (JobResult α) := do
Expand All @@ -189,8 +189,9 @@ Logs the job's log and throws if there was an error.
-/
public protected def await (self : Job α) : LogIO α := do
match (← self.wait) with
| .error n {log, ..} => log.replay; throw n
| .ok a {log, ..} => log.replay; pure a
| .error (.errorLogged n) {log, ..} => log.replay; throw n
| .error .cancelled _ => throw 0

/-- Apply `f` asynchronously to the job's output. -/
@[nospecialize] public protected def mapM
Expand All @@ -201,8 +202,8 @@ public protected def await (self : Job α) : LogIO α := do
BaseIO.mapTask (t := task) (prio := prio) (sync := sync) fun
| .ok a s =>
let trace := mixTrace trace s.trace
withLoggedIO (f a) |>.toFn fetch pkg? stack store ctx {s with trace}
| .error n s => return .error n s
JobResult.ofLogResult <$> (withLoggedIO (f a)).toFn fetch pkg? stack store ctx {s with trace}
| .error e s => return .error e s

/--
Apply `f` asynchronously to the job's output
Expand All @@ -220,8 +221,9 @@ and asynchronously await the resulting job.
| .ok job sa =>
return job.task.map (prio := prio) (sync := true) fun
| .ok b sb => .ok b {sa.merge sb with trace := sb.trace}
| .error e sb => .error ⟨sa.log.size + e.val⟩ {sa.merge sb with trace := sb.trace}
| .error e sa => return Task.pure (.error e sa)
| .error (.errorLogged e) sb => .error (.errorLogged ⟨sa.log.size + e.val⟩) {sa.merge sb with trace := sb.trace}
| .error e sb => .error e {sa.merge sb with trace := sb.trace}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of duplicating the sa.merge sb, you could consider an inner match just to populate the first argument of .error

| .error e sa => return Task.pure (.error (.errorLogged e) sa)
| .error e sa => return Task.pure (.error e sa)

/--
Expand All @@ -246,14 +248,18 @@ results of `a` and `b`. The job `c` errors if either `a` or `b` error.
: Job γ :=
self.zipResultWith (other := other) (prio := prio) (sync := sync) fun
| .ok a sa, .ok b sb => .ok (f a b) (sa.merge sb)
| ra, rb => .error 0 (ra.state.merge rb.state)
| .error (.errorLogged _) sa, rb => .error (.errorLogged 0) (sa.merge rb.state)
| ra, .error (.errorLogged _) sb => .error (.errorLogged 0) (ra.state.merge sb)
| ra, rb => .error .cancelled (ra.state.merge rb.state)

/-- Merges this job with another, discarding its output and trace. -/
public def add (self : Job α) (other : Job β) : Job α :=
have : OptDataKind α := self.kind
self.zipResultWith (other := other) fun
| .ok a sa, .ok _ sb => .ok a {sa.merge sb with trace := sa.trace}
| ra, rb => .error 0 {ra.state.merge rb.state with trace := ra.state.trace}
| .error (.errorLogged _) sa, rb => .error (.errorLogged 0) {sa.merge rb.state with trace := sa.trace}
| ra, .error (.errorLogged _) sb => .error (.errorLogged 0) {ra.state.merge sb with trace := ra.state.trace}
| ra, rb => .error .cancelled {ra.state.merge rb.state with trace := ra.state.trace}

/-- Merges this job with another, discarding both outputs. -/
public def mix (self : Job α) (other : Job β) : Job Unit :=
Expand Down
14 changes: 11 additions & 3 deletions src/lake/Lake/Build/Job/Register.lean
Original file line number Diff line number Diff line change
Expand Up @@ -29,18 +29,26 @@ public def Job.renew (self : Job α) : Job α :=
have : OptDataKind α := self.kind
self.mapResult (sync := true) fun
| .ok a s => .ok a s.renew
| .error _ s => .error 0 s.renew
| .error (.errorLogged _) s => .error (.errorLogged 0) s.renew
| .error e s => .error e s.renew

/--
Registers the job for the top-level build monitor,
(e.g., the Lake CLI progress UI), assigning it `caption`.

When the build is being cancelled, registration is skipped: the job still
exists for dependency propagation but is not surfaced in the monitor output,
so interrupted pending jobs do not appear as failures.
-/
@[inline] public def registerJob
[Monad m] [MonadLiftT (ST IO.RealWorld) m] [MonadBuild m]
[Monad m] [MonadLiftT (ST IO.RealWorld) m] [MonadLiftT BaseIO m] [MonadBuild m]
(caption : String) (job : Job α) (optional := false)
: m (Job α) := do
let ctx ← getBuildContext
if let some tk := ctx.cancelling? then
if ← tk.isSet then return job.renew
let job : Job α := {job with caption, optional}
(← getBuildContext).registeredJobs.modify (·.push job)
ctx.registeredJobs.modify (·.push job)
return job.renew

/-- Wraps stray I/O, logs, and errors in `x` into the produced job. -/
Expand Down
9 changes: 7 additions & 2 deletions src/lake/Lake/Build/Module.lean
Original file line number Diff line number Diff line change
Expand Up @@ -84,11 +84,16 @@ private structure ModuleImportData where
let impSet := if imp.includeSelf then impSet.insert imp.module else impSet
.ok impSet s
| .error e s => .error e s
| .error .cancelled _ =>
match r with
| .ok _ => .error .cancelled r.state
| .error _ _ => r
| .error _ _ =>
let entry := LogEntry.error s!"{fileName}: bad import '{imp.module.name}'"
match r with
| .ok _ s => .error 0 (s.logEntry entry)
| .error e s => .error e (s.logEntry entry)
| .ok _ s => .error (.errorLogged 0) (s.logEntry entry)
| .error (.errorLogged e) s => .error (.errorLogged e) (s.logEntry entry)
| .error .cancelled s => .error .cancelled s
Comment on lines +95 to +96
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Marginal, but perhaps faster as

Suggested change
| .error (.errorLogged e) s => .error (.errorLogged e) (s.logEntry entry)
| .error .cancelled s => .error .cancelled s
| .error e s => .error e <| match e with | .errorLogged _ => s.logEntry entry | _ => s

Copy link
Contributor Author

@marcelolynch marcelolynch Mar 25, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks a bit cryptic; I'm tempted to leave it as-is.

return Job.ofTask <| task.map (sync := true) fun
| .ok impSet s => .ok impSet.toArray s
| .error e s => .error e s
Expand Down
46 changes: 34 additions & 12 deletions src/lake/Lake/Build/Run.lean
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@ private structure MonitorContext where
showTime : Bool
/-- How often to poll jobs (in milliseconds). -/
updateFrequency : Nat
/-- Stop the monitor after the first required target failure is detected. -/
stopOnFirstError : Bool
/-- When set to `true`, no new build jobs are scheduled. -/
cancelling? : Option IO.CancelToken

@[inline, implicit_reducible] def MonitorContext.logger (ctx : MonitorContext) : MonadLog BaseIO :=
.stream ctx.out ctx.outLv ctx.useAnsi
Expand Down Expand Up @@ -118,6 +122,7 @@ private def reportJob (job : OpaqueJob) : MonitorM PUnit := do
let {jobNo, totalJobs, ..} ← get
let {failLv, outLv, showOptional, out, useAnsi, showProgress, minAction, showTime, ..} ← read
let {task, caption, optional, ..} := job
if let .error .cancelled _ := task.get then return -- skip cancelled jobs: not a failure
let {log, action, wantsRebuild, buildTime, ..} := task.get.state
let maxLv := log.maxLv
let failed := strictAnd log.hasEntries (maxLv ≥ failLv)
Expand Down Expand Up @@ -178,8 +183,13 @@ private def sleep : MonitorM PUnit := do
let now ← IO.monoMsNow
modify fun s => {s with lastUpdate := now}

private partial def loop (unfinished : Array OpaqueJob) : MonitorM PUnit := do
private partial def loop (unfinished : Array OpaqueJob) : MonitorM PUnit := do
let (running, unfinished) ← poll unfinished
-- On the first required-target failure with `--stop-on-first-error`,
-- cancel pending job scheduling and let running tasks drain to completion.
if (← read).stopOnFirstError && !(← get).failures.isEmpty then
if let some tk := (← read).cancelling? then
tk.set
if h : 0 < unfinished.size then
renderProgress running unfinished h
sleep
Expand All @@ -203,7 +213,10 @@ public structure MonitorResult where
@[inline] def MonitorResult.isOk (self : MonitorResult) : Bool :=
self.failures.isEmpty

def mkMonitorContext (cfg : BuildConfig) (jobs : JobQueue) : BaseIO MonitorContext := do
def mkMonitorContext
(cfg : BuildConfig) (jobs : JobQueue)
(cancelling? : Option IO.CancelToken := none) :
BaseIO MonitorContext := do
let out ← cfg.out.get
let useAnsi ← cfg.ansiMode.isEnabled out
let outLv := cfg.outLv
Expand All @@ -217,6 +230,8 @@ def mkMonitorContext (cfg : BuildConfig) (jobs : JobQueue) : BaseIO MonitorConte
return {
jobs, out, failLv, outLv, minAction, showOptional
useAnsi, showProgress, showTime, updateFrequency
stopOnFirstError := cfg.stopOnFirstError
cancelling?
}

def monitorJobs'
Expand Down Expand Up @@ -253,6 +268,7 @@ public def monitorJobs
let ctx := {
jobs, out, failLv, outLv, minAction, showOptional
useAnsi, showProgress, showTime, updateFrequency
stopOnFirstError := false, cancelling? := none
}
monitorJobs' ctx initJobs initFailures resetCtrl

Expand Down Expand Up @@ -305,9 +321,10 @@ instance : CoeOut (BuildResult α) MonitorResult := ⟨BuildResult.toMonitorResu
def monitorJob (ctx : MonitorContext) (job : Job α) : BaseIO (BuildResult α) := do
let result ← monitorJobs' ctx #[job]
if result.isOk then
if let some a ← job.wait? then
return {toMonitorResult := result, out := .ok a}
else
match (← job.wait) with
| .ok a _ => return {toMonitorResult := result, out := .ok a}
| .error .cancelled _ => return {toMonitorResult := result, out := .error "build cancelled"}
| .error _ _ =>
-- Computation job failed but was unreported in the monitor. This should be impossible.
return {toMonitorResult := result, out := .error <|
"uncaught top-level build failure (this is likely a bug in Lake)"}
Expand All @@ -316,6 +333,7 @@ def monitorJob (ctx : MonitorContext) (job : Job α) : BaseIO (BuildResult α) :

def mkBuildContext'
(ws : Workspace) (cfg : BuildConfig) (jobs : JobQueue)
(cancelling? : Option IO.CancelToken := none)
: BaseIO BuildContext := return {
opaqueWs := ws
toBuildConfig := cfg
Expand All @@ -327,6 +345,7 @@ def mkBuildContext'
registeredJobs := jobs
leanTrace := .ofHash (pureHash ws.lakeEnv.leanGithash)
s!"Lean {Lean.versionStringCore}, commit {ws.lakeEnv.leanGithash}"
cancelling?
}

def Workspace.startBuild
Expand Down Expand Up @@ -360,8 +379,9 @@ public def Workspace.runFetchM
(ws : Workspace) (build : FetchM α) (cfg : BuildConfig := {}) (caption := "job computation")
: IO α := do
let jobs ← mkJobQueue
let mctx ← mkMonitorContext cfg jobs
let bctx ← mkBuildContext' ws cfg jobs
let cancelling? := some (← IO.CancelToken.new)
let mctx ← mkMonitorContext cfg jobs cancelling?
let bctx ← mkBuildContext' ws cfg jobs cancelling?
let job ← startBuild bctx build caption
let result ← monitorJob mctx job
finalizeBuild cfg bctx mctx result
Expand All @@ -370,9 +390,10 @@ def monitorBuild (mctx : MonitorContext) (job : Job (Job α)) : BaseIO (BuildRes
let result ← monitorJob mctx job
match result.out with
| .ok job =>
if let some a ← job.wait? then
return {result with out := .ok a}
else
match (← job.wait) with
| .ok a _ => return {result with out := .ok a}
| .error .cancelled _ => return {result with out := .error "build cancelled"}
| .error _ _ =>
-- Job failed but was unreported in the monitor. It was likely not properly registered.
return {result with out := .error <|
"uncaught top-level build failure (this is likely a bug in the build script)"}
Expand Down Expand Up @@ -400,8 +421,9 @@ public def Workspace.runBuild
(ws : Workspace) (build : FetchM (Job α)) (cfg : BuildConfig := {})
: IO α := do
let jobs ← mkJobQueue
let mctx ← mkMonitorContext cfg jobs
let bctx ← mkBuildContext' ws cfg jobs
let cancelling? := some (← IO.CancelToken.new)
let mctx ← mkMonitorContext cfg jobs cancelling?
let bctx ← mkBuildContext' ws cfg jobs cancelling?
let job ← startBuild bctx build
let result ← monitorBuild mctx job
finalizeBuild cfg bctx mctx result
Expand Down
1 change: 1 addition & 0 deletions src/lake/Lake/CLI/Help.lean
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ BASIC OPTIONS:
--reconfigure, -R elaborate configuration files instead of using OLeans
--keep-toolchain do not update toolchain on workspace update
--no-build exit immediately if a build target is not up-to-date
--stop-on-first-error stop building and exit after the first required target fails
--no-cache build packages locally; do not download build caches
--try-cache attempt to download build caches for supported packages
--json, -J output JSON-formatted results (in `lake query`)
Expand Down
3 changes: 3 additions & 0 deletions src/lake/Lake/CLI/Main.lean
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ public structure LakeOptions where
oldMode : Bool := false
trustHash : Bool := true
noBuild : Bool := false
stopOnFirstError : Bool := false
noCache : Option Bool := none
failLv : LogLevel := .error
outLv? : Option LogLevel := .none
Expand Down Expand Up @@ -127,6 +128,7 @@ def LakeOptions.mkBuildConfig
oldMode := opts.oldMode
trustHash := opts.trustHash
noBuild := opts.noBuild
stopOnFirstError := opts.stopOnFirstError
verbosity := opts.verbosity
failLv := opts.failLv
outLv := opts.outLv
Expand Down Expand Up @@ -242,6 +244,7 @@ def lakeLongOption : (opt : String) → CliM PUnit
| "--text" => modifyThe LakeOptions ({· with outFormat := .text})
| "--json" => modifyThe LakeOptions ({· with outFormat := .json})
| "--no-build" => modifyThe LakeOptions ({· with noBuild := true})
| "--stop-on-first-error" => modifyThe LakeOptions ({· with stopOnFirstError := true})
| "--no-cache" => modifyThe LakeOptions ({· with noCache := true})
| "--try-cache" => modifyThe LakeOptions ({· with noCache := false})
| "--rehash" => modifyThe LakeOptions ({· with trustHash := false})
Expand Down
2 changes: 2 additions & 0 deletions tests/lake/tests/stopOnFirstError/Fail1.lean
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
-- This module intentionally has a syntax error to test --stop-on-first-error
#check (this is not valid lean syntax
2 changes: 2 additions & 0 deletions tests/lake/tests/stopOnFirstError/Fail2.lean
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
-- This module intentionally has a syntax error to test --stop-on-first-error
#check (this is not valid lean syntax
1 change: 1 addition & 0 deletions tests/lake/tests/stopOnFirstError/clean.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
rm -rf .lake lake-manifest.json *.produced.out
Loading
Loading