Skip to content

Commit

Permalink
Merge branch 'ben-denham:main' into feature/unit-tests-storage
Browse files Browse the repository at this point in the history
  • Loading branch information
nathanjmcdougall authored May 19, 2024
2 parents 525eaf4 + 16d4656 commit b462bed
Show file tree
Hide file tree
Showing 10 changed files with 151 additions and 240 deletions.
3 changes: 1 addition & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,7 @@ if __name__ == '__main__':
![Animated GIF of labtech demo on the command-line](https://ben-denham.github.io/labtech/images/labtech-demo.gif)

Labtech can also produce graphical progress bars in
[Jupyter](https://jupyter.org/) when the `Lab` is initialized with
`notebook=True`:
[Jupyter](https://jupyter.org/) notebooks:

![Animated GIF of labtech demo in Jupyter](https://ben-denham.github.io/labtech/images/labtech-demo-jupyter.gif)

Expand Down
72 changes: 22 additions & 50 deletions docs/cookbook.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,7 @@ experiments = [
)
for seed in range(5)
]
lab = labtech.Lab(
storage=None,
notebook=True,
)
lab = labtech.Lab(storage=None)
results = lab.run_tasks(experiments)
```

Expand Down Expand Up @@ -120,10 +117,7 @@ class ClassifierExperiment:
experiment = ClassifierExperiment(
classifier_task=LRClassifierTask(random_state=42),
)
lab = labtech.Lab(
storage=None,
notebook=True,
)
lab = labtech.Lab(storage=None)
results = lab.run_tasks([experiment])
```

Expand Down Expand Up @@ -182,10 +176,7 @@ experiments = [
ClassifierExperiment(classifier_task=classifier_task)
for classifier_task in classifier_tasks
]
lab = labtech.Lab(
storage=None,
notebook=True,
)
lab = labtech.Lab(storage=None)
results = lab.run_tasks(experiments)
```

Expand Down Expand Up @@ -242,10 +233,7 @@ experiments = [
)
for dataset in DatasetOption
]
lab = labtech.Lab(
storage=None,
notebook=True,
)
lab = labtech.Lab(storage=None)
results = lab.run_tasks(experiments)
```

Expand Down Expand Up @@ -291,7 +279,6 @@ experiments = [
]
lab = labtech.Lab(
storage=None,
notebook=True,
context={
'DATASETS': DATASETS,
},
Expand Down Expand Up @@ -340,7 +327,6 @@ experiments = [
]
lab = labtech.Lab(
storage=None,
notebook=True,
context={
'within_task_workers': 4,
},
Expand Down Expand Up @@ -521,10 +507,7 @@ class TabularTask:
})
lab = labtech.Lab(
storage='storage/parquet_example',
notebook=True,
)
lab = labtech.Lab(storage='storage/parquet_example')
lab.run_tasks([TabularTask()])
```

Expand Down Expand Up @@ -602,10 +585,7 @@ experiments = [
)
for seed in range(100)
]
lab = labtech.Lab(
storage=LocalFsspecStorage('storage/fsspec_example'),
notebook=True,
)
lab = labtech.Lab(storage=LocalFsspecStorage('storage/fsspec_example'))
results = lab.run_tasks(experiments)
```

Expand Down Expand Up @@ -676,10 +656,7 @@ experiments = [
aggregation_task = AggregationTask(
sub_tasks=experiments,
)
lab = labtech.Lab(
storage='storage/aggregation_lab',
notebook=True,
)
lab = labtech.Lab(storage='storage/aggregation_lab')
result = lab.run_task(aggregation_task)
```

Expand Down Expand Up @@ -718,10 +695,7 @@ experiments = [
aggregation_task = AggregationTask(
sub_tasks=experiments,
)
lab = labtech.Lab(
storage=None,
notebook=True,
)
lab = labtech.Lab(storage=None)
results = lab.run_tasks([
aggregation_task,
# Include intermediate tasks to access their results
Expand Down Expand Up @@ -751,10 +725,7 @@ experiments = [
aggregation_task = AggregationTask(
sub_tasks=experiments,
)
lab = labtech.Lab(
storage=None,
notebook=True,
)
lab = labtech.Lab(storage=None)
result = lab.run_task(
aggregation_task,
keep_nested_results=True,
Expand Down Expand Up @@ -817,10 +788,7 @@ task_c = StepC(
task_b=task_b,
)
lab = labtech.Lab(
storage=None,
notebook=True,
)
lab = labtech.Lab(storage=None)
result = lab.run_task(task_c)
print(result)
```
Expand Down Expand Up @@ -897,10 +865,7 @@ runs = [
]
mlflow.set_experiment('example_labtech_experiment')
lab = labtech.Lab(
storage=None,
notebook=True,
)
lab = labtech.Lab(storage=None)
results = lab.run_tasks(runs)
```

Expand All @@ -909,6 +874,16 @@ results = lab.run_tasks(runs)
> `mlflow.start_run()`, labtech wraps the entire call to the `run()`
> method of your task in order to track execution times in mlflow.
> Note: Because mlflow logging will be performed from a separate
> process for each task, you must use an mlflow tracking backend that
> supports multiple simultaneous connections. Specifically, using an
> SQLite backend directly from multiple processes may result in
> database locking errors. Instead, consider using local files (the
> default used by mlflow), an SQL database that runs as a server (e.g.
> PostgreSQL, MySQL, or Microsoft SQL Server), or running a local
> mlflow tracking server (which may itself connect to an SQLite
> database). For more details, see the [mlflow backend
> documentation](https://mlflow.org/docs/latest/tracking/backend-stores.html).
### Why do I see the following error: `An attempt has been made to start a new process before the current process has finished`?

Expand Down Expand Up @@ -956,10 +931,7 @@ def main():
)
for seed in range(1000)
]
lab = labtech.Lab(
storage='storage/guarded_lab',
notebook=True,
)
lab = labtech.Lab(storage='storage/guarded_lab')
result = lab.run_tasks(experiments)
print(result)
Expand Down
17 changes: 3 additions & 14 deletions docs/tutorial.md
Original file line number Diff line number Diff line change
Expand Up @@ -100,10 +100,7 @@ called `storage/tutorial/classification_lab_1` and to display
notebook-friendly progress bars:

``` {.python .code}
lab = labtech.Lab(
storage='storage/tutorial/classification_lab_1',
notebook=True,
)
lab = labtech.Lab(storage='storage/tutorial/classification_lab_1')
```

Finally, we create a task instance of `ClassifierExperiment` and call
Expand Down Expand Up @@ -211,10 +208,7 @@ we'll keep caches for the new definition separate by constructing a
new lab that uses a different storage directory:

``` {.python .code}
lab = labtech.Lab(
storage='storage/tutorial/classification_lab_2',
notebook=True,
)
lab = labtech.Lab(storage='storage/tutorial/classification_lab_2')
results = lab.run_tasks(classifier_experiments)
```

Expand Down Expand Up @@ -392,10 +386,7 @@ classifier_experiments = [
]
]
lab = labtech.Lab(
storage='storage/tutorial/classification_lab_3',
notebook=True,
)
lab = labtech.Lab(storage='storage/tutorial/classification_lab_3')
results = lab.run_tasks(classifier_experiments)
for experiment, prob_y in results.items():
Expand Down Expand Up @@ -460,7 +451,6 @@ classifier_experiments = [
lab = labtech.Lab(
storage='storage/tutorial/classification_lab_4',
notebook=True,
context={
'DATASETS': DATASETS,
},
Expand Down Expand Up @@ -668,7 +658,6 @@ import mlflow
mlflow.set_experiment('example_labtech_experiment')
lab = labtech.Lab(
storage='storage/tutorial/classification_lab_final',
notebook=True,
context={
'DATASETS': DATASETS,
},
Expand Down
1 change: 0 additions & 1 deletion examples/basic.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,6 @@
"\n",
"lab = labtech.Lab(\n",
" storage='storage/ipy_basic_lab',\n",
" notebook=True,\n",
")\n",
"cached_experiments = lab.cached_tasks([Experiment])\n",
"print(f'Clearing {len(cached_experiments)} cached experiments.')\n",
Expand Down
Loading

0 comments on commit b462bed

Please sign in to comment.