Merge branch 'ben-denham:main' into feature/unit-tests-storage

ben-denham · May 19, 2024 · b462bed · b462bed
2 parents 525eaf4 + 16d4656
commit b462bed
Show file tree

Hide file tree

Showing 10 changed files with 151 additions and 240 deletions.
diff --git a/README.md b/README.md
@@ -91,8 +91,7 @@ if __name__ == '__main__':
 ![Animated GIF of labtech demo on the command-line](https://ben-denham.github.io/labtech/images/labtech-demo.gif)
 
 Labtech can also produce graphical progress bars in
-[Jupyter](https://jupyter.org/) when the `Lab` is initialized with
-`notebook=True`:
+[Jupyter](https://jupyter.org/) notebooks:
 
 ![Animated GIF of labtech demo in Jupyter](https://ben-denham.github.io/labtech/images/labtech-demo-jupyter.gif)
 

diff --git a/docs/cookbook.md b/docs/cookbook.md
@@ -50,10 +50,7 @@ experiments = [
     )
     for seed in range(5)
 ]
-lab = labtech.Lab(
-    storage=None,
-    notebook=True,
-)
+lab = labtech.Lab(storage=None)
 results = lab.run_tasks(experiments)
 ```
 
@@ -120,10 +117,7 @@ class ClassifierExperiment:
 experiment = ClassifierExperiment(
     classifier_task=LRClassifierTask(random_state=42),
 )
-lab = labtech.Lab(
-    storage=None,
-    notebook=True,
-)
+lab = labtech.Lab(storage=None)
 results = lab.run_tasks([experiment])
 ```
 
@@ -182,10 +176,7 @@ experiments = [
     ClassifierExperiment(classifier_task=classifier_task)
     for classifier_task in classifier_tasks
 ]
-lab = labtech.Lab(
-    storage=None,
-    notebook=True,
-)
+lab = labtech.Lab(storage=None)
 results = lab.run_tasks(experiments)
 ```
 
@@ -242,10 +233,7 @@ experiments = [
     )
     for dataset in DatasetOption
 ]
-lab = labtech.Lab(
-    storage=None,
-    notebook=True,
-)
+lab = labtech.Lab(storage=None)
 results = lab.run_tasks(experiments)
 ```
 
@@ -291,7 +279,6 @@ experiments = [
 ]
 lab = labtech.Lab(
     storage=None,
-    notebook=True,
     context={
         'DATASETS': DATASETS,
     },
@@ -340,7 +327,6 @@ experiments = [
 ]
 lab = labtech.Lab(
     storage=None,
-    notebook=True,
     context={
         'within_task_workers': 4,
     },
@@ -521,10 +507,7 @@ class TabularTask:
         })
 
 
-lab = labtech.Lab(
-    storage='storage/parquet_example',
-    notebook=True,
-)
+lab = labtech.Lab(storage='storage/parquet_example')
 lab.run_tasks([TabularTask()])
 ```
 
@@ -602,10 +585,7 @@ experiments = [
     )
     for seed in range(100)
 ]
-lab = labtech.Lab(
-    storage=LocalFsspecStorage('storage/fsspec_example'),
-    notebook=True,
-)
+lab = labtech.Lab(storage=LocalFsspecStorage('storage/fsspec_example'))
 results = lab.run_tasks(experiments)
 ```
 
@@ -676,10 +656,7 @@ experiments = [
 aggregation_task = AggregationTask(
     sub_tasks=experiments,
 )
-lab = labtech.Lab(
-    storage='storage/aggregation_lab',
-    notebook=True,
-)
+lab = labtech.Lab(storage='storage/aggregation_lab')
 result = lab.run_task(aggregation_task)
 ```
 
@@ -718,10 +695,7 @@ experiments = [
 aggregation_task = AggregationTask(
     sub_tasks=experiments,
 )
-lab = labtech.Lab(
-    storage=None,
-    notebook=True,
-)
+lab = labtech.Lab(storage=None)
 results = lab.run_tasks([
     aggregation_task,
     # Include intermediate tasks to access their results
@@ -751,10 +725,7 @@ experiments = [
 aggregation_task = AggregationTask(
     sub_tasks=experiments,
 )
-lab = labtech.Lab(
-    storage=None,
-    notebook=True,
-)
+lab = labtech.Lab(storage=None)
 result = lab.run_task(
     aggregation_task,
     keep_nested_results=True,
@@ -817,10 +788,7 @@ task_c = StepC(
     task_b=task_b,
 )
 
-lab = labtech.Lab(
-    storage=None,
-    notebook=True,
-)
+lab = labtech.Lab(storage=None)
 result = lab.run_task(task_c)
 print(result)
 ```
@@ -897,10 +865,7 @@ runs = [
 ]
 
 mlflow.set_experiment('example_labtech_experiment')
-lab = labtech.Lab(
-    storage=None,
-    notebook=True,
-)
+lab = labtech.Lab(storage=None)
 results = lab.run_tasks(runs)
 ```
 
@@ -909,6 +874,16 @@ results = lab.run_tasks(runs)
 > `mlflow.start_run()`, labtech wraps the entire call to the `run()`
 > method of your task in order to track execution times in mlflow.
 
+> Note: Because mlflow logging will be performed from a separate
+> process for each task, you must use an mlflow tracking backend that
+> supports multiple simultaneous connections. Specifically, using an
+> SQLite backend directly from multiple processes may result in
+> database locking errors. Instead, consider using local files (the
+> default used by mlflow), an SQL database that runs as a server (e.g.
+> PostgreSQL, MySQL, or Microsoft SQL Server), or a local mlflow
+> tracking server (which may itself connect to an SQLite database). For more
+> details, see the [mlflow backend
+> documentation](https://mlflow.org/docs/latest/tracking/backend-stores.html).
 
 ### Why do I see the following error: `An attempt has been made to start a new process before the current process has finished`?
 
@@ -956,10 +931,7 @@ def main():
         )
         for seed in range(1000)
     ]
-    lab = labtech.Lab(
-        storage='storage/guarded_lab',
-        notebook=True,
-    )
+    lab = labtech.Lab(storage='storage/guarded_lab')
     result = lab.run_tasks(experiments)
     print(result)
 

diff --git a/docs/tutorial.md b/docs/tutorial.md
@@ -100,10 +100,7 @@ called `storage/tutorial/classification_lab_1` and to display
 notebook-friendly progress bars:
 
 ``` {.python .code}
-lab = labtech.Lab(
-    storage='storage/tutorial/classification_lab_1',
-    notebook=True,
-)
+lab = labtech.Lab(storage='storage/tutorial/classification_lab_1')
 ```
 
 Finally, we create a task instance of `ClassifierExperiment` and call
@@ -211,10 +208,7 @@ we'll keep caches for the new definition separate by constructing a
 new lab that uses a different storage directory:
 
 ``` {.python .code}
-lab = labtech.Lab(
-    storage='storage/tutorial/classification_lab_2',
-    notebook=True,
-)
+lab = labtech.Lab(storage='storage/tutorial/classification_lab_2')
 results = lab.run_tasks(classifier_experiments)
 ```
 
@@ -392,10 +386,7 @@ classifier_experiments = [
     ]
 ]
 
-lab = labtech.Lab(
-    storage='storage/tutorial/classification_lab_3',
-    notebook=True,
-)
+lab = labtech.Lab(storage='storage/tutorial/classification_lab_3')
 
 results = lab.run_tasks(classifier_experiments)
 for experiment, prob_y in results.items():
@@ -460,7 +451,6 @@ classifier_experiments = [
 
 lab = labtech.Lab(
     storage='storage/tutorial/classification_lab_4',
-    notebook=True,
     context={
         'DATASETS': DATASETS,
     },
@@ -668,7 +658,6 @@ import mlflow
 mlflow.set_experiment('example_labtech_experiment')
 lab = labtech.Lab(
     storage='storage/tutorial/classification_lab_final',
-    notebook=True,
     context={
         'DATASETS': DATASETS,
     },

diff --git a/examples/basic.ipynb b/examples/basic.ipynb
@@ -128,7 +128,6 @@
     "\n",
     "lab = labtech.Lab(\n",
     "    storage='storage/ipy_basic_lab',\n",
-    "    notebook=True,\n",
     ")\n",
     "cached_experiments = lab.cached_tasks([Experiment])\n",
     "print(f'Clearing {len(cached_experiments)} cached experiments.')\n",