diff --git a/examples/model_examples/scikit-learn/Hamilton for ML dataflows.ipynb b/examples/model_examples/scikit-learn/Hamilton for ML dataflows.ipynb
new file mode 100644
index 000000000..1326ea202
--- /dev/null
+++ b/examples/model_examples/scikit-learn/Hamilton for ML dataflows.ipynb
@@ -0,0 +1,547 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "47ed8323-e689-464c-83ec-1ee98d2c2585",
+ "metadata": {},
+ "source": [
+ "# Hamilton for ML dataflows\n",
+ "\n",
+ "#### Requirements:\n",
+ "\n",
+ "- Install dependencies (listed in `requirements.txt`)\n",
+ "\n",
+ "More details [here](https://github.com/DAGWorks-Inc/hamilton/blob/main/examples/model_examples/scikit-learn/README.md#using-hamilton-for-ml-dataflows).\n",
+ "\n",
+ "***\n",
+ "\n",
+ "Uncomment and run the cell below if you are in a Google Colab environment. It will:\n",
+ "1. Mount google drive. You will be asked to authenticate and give permissions.\n",
+ "2. Change directory to google drive.\n",
+ "3. Make a directory \"hamilton-tutorials\"\n",
+ "4. Change directory to it.\n",
+ "5. Clone this repository to your google drive\n",
+ "6. Move your current directory to the scikit-learn model example (`examples/model_examples/scikit-learn`)\n",
+ "7. Install requirements.\n",
+ "\n",
+ "This means that any modifications will be saved, and you won't lose them if you close your browser."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d5e12e1c-a8b2-477a-a9ff-6257ab587734",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "## 1. Mount google drive\n",
+ "# from google.colab import drive\n",
+ "# drive.mount('/content/drive')\n",
+ "## 2. Change directory to google drive.\n",
+ "# %cd /content/drive/MyDrive\n",
+ "## 3. Make a directory \"hamilton-tutorials\"\n",
+ "# !mkdir hamilton-tutorials\n",
+ "## 4. Change directory to it.\n",
+ "# %cd hamilton-tutorials\n",
+ "## 5. Clone this repository to your google drive\n",
+ "# !git clone https://github.com/DAGWorks-Inc/hamilton/\n",
+ "## 6. Move your current directory to the scikit-learn model example\n",
+ "# %cd hamilton/examples/model_examples/scikit-learn\n",
+ "## 7. Install requirements.\n",
+ "# %pip install -r requirements.txt\n",
+ "# from IPython.display import clear_output; clear_output() # optionally clear outputs\n",
+ "# To check your current working directory you can type `!pwd` in a cell and run it."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9115ca99-cb3b-4dc3-8218-fa26b00d2199",
+ "metadata": {},
+ "source": [
+ "***\n",
+ "Here we have a simple example showing how you can write an ML training and evaluation workflow with Hamilton.\n",
+ "***"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "04fa1ff7-74f7-4193-9e1f-c17d9e68efc5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\"\"\"\n",
+ "Example script showing how one might set up a generic model training pipeline that is quickly configurable.\n",
+ "\"\"\"\n",
+ "\n",
+ "import digit_loader\n",
+ "import iris_loader\n",
+ "import my_train_evaluate_logic\n",
+ "\n",
+ "from hamilton import base, driver\n",
+ "\n",
+ "\n",
+ "def get_data_loader(data_set: str):\n",
+ " \"\"\"Returns the module to load that will procure data -- the data loaders all have to define the same functions.\"\"\"\n",
+ " if data_set == \"iris\":\n",
+ " return iris_loader\n",
+ " elif data_set == \"digits\":\n",
+ " return digit_loader\n",
+ " else:\n",
+ " raise ValueError(f\"Unknown data_name {data_set}.\")\n",
+ "\n",
+ "\n",
+ "def get_model_config(model_type: str) -> dict:\n",
+ " \"\"\"Returns model type specific configuration\"\"\"\n",
+ " if model_type == \"svm\":\n",
+ " return {\"clf\": \"svm\", \"gamma\": 0.001}\n",
+ " elif model_type == \"logistic\":\n",
+ " return {\"clf\": \"logistic\", \"penalty\": \"l2\"}\n",
+ " else:\n",
+ " raise ValueError(f\"Unsupported model {model_type}.\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "88ccbc7c-f265-47fa-a921-f26ac3ed7094",
+ "metadata": {},
+ "source": [
+ "***\n",
+ "For the purpose of this experiment, let's apply the following configuration:\n",
+ "\n",
+ "- `_data_set` = 'digits'\n",
+ "- `_model_type` = 'logistic'\n",
+ "\n",
+ "More details [here](https://github.com/DAGWorks-Inc/hamilton/blob/main/examples/model_examples/scikit-learn/README.md).\n",
+ "***"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "9e5e7282-8286-4055-847f-adb168420da0",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Note: Hamilton collects completely anonymous data about usage. This will help us improve Hamilton over time. See https://github.com/dagworks-inc/hamilton#usage-analytics--data-privacy for details.\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "classification_report :\n",
+ " precision recall f1-score support\n",
+ "\n",
+ " 0 1.00 0.99 0.99 91\n",
+ " 1 0.92 0.95 0.94 84\n",
+ " 2 0.98 1.00 0.99 83\n",
+ " 3 0.99 0.98 0.98 81\n",
+ " 4 0.95 0.99 0.97 95\n",
+ " 5 0.98 0.94 0.96 97\n",
+ " 6 0.97 0.98 0.97 85\n",
+ " 7 0.98 0.98 0.98 96\n",
+ " 8 0.91 0.90 0.91 96\n",
+ " 9 0.96 0.93 0.94 91\n",
+ "\n",
+ " accuracy 0.96 899\n",
+ " macro avg 0.96 0.96 0.96 899\n",
+ "weighted avg 0.96 0.96 0.96 899\n",
+ "\n",
+ "confusion_matrix :\n",
+ " [[90 0 0 0 1 0 0 0 0 0]\n",
+ " [ 0 80 0 0 1 0 1 0 2 0]\n",
+ " [ 0 0 83 0 0 0 0 0 0 0]\n",
+ " [ 0 0 0 79 0 0 0 1 0 1]\n",
+ " [ 0 1 0 0 94 0 0 0 0 0]\n",
+ " [ 0 1 0 1 1 91 0 1 0 2]\n",
+ " [ 0 0 0 0 0 0 83 0 2 0]\n",
+ " [ 0 0 0 0 1 0 0 94 0 1]\n",
+ " [ 0 5 2 0 0 1 2 0 86 0]\n",
+ " [ 0 0 0 0 1 1 0 0 4 85]]\n",
+ "fit_clf :\n",
+ " LogisticRegression()\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/Users/flaviassantos/github/hamilton/venv/lib/python3.11/site-packages/sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
+ "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
+ "\n",
+ "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
+ " https://scikit-learn.org/stable/modules/preprocessing.html\n",
+ "Please also refer to the documentation for alternative solver options:\n",
+ " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
+ " n_iter_i = _check_optimize_result(\n"
+ ]
+ }
+ ],
+ "source": [
+ "_data_set = 'digits' # the data set to load\n",
+ "_model_type = 'logistic' # the model type to fit and evaluate with\n",
+ "\n",
+ "dag_config = {\n",
+ " \"test_size_fraction\": 0.5,\n",
+ " \"shuffle_train_test_split\": True,\n",
+ "}\n",
+ "# augment config\n",
+ "dag_config.update(get_model_config(_model_type))\n",
+ "# get module with functions to load data\n",
+ "data_module = get_data_loader(_data_set)\n",
+ "# set the desired result container we want\n",
+ "adapter = base.DefaultAdapter()\n",
+ "\"\"\"\n",
+ "What's cool about this is that by simply changing the `dag_config` and the `data_module` we can\n",
+ "reuse the logic in the `my_train_evaluate_logic` module very easily for different contexts and purposes if\n",
+ "we want to set up a generic model fitting and prediction dataflow!\n",
+ "E.g. if we want to support a new data set, then we just need to add a new data loading module.\n",
+ "E.g. if we want to support a new model type, then we just need to add a single conditional function\n",
+ " to my_train_evaluate_logic.\n",
+ "\"\"\"\n",
+ "dr = driver.Driver(dag_config, data_module, my_train_evaluate_logic, adapter=adapter)\n",
+ "# ensure you have run `pip install \"sf-hamilton[visualization]\"` for the following to work:\n",
+ "# dr.visualize_execution(['classification_report', 'confusion_matrix', 'fit_clf'],\n",
+ "# f'./model_dag_{_data_set}_{_model_type}.dot', {\"format\": \"png\"})\n",
+ "results = dr.execute([\"classification_report\", \"confusion_matrix\", \"fit_clf\"])\n",
+ "for k, v in results.items():\n",
+ " print(k, \":\\n\", v)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2035065c-c409-4c21-bd11-733e74623226",
+ "metadata": {},
+ "source": [
+ "***\n",
+ "Here is the graph of execution for the digits data set:\n",
+ "***"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "17522217-8a09-46da-8b8d-0ba97d278bdc",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/svg+xml": [
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "dr.visualize_execution(['classification_report', 'confusion_matrix', 'fit_clf'],\n",
+ " f'./model_dag_{_data_set}_{_model_type}.dot', {\"format\": \"png\"})"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "hamilton",
+ "language": "python",
+ "name": "hamilton"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}