diff --git a/examples/DVCLive-Evidently.ipynb b/examples/DVCLive-Evidently.ipynb index 171bbd58..84329e99 100644 --- a/examples/DVCLive-Evidently.ipynb +++ b/examples/DVCLive-Evidently.ipynb @@ -1,1715 +1,1716 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "provenance": [], - "toc_visible": true, - "authorship_tag": "ABX9TyNJAdha/v4n9zLqIfGakg0E" - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "WpfOFaqHcnAt" + }, + "source": [ + "# Install Evidently and DVC with DVCLive" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "executionInfo": { + "elapsed": 2337, + "status": "ok", + "timestamp": 1697468096427, + "user": { + "displayName": "Francesco Motoko", + "userId": "00974636158007469548" + }, + "user_tz": -120 }, - "language_info": { - "name": "python" - } + "id": "BqWpagFPZ45W" + }, + "outputs": [], + "source": [ + "!pip uninstall -q -y sqlalchemy pyarrow ipython-sql pandas-gbq" + ] }, - "cells": [ - { - "cell_type": "markdown", - "source": [ - "# Install Evidently and DVC with DVCLive" - ], - "metadata": { - "id": "WpfOFaqHcnAt" - } + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "executionInfo": { + "elapsed": 33615, + "status": "ok", + "timestamp": 1697468130037, + "user": { + "displayName": "Francesco Motoko", + "userId": "00974636158007469548" + }, + "user_tz": -120 }, - { - "cell_type": "code", - "source": [ - "!pip uninstall -q -y sqlalchemy pyarrow ipython-sql pandas-gbq" - ], - "metadata": { - "id": "BqWpagFPZ45W", - "executionInfo": { - "status": "ok", - "timestamp": 1697468096427, - "user_tz": -120, - "elapsed": 2337, - "user": { - "displayName": "Francesco Motoko", - "userId": "00974636158007469548" - } - } - }, - "execution_count": 1, - "outputs": [] + "id": "DijzqeokW595" + }, + "outputs": [], + "source": [ + "%%capture\n", + "!pip install -q dvc==3.25.0 dvclive==3.0.1 
evidently==0.4.5 pandas==1.5.3" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZyZ2sX8GcvMU" + }, + "source": [ + "# Load the data" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "id": "DijzqeokW595", - "executionInfo": { - "status": "ok", - "timestamp": 1697468130037, - "user_tz": -120, - "elapsed": 33615, - "user": { - "displayName": "Francesco Motoko", - "userId": "00974636158007469548" - } - } - }, - "outputs": [], - "source": [ - "%%capture\n", - "!pip install -q dvc==3.25.0 dvclive==3.0.1 evidently==0.4.5 pandas==1.5.3" - ] + "executionInfo": { + "elapsed": 1772, + "status": "ok", + "timestamp": 1697468131788, + "user": { + "displayName": "Francesco Motoko", + "userId": "00974636158007469548" + }, + "user_tz": -120 }, + "id": "ZUrB0D59XMDD", + "outputId": "9f6f5a3c-f856-4d56-a8fb-ec4483ec6127" + }, + "outputs": [ { - "cell_type": "markdown", - "source": [ - "# Load the data" - ], - "metadata": { - "id": "ZyZ2sX8GcvMU" - } + "name": "stdout", + "output_type": "stream", + "text": [ + "--2023-10-16 14:55:29-- https://archive.ics.uci.edu/static/public/275/bike+sharing+dataset.zip\n", + "Resolving archive.ics.uci.edu (archive.ics.uci.edu)... 128.195.10.252\n", + "Connecting to archive.ics.uci.edu (archive.ics.uci.edu)|128.195.10.252|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: unspecified\n", + "Saving to: ‘bike+sharing+dataset.zip’\n", + "\n", + "bike+sharing+datase [ <=> ] 273.43K 443KB/s in 0.6s \n", + "\n", + "2023-10-16 14:55:30 (443 KB/s) - ‘bike+sharing+dataset.zip’ saved [279992]\n", + "\n", + "Archive: bike+sharing+dataset.zip\n", + " inflating: Readme.txt \n", + " inflating: day.csv \n", + " inflating: hour.csv \n" + ] + } + ], + "source": [ + "!mkdir raw_data && \\\n", + " cd raw_data && \\\n", + " wget https://archive.ics.uci.edu/static/public/275/bike+sharing+dataset.zip && \\\n", + " unzip bike+sharing+dataset.zip" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "executionInfo": { + "elapsed": 357, + "status": "ok", + "timestamp": 1697468132141, + "user": { + "displayName": "Francesco Motoko", + "userId": "00974636158007469548" + }, + "user_tz": -120 }, - { - "cell_type": "code", - "source": [ - "!mkdir raw_data && \\\n", - " cd raw_data && \\\n", - " wget https://archive.ics.uci.edu/static/public/275/bike+sharing+dataset.zip && \\\n", - " unzip bike+sharing+dataset.zip" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "ZUrB0D59XMDD", - "executionInfo": { - "status": "ok", - "timestamp": 1697468131788, - "user_tz": -120, - "elapsed": 1772, - "user": { - "displayName": "Francesco Motoko", - "userId": "00974636158007469548" - } - }, - "outputId": "9f6f5a3c-f856-4d56-a8fb-ec4483ec6127" - }, - "execution_count": 3, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "--2023-10-16 14:55:29-- https://archive.ics.uci.edu/static/public/275/bike+sharing+dataset.zip\n", - "Resolving archive.ics.uci.edu (archive.ics.uci.edu)... 128.195.10.252\n", - "Connecting to archive.ics.uci.edu (archive.ics.uci.edu)|128.195.10.252|:443... connected.\n", - "HTTP request sent, awaiting response... 
200 OK\n", - "Length: unspecified\n", - "Saving to: ‘bike+sharing+dataset.zip’\n", - "\n", - "bike+sharing+datase [ <=> ] 273.43K 443KB/s in 0.6s \n", - "\n", - "2023-10-16 14:55:30 (443 KB/s) - ‘bike+sharing+dataset.zip’ saved [279992]\n", - "\n", - "Archive: bike+sharing+dataset.zip\n", - " inflating: Readme.txt \n", - " inflating: day.csv \n", - " inflating: hour.csv \n" - ] - } - ] + "id": "P3XXcUrQY1EQ" + }, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 }, - { - "cell_type": "code", - "source": [ - "import pandas as pd" - ], - "metadata": { - "id": "P3XXcUrQY1EQ", - "executionInfo": { - "status": "ok", - "timestamp": 1697468132141, - "user_tz": -120, - "elapsed": 357, - "user": { - "displayName": "Francesco Motoko", - "userId": "00974636158007469548" - } - } - }, - "execution_count": 4, - "outputs": [] + "executionInfo": { + "elapsed": 9, + "status": "ok", + "timestamp": 1697468132141, + "user": { + "displayName": "Francesco Motoko", + "userId": "00974636158007469548" + }, + "user_tz": -120 }, + "id": "MDK0xkdbYCWg", + "outputId": "ec8d2605-144d-45ff-b442-70ba858a44a3" + }, + "outputs": [ { - "cell_type": "code", - "source": [ - "df = pd.read_csv(\"raw_data/day.csv\", header=0, sep=',', parse_dates=['dteday'])\n", - "df.head()" + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
instantdtedayseasonyrmnthholidayweekdayworkingdayweathersittempatemphumwindspeedcasualregisteredcnt
012011-01-0110106020.3441670.3636250.8058330.160446331654985
122011-01-0210100020.3634780.3537390.6960870.248539131670801
232011-01-0310101110.1963640.1894050.4372730.24830912012291349
342011-01-0410102110.2000000.2121220.5904350.16029610814541562
452011-01-0510103110.2269570.2292700.4369570.1869008215181600
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "
\n", + "
\n" ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 206 - }, - "id": "MDK0xkdbYCWg", - "executionInfo": { - "status": "ok", - "timestamp": 1697468132141, - "user_tz": -120, - "elapsed": 9, - "user": { - "displayName": "Francesco Motoko", - "userId": "00974636158007469548" - } - }, - "outputId": "ec8d2605-144d-45ff-b442-70ba858a44a3" - }, - "execution_count": 5, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - " instant dteday season yr mnth holiday weekday workingday \\\n", - "0 1 2011-01-01 1 0 1 0 6 0 \n", - "1 2 2011-01-02 1 0 1 0 0 0 \n", - "2 3 2011-01-03 1 0 1 0 1 1 \n", - "3 4 2011-01-04 1 0 1 0 2 1 \n", - "4 5 2011-01-05 1 0 1 0 3 1 \n", - "\n", - " weathersit temp atemp hum windspeed casual registered \\\n", - "0 2 0.344167 0.363625 0.805833 0.160446 331 654 \n", - "1 2 0.363478 0.353739 0.696087 0.248539 131 670 \n", - "2 1 0.196364 0.189405 0.437273 0.248309 120 1229 \n", - "3 1 0.200000 0.212122 0.590435 0.160296 108 1454 \n", - "4 1 0.226957 0.229270 0.436957 0.186900 82 1518 \n", - "\n", - " cnt \n", - "0 985 \n", - "1 801 \n", - "2 1349 \n", - "3 1562 \n", - "4 1600 " - ], - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
instantdtedayseasonyrmnthholidayweekdayworkingdayweathersittempatemphumwindspeedcasualregisteredcnt
012011-01-0110106020.3441670.3636250.8058330.160446331654985
122011-01-0210100020.3634780.3537390.6960870.248539131670801
232011-01-0310101110.1963640.1894050.4372730.24830912012291349
342011-01-0410102110.2000000.2121220.5904350.16029610814541562
452011-01-0510103110.2269570.2292700.4369570.1869008215181600
\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "
\n", - "
\n" - ] - }, - "metadata": {}, - "execution_count": 5 - } + "text/plain": [ + " instant dteday season yr mnth holiday weekday workingday \\\n", + "0 1 2011-01-01 1 0 1 0 6 0 \n", + "1 2 2011-01-02 1 0 1 0 0 0 \n", + "2 3 2011-01-03 1 0 1 0 1 1 \n", + "3 4 2011-01-04 1 0 1 0 2 1 \n", + "4 5 2011-01-05 1 0 1 0 3 1 \n", + "\n", + " weathersit temp atemp hum windspeed casual registered \\\n", + "0 2 0.344167 0.363625 0.805833 0.160446 331 654 \n", + "1 2 0.363478 0.353739 0.696087 0.248539 131 670 \n", + "2 1 0.196364 0.189405 0.437273 0.248309 120 1229 \n", + "3 1 0.200000 0.212122 0.590435 0.160296 108 1454 \n", + "4 1 0.226957 0.229270 0.436957 0.186900 82 1518 \n", + "\n", + " cnt \n", + "0 985 \n", + "1 801 \n", + "2 1349 \n", + "3 1562 \n", + "4 1600 " ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv(\"raw_data/day.csv\", header=0, sep=\",\", parse_dates=[\"dteday\"])\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4a9DrmjyhhEP" + }, + "source": [ + "# Define column mapping" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "executionInfo": { + "elapsed": 5, + "status": "ok", + "timestamp": 1697468132141, + "user": { + "displayName": "Francesco Motoko", + "userId": "00974636158007469548" + }, + "user_tz": -120 }, - { - "cell_type": "markdown", - "source": [ - "# Define column mapping" - ], - "metadata": { - "id": "4a9DrmjyhhEP" - } - }, - { - "cell_type": "code", - "source": [ - "from evidently.pipeline.column_mapping import ColumnMapping" - ], - "metadata": { - "id": "_bkEZuM8gELe", - "executionInfo": { - "status": "ok", - "timestamp": 1697468132141, - "user_tz": -120, - "elapsed": 5, - "user": { - "displayName": "Francesco Motoko", - "userId": "00974636158007469548" - } - } - }, - "execution_count": 6, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "data_columns = ColumnMapping()\n", - 
"data_columns.numerical_features = ['weathersit', 'temp', 'atemp', 'hum', 'windspeed']\n", - "data_columns.categorical_features = ['holiday', 'workingday']" - ], - "metadata": { - "id": "dLIZqkHAgEuo", - "executionInfo": { - "status": "ok", - "timestamp": 1697468132141, - "user_tz": -120, - "elapsed": 5, - "user": { - "displayName": "Francesco Motoko", - "userId": "00974636158007469548" - } - } - }, - "execution_count": 7, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "# Define what to log" - ], - "metadata": { - "id": "yNBKbk51hpyz" - } + "id": "_bkEZuM8gELe" + }, + "outputs": [], + "source": [ + "from evidently.pipeline.column_mapping import ColumnMapping" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "executionInfo": { + "elapsed": 5, + "status": "ok", + "timestamp": 1697468132141, + "user": { + "displayName": "Francesco Motoko", + "userId": "00974636158007469548" + }, + "user_tz": -120 }, - { - "cell_type": "code", - "source": [ - "from evidently.report import Report\n", - "from evidently.metric_preset import DataDriftPreset" - ], - "metadata": { - "id": "owblpS3Ahw0o", - "executionInfo": { - "status": "ok", - "timestamp": 1697468136565, - "user_tz": -120, - "elapsed": 4428, - "user": { - "displayName": "Francesco Motoko", - "userId": "00974636158007469548" - } - } - }, - "execution_count": 8, - "outputs": [] + "id": "dLIZqkHAgEuo" + }, + "outputs": [], + "source": [ + "data_columns = ColumnMapping()\n", + "data_columns.numerical_features = [\"weathersit\", \"temp\", \"atemp\", \"hum\", \"windspeed\"]\n", + "data_columns.categorical_features = [\"holiday\", \"workingday\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yNBKbk51hpyz" + }, + "source": [ + "# Define what to log" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "executionInfo": { + "elapsed": 4428, + "status": "ok", + "timestamp": 1697468136565, + "user": { + "displayName": "Francesco Motoko", + 
"userId": "00974636158007469548" + }, + "user_tz": -120 }, - { - "cell_type": "code", - "source": [ - "def eval_drift(reference, production, column_mapping):\n", - " data_drift_report = Report(metrics=[DataDriftPreset()])\n", - " data_drift_report.run(\n", - " reference_data=reference, current_data=production, column_mapping=column_mapping\n", - " )\n", - " report = data_drift_report.as_dict()\n", - "\n", - " drifts = []\n", - "\n", - " for feature in (\n", - " column_mapping.numerical_features + column_mapping.categorical_features\n", - " ):\n", - " drifts.append(\n", - " (\n", - " feature,\n", - " report[\"metrics\"][1][\"result\"][\"drift_by_columns\"][feature][\n", - " \"drift_score\"\n", - " ],\n", - " )\n", - " )\n", - "\n", - " return drifts\n" - ], - "metadata": { - "id": "vRF8PjiYho6z", - "executionInfo": { - "status": "ok", - "timestamp": 1697468136565, - "user_tz": -120, - "elapsed": 3, - "user": { - "displayName": "Francesco Motoko", - "userId": "00974636158007469548" - } - } - }, - "execution_count": 9, - "outputs": [] + "id": "owblpS3Ahw0o" + }, + "outputs": [], + "source": [ + "from evidently.report import Report\n", + "from evidently.metric_preset import DataDriftPreset" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "executionInfo": { + "elapsed": 3, + "status": "ok", + "timestamp": 1697468136565, + "user": { + "displayName": "Francesco Motoko", + "userId": "00974636158007469548" + }, + "user_tz": -120 }, - { - "cell_type": "markdown", - "source": [ - "# Define the comparison windows" - ], - "metadata": { - "id": "4Yhet51mh6Xz" - } + "id": "vRF8PjiYho6z" + }, + "outputs": [], + "source": [ + "def eval_drift(reference, production, column_mapping):\n", + " data_drift_report = Report(metrics=[DataDriftPreset()])\n", + " data_drift_report.run(\n", + " reference_data=reference, current_data=production, column_mapping=column_mapping\n", + " )\n", + " report = data_drift_report.as_dict()\n", + "\n", + " drifts = []\n", + 
"\n", + " for feature in (\n", + " column_mapping.numerical_features + column_mapping.categorical_features\n", + " ):\n", + " drifts.append(\n", + " (\n", + " feature,\n", + " report[\"metrics\"][1][\"result\"][\"drift_by_columns\"][feature][\n", + " \"drift_score\"\n", + " ],\n", + " )\n", + " )\n", + "\n", + " return drifts" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4Yhet51mh6Xz" + }, + "source": [ + "# Define the comparison windows" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "executionInfo": { + "elapsed": 3, + "status": "ok", + "timestamp": 1697468136565, + "user": { + "displayName": "Francesco Motoko", + "userId": "00974636158007469548" + }, + "user_tz": -120 }, - { - "cell_type": "code", - "source": [ - "#set reference dates\n", - "reference_dates = ('2011-01-01 00:00:00','2011-01-28 23:00:00')\n", - "\n", - "#set experiment batches dates\n", - "experiment_batches = [\n", - " ('2011-01-01 00:00:00','2011-01-29 23:00:00'),\n", - " ('2011-01-29 00:00:00','2011-02-07 23:00:00'),\n", - " ('2011-02-07 00:00:00','2011-02-14 23:00:00'),\n", - " ('2011-02-15 00:00:00','2011-02-21 23:00:00'),\n", - "]" - ], - "metadata": { - "id": "nTq8xUbGh3Ux", - "executionInfo": { - "status": "ok", - "timestamp": 1697468136565, - "user_tz": -120, - "elapsed": 3, - "user": { - "displayName": "Francesco Motoko", - "userId": "00974636158007469548" - } - } - }, - "execution_count": 10, - "outputs": [] + "id": "nTq8xUbGh3Ux" + }, + "outputs": [], + "source": [ + "# set reference dates\n", + "reference_dates = (\"2011-01-01 00:00:00\", \"2011-01-28 23:00:00\")\n", + "\n", + "# set experiment batches dates\n", + "experiment_batches = [\n", + " (\"2011-01-01 00:00:00\", \"2011-01-29 23:00:00\"),\n", + " (\"2011-01-29 00:00:00\", \"2011-02-07 23:00:00\"),\n", + " (\"2011-02-07 00:00:00\", \"2011-02-14 23:00:00\"),\n", + " (\"2011-02-15 00:00:00\", \"2011-02-21 23:00:00\"),\n", + "]" + ] + }, + { + "cell_type": "markdown", + 
"metadata": { + "id": "8lNq9OdniDss" + }, + "source": [ + "# Run and log experiments with DVCLive" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "executionInfo": { + "elapsed": 3, + "status": "ok", + "timestamp": 1697468136565, + "user": { + "displayName": "Francesco Motoko", + "userId": "00974636158007469548" + }, + "user_tz": -120 }, - { - "cell_type": "markdown", - "source": [ - "# Run and log experiments with DVCLive" - ], - "metadata": { - "id": "8lNq9OdniDss" - } + "id": "zUt5jrVSRIqD" + }, + "outputs": [], + "source": [ + "!git config --global user.email \"you@example.com\"\n", + "!git config --global user.name \"Your Name\"" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "executionInfo": { + "elapsed": 1231, + "status": "ok", + "timestamp": 1697468137794, + "user": { + "displayName": "Francesco Motoko", + "userId": "00974636158007469548" + }, + "user_tz": -120 }, - { - "cell_type": "code", - "source": [ - "!git config --global user.email \"you@example.com\"\n", - "!git config --global user.name \"Your Name\"" - ], - "metadata": { - "id": "zUt5jrVSRIqD", - "executionInfo": { - "status": "ok", - "timestamp": 1697468136565, - "user_tz": -120, - "elapsed": 3, - "user": { - "displayName": "Francesco Motoko", - "userId": "00974636158007469548" - } - } - }, - "execution_count": 11, - "outputs": [] + "id": "5Hx1jI9PnT3C" + }, + "outputs": [], + "source": [ + "from dvclive import Live" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jTsrtISaSF7D" + }, + "source": [ + "There are two ways to use DVC: put all the drift evaluation steps in a single experiment (corresponding to one git commit), or save each step as a separate experiment (one git commit per step)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RGrEbbla30jr" + }, + "source": [ + "## In one experiment" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri":
"https://localhost:8080/" }, - { - "cell_type": "code", - "source": [ - "from dvclive import Live" - ], - "metadata": { - "id": "5Hx1jI9PnT3C", - "executionInfo": { - "status": "ok", - "timestamp": 1697468137794, - "user_tz": -120, - "elapsed": 1231, - "user": { - "displayName": "Francesco Motoko", - "userId": "00974636158007469548" - } - } - }, - "execution_count": 12, - "outputs": [] + "executionInfo": { + "elapsed": 2844, + "status": "ok", + "timestamp": 1697468140631, + "user": { + "displayName": "Francesco Motoko", + "userId": "00974636158007469548" + }, + "user_tz": -120 }, + "id": "ijUf_HhRobl0", + "outputId": "796d7eec-17dc-40b2-a4c9-5bdcf9184c58" + }, + "outputs": [ { - "cell_type": "markdown", - "source": [ - "There are two ways to use DVC, put all the drift evaluation steps in one single experiment (corresponding to a git commit), or to save each step as a separate experiment (git commit)" - ], - "metadata": { - "id": "jTsrtISaSF7D" - } + "name": "stdout", + "output_type": "stream", + "text": [ + "/content\n", + "/content/experiments\n", + "hint: Using 'master' as the name for the initial branch. This default branch name\n", + "hint: is subject to change. To configure the initial branch name to use in all\n", + "hint: of your new repositories, which will suppress this warning, call:\n", + "hint: \n", + "hint: \tgit config --global init.defaultBranch \n", + "hint: \n", + "hint: Names commonly chosen instead of 'master' are 'main', 'trunk' and\n", + "hint: 'development'. 
The just-created branch can be renamed via this command:\n", + "hint: \n", + "hint: \tgit branch -m \n", + "Initialized empty Git repository in /content/experiments/.git/\n", + "fatal: pathspec '.gitignore' did not match any files\n", + "On branch master\n", + "\n", + "Initial commit\n", + "\n", + "nothing to commit (create/copy files and use \"git add\" to track)\n", + "Initialized DVC repository.\n", + "\n", + "You can now commit the changes to git.\n", + "\n", + "+---------------------------------------------------------------------+\n", + "| |\n", + "| DVC has enabled anonymous aggregate usage analytics. |\n", + "| Read the analytics documentation (and how to opt-out) here: |\n", + "| |\n", + "| |\n", + "+---------------------------------------------------------------------+\n", + "\n", + "What's next?\n", + "------------\n", + "- Check out the documentation: \n", + "- Get help and share ideas: \n", + "- Star us on GitHub: \n", + "[master (root-commit) 9220260] Init DVC\n", + " 3 files changed, 6 insertions(+)\n", + " create mode 100644 .dvc/.gitignore\n", + " create mode 100644 .dvc/config\n", + " create mode 100644 .dvcignore\n" + ] + } + ], + "source": [ + "# Setup a git repo with dvc\n", + "\n", + "%cd /content\n", + "!rm -rf experiments && mkdir experiments\n", + "%cd experiments\n", + "\n", + "!git init\n", + "!git add .gitignore\n", + "!git commit -m \"Init repo\"\n", + "!dvc init\n", + "!git commit -m \"Init DVC\"" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 }, - { - "cell_type": "markdown", - "source": [ - "## In one experiment" - ], - "metadata": { - "id": "RGrEbbla30jr" - } + "executionInfo": { + "elapsed": 16055, + "status": "ok", + "timestamp": 1697468156663, + "user": { + "displayName": "Francesco Motoko", + "userId": "00974636158007469548" + }, + "user_tz": -120 }, + "id": "_h-jGJqPiA30", + "outputId": "0b949e24-8c53-4765-a8ee-64d002b3801e" + 
}, + "outputs": [ { - "cell_type": "code", - "source": [ - "# Setup a git repo with dvc\n", - "\n", - "%cd /content\n", - "!rm -rf experiments && mkdir experiments\n", - "%cd experiments\n", - "\n", - "!git init\n", - "!git add .gitignore\n", - "!git commit -m \"Init repo\"\n", - "!dvc init\n", - "!git commit -m \"Init DVC\"" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "ijUf_HhRobl0", - "executionInfo": { - "status": "ok", - "timestamp": 1697468140631, - "user_tz": -120, - "elapsed": 2844, - "user": { - "displayName": "Francesco Motoko", - "userId": "00974636158007469548" - } - }, - "outputId": "796d7eec-17dc-40b2-a4c9-5bdcf9184c58" - }, - "execution_count": 13, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "/content\n", - "/content/experiments\n", - "hint: Using 'master' as the name for the initial branch. This default branch name\n", - "hint: is subject to change. To configure the initial branch name to use in all\n", - "hint: of your new repositories, which will suppress this warning, call:\n", - "hint: \n", - "hint: \tgit config --global init.defaultBranch \n", - "hint: \n", - "hint: Names commonly chosen instead of 'master' are 'main', 'trunk' and\n", - "hint: 'development'. The just-created branch can be renamed via this command:\n", - "hint: \n", - "hint: \tgit branch -m \n", - "Initialized empty Git repository in /content/experiments/.git/\n", - "fatal: pathspec '.gitignore' did not match any files\n", - "On branch master\n", - "\n", - "Initial commit\n", - "\n", - "nothing to commit (create/copy files and use \"git add\" to track)\n", - "Initialized DVC repository.\n", - "\n", - "You can now commit the changes to git.\n", - "\n", - "+---------------------------------------------------------------------+\n", - "| |\n", - "| DVC has enabled anonymous aggregate usage analytics. 
|\n", - "| Read the analytics documentation (and how to opt-out) here: |\n", - "| |\n", - "| |\n", - "+---------------------------------------------------------------------+\n", - "\n", - "What's next?\n", - "------------\n", - "- Check out the documentation: \n", - "- Get help and share ideas: \n", - "- Star us on GitHub: \n", - "[master (root-commit) 9220260] Init DVC\n", - " 3 files changed, 6 insertions(+)\n", - " create mode 100644 .dvc/.gitignore\n", - " create mode 100644 .dvc/config\n", - " create mode 100644 .dvcignore\n" - ] - } + "data": { + "text/markdown": "# DVC Report\n\nparams.yaml\n\n| begin | end |\n|---------------------|---------------------|\n| 2011-02-15 00:00:00 | 2011-02-21 23:00:00 |\n\nmetrics.json\n\n| weathersit | temp | atemp | hum | windspeed | holiday | workingday | step |\n|--------------|--------|---------|-------|-------------|-----------|--------------|--------|\n| 0.231 | 0 | 0 | 0.062 | 0.012 | 0.275 | 0.593 | 3 |\n\n![static/holiday]()\n\n![static/windspeed]()\n\n![static/temp]()\n\n![static/workingday]()\n\n![static/weathersit]()\n\n![static/hum]()\n\n![static/atemp]()\n", + "text/plain": [ + "" ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "with Live(report=\"notebook\") as live:\n", + " for date in experiment_batches:\n", + " live.log_param(\"begin\", date[0])\n", + " live.log_param(\"end\", date[1])\n", + "\n", + " metrics = eval_drift(\n", + " df.loc[df.dteday.between(reference_dates[0], reference_dates[1])],\n", + " df.loc[df.dteday.between(date[0], date[1])],\n", + " column_mapping=data_columns,\n", + " )\n", + "\n", + " for feature in metrics:\n", + " live.log_metric(feature[0], round(feature[1], 3))\n", + "\n", + " live.next_step()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Pc3jDX1q-y3c" + }, + "source": [ + "To explore the results from CLI:" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": 
"https://localhost:8080/" }, - { - "cell_type": "code", - "source": [ - "with Live(report=\"notebook\") as live:\n", - " for date in experiment_batches:\n", - " live.log_param(\"begin\", date[0])\n", - " live.log_param(\"end\", date[1])\n", - "\n", - " metrics = eval_drift(\n", - " df.loc[df.dteday.between(reference_dates[0], reference_dates[1])],\n", - " df.loc[df.dteday.between(date[0], date[1])],\n", - " column_mapping=data_columns,\n", - " )\n", - "\n", - " for feature in metrics:\n", - " live.log_metric(feature[0], round(feature[1], 3))\n", - "\n", - " live.next_step()" - ], - "metadata": { - "id": "_h-jGJqPiA30", - "executionInfo": { - "status": "ok", - "timestamp": 1697468156663, - "user_tz": -120, - "elapsed": 16055, - "user": { - "displayName": "Francesco Motoko", - "userId": "00974636158007469548" - } - }, - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 - }, - "outputId": "0b949e24-8c53-4765-a8ee-64d002b3801e" - }, - "execution_count": 14, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": [ - "" - ], - "text/markdown": "# DVC Report\n\nparams.yaml\n\n| begin | end |\n|---------------------|---------------------|\n| 2011-02-15 00:00:00 | 2011-02-21 23:00:00 |\n\nmetrics.json\n\n| weathersit | temp | atemp | hum | windspeed | holiday | workingday | step |\n|--------------|--------|---------|-------|-------------|-----------|--------------|--------|\n| 0.231 | 0 | 0 | 0.062 | 0.012 | 0.275 | 0.593 | 3 |\n\n![static/holiday]()\n\n![static/windspeed]()\n\n![static/temp]()\n\n![static/workingday]()\n\n![static/weathersit]()\n\n![static/hum]()\n\n![static/atemp]()\n" - }, - "metadata": {} - } - ] + "executionInfo": { + "elapsed": 1434, + "status": "ok", + "timestamp": 1697468158085, + "user": { + "displayName": "Francesco Motoko", + "userId": "00974636158007469548" + }, + "user_tz": -120 }, + "id": "6OAsURiL-Ge2", + "outputId": "0fb47be1-f524-41f1-8c74-d4663d721290" + }, + "outputs": [ { - "cell_type": 
"markdown", - "source": [ - "To explore the results from CLI:" - ], - "metadata": { - "id": "Pc3jDX1q-y3c" - } + "name": "stdout", + "output_type": "stream", + "text": [ + "\rReading plot's data from workspace: 0% 0/7 [00:00\n", + "\n", + "\n", + " \n", + " DVC Plot\n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "\n", + " \n", + "
\n", + " \n", + "
\n", + " \n", + "\n", + "
\n", + " \n", + "
\n", + " \n", + "\n", + "
\n", + " \n", + "
\n", + " \n", + "\n", + "
\n", + " \n", + "
\n", + " \n", + "\n", + "
\n", + " \n", + "
\n", + " \n", + "\n", + "
\n", + " \n", + "
\n", + " \n", + "\n", + "
\n", + " \n", + "
\n", + " \n", + "\n", + "" ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 802 - }, - "id": "pwdjxeEG-I49", - "executionInfo": { - "status": "ok", - "timestamp": 1697468158085, - "user_tz": -120, - "elapsed": 4, - "user": { - "displayName": "Francesco Motoko", - "userId": "00974636158007469548" - } - }, - "outputId": "e01c74f9-45b6-4715-b1bd-2dd20875a421" - }, - "execution_count": 16, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "" - ], - "text/html": [ - "\n", - "\n", - "\n", - " \n", - " DVC Plot\n", - " \n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n", - "\n", - " \n", - "
\n", - " \n", - "
\n", - " \n", - "\n", - "
\n", - " \n", - "
\n", - " \n", - "\n", - "
\n", - " \n", - "
\n", - " \n", - "\n", - "
\n", - " \n", - "
\n", - " \n", - "\n", - "
\n", - " \n", - "
\n", - " \n", - "\n", - "
\n", - " \n", - "
\n", - " \n", - "\n", - "
\n", - " \n", - "
\n", - " \n", - "\n", - "" - ] - }, - "metadata": {}, - "execution_count": 16 - } + "text/plain": [ + "" ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import IPython\n", + "\n", + "IPython.display.HTML(filename=\"dvc_plots/index.html\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CCdF_ipAIY7k" + }, + "source": [ + "## In multiple experiments (one per step)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, - { - "cell_type": "markdown", - "source": [ - "## In multiple experiments (one per step)" - ], - "metadata": { - "id": "CCdF_ipAIY7k" - } - }, - { - "cell_type": "code", - "source": [ - "# Setup a git repo with dvc\n", - "\n", - "%cd /content\n", - "!rm -rf experiments && mkdir experiments\n", - "%cd experiments\n", - "\n", - "!git init\n", - "!git add .gitignore\n", - "!git commit -m \"Init repo\"\n", - "!dvc init\n", - "!git commit -m \"Init DVC\"" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "0x81BAI--2Gm", - "executionInfo": { - "status": "ok", - "timestamp": 1697468159295, - "user_tz": -120, - "elapsed": 1213, - "user": { - "displayName": "Francesco Motoko", - "userId": "00974636158007469548" - } - }, - "outputId": "7fb22cea-d367-41b0-f27d-a99e9d6081dc" - }, - "execution_count": 17, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "/content\n", - "/content/experiments\n", - "hint: Using 'master' as the name for the initial branch. This default branch name\n", - "hint: is subject to change. To configure the initial branch name to use in all\n", - "hint: of your new repositories, which will suppress this warning, call:\n", - "hint: \n", - "hint: \tgit config --global init.defaultBranch \n", - "hint: \n", - "hint: Names commonly chosen instead of 'master' are 'main', 'trunk' and\n", - "hint: 'development'. 
The just-created branch can be renamed via this command:\n", - "hint: \n", - "hint: \tgit branch -m \n", - "Initialized empty Git repository in /content/experiments/.git/\n", - "fatal: pathspec '.gitignore' did not match any files\n", - "On branch master\n", - "\n", - "Initial commit\n", - "\n", - "nothing to commit (create/copy files and use \"git add\" to track)\n", - "Initialized DVC repository.\n", - "\n", - "You can now commit the changes to git.\n", - "\n", - "+---------------------------------------------------------------------+\n", - "| |\n", - "| DVC has enabled anonymous aggregate usage analytics. |\n", - "| Read the analytics documentation (and how to opt-out) here: |\n", - "| |\n", - "| |\n", - "+---------------------------------------------------------------------+\n", - "\n", - "What's next?\n", - "------------\n", - "- Check out the documentation: \n", - "- Get help and share ideas: \n", - "- Star us on GitHub: \n", - "[master (root-commit) 469083d] Init DVC\n", - " 3 files changed, 6 insertions(+)\n", - " create mode 100644 .dvc/.gitignore\n", - " create mode 100644 .dvc/config\n", - " create mode 100644 .dvcignore\n" - ] - } - ] + "executionInfo": { + "elapsed": 1213, + "status": "ok", + "timestamp": 1697468159295, + "user": { + "displayName": "Francesco Motoko", + "userId": "00974636158007469548" + }, + "user_tz": -120 }, + "id": "0x81BAI--2Gm", + "outputId": "7fb22cea-d367-41b0-f27d-a99e9d6081dc" + }, + "outputs": [ { - "cell_type": "code", - "source": [ - "from dvclive import Live\n", - "\n", - "for step, date in enumerate(experiment_batches):\n", - " with Live() as live:\n", - " live.log_param(\"step\", step)\n", - " live.log_param(\"begin\", date[0])\n", - " live.log_param(\"end\", date[1])\n", - "\n", - " metrics = eval_drift(\n", - " df.loc[df.dteday.between(reference_dates[0], reference_dates[1])],\n", - " df.loc[df.dteday.between(date[0], date[1])],\n", - " column_mapping=data_columns,\n", - " )\n", - "\n", - " for feature in metrics:\n", 
- " live.log_metric(feature[0], round(feature[1], 3))\n" - ], - "metadata": { - "id": "VfVLDwfD39qO", - "executionInfo": { - "status": "ok", - "timestamp": 1697468161649, - "user_tz": -120, - "elapsed": 2355, - "user": { - "displayName": "Francesco Motoko", - "userId": "00974636158007469548" - } - } - }, - "execution_count": 18, - "outputs": [] + "name": "stdout", + "output_type": "stream", + "text": [ + "/content\n", + "/content/experiments\n", + "hint: Using 'master' as the name for the initial branch. This default branch name\n", + "hint: is subject to change. To configure the initial branch name to use in all\n", + "hint: of your new repositories, which will suppress this warning, call:\n", + "hint: \n", + "hint: \tgit config --global init.defaultBranch \n", + "hint: \n", + "hint: Names commonly chosen instead of 'master' are 'main', 'trunk' and\n", + "hint: 'development'. The just-created branch can be renamed via this command:\n", + "hint: \n", + "hint: \tgit branch -m \n", + "Initialized empty Git repository in /content/experiments/.git/\n", + "fatal: pathspec '.gitignore' did not match any files\n", + "On branch master\n", + "\n", + "Initial commit\n", + "\n", + "nothing to commit (create/copy files and use \"git add\" to track)\n", + "Initialized DVC repository.\n", + "\n", + "You can now commit the changes to git.\n", + "\n", + "+---------------------------------------------------------------------+\n", + "| |\n", + "| DVC has enabled anonymous aggregate usage analytics. 
|\n", + "| Read the analytics documentation (and how to opt-out) here: |\n", + "| |\n", + "| |\n", + "+---------------------------------------------------------------------+\n", + "\n", + "What's next?\n", + "------------\n", + "- Check out the documentation: \n", + "- Get help and share ideas: \n", + "- Star us on GitHub: \n", + "[master (root-commit) 469083d] Init DVC\n", + " 3 files changed, 6 insertions(+)\n", + " create mode 100644 .dvc/.gitignore\n", + " create mode 100644 .dvc/config\n", + " create mode 100644 .dvcignore\n" + ] + } + ], + "source": [ + "# Setup a git repo with dvc\n", + "\n", + "%cd /content\n", + "!rm -rf experiments && mkdir experiments\n", + "%cd experiments\n", + "\n", + "!git init\n", + "!git add .gitignore\n", + "!git commit -m \"Init repo\"\n", + "!dvc init\n", + "!git commit -m \"Init DVC\"" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "executionInfo": { + "elapsed": 2355, + "status": "ok", + "timestamp": 1697468161649, + "user": { + "displayName": "Francesco Motoko", + "userId": "00974636158007469548" + }, + "user_tz": -120 }, - { - "cell_type": "code", - "source": [ - "import dvc.api\n", - "\n", - "pd.DataFrame(dvc.api.exp_show())" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 238 - }, - "id": "ijcN3PaZ6fM0", - "executionInfo": { - "status": "ok", - "timestamp": 1697468162078, - "user_tz": -120, - "elapsed": 433, - "user": { - "displayName": "Francesco Motoko", - "userId": "00974636158007469548" - } - }, - "outputId": "2d26f834-604f-4e28-8924-f5d97ae92596" - }, - "execution_count": 19, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - " Experiment rev typ Created parent State Executor \\\n", - "0 None workspace baseline None None None None \n", - "1 None master baseline 02:55 PM None None None \n", - "2 elite-mobs e4d6acd branch_commit 02:56 PM None None None \n", - "3 buxom-shes 439f6e1 branch_commit 02:56 PM None None 
None \n", - "4 hammy-skip b5b80b5 branch_commit 02:55 PM None None None \n", - "5 girly-sere 2ba9568 branch_base 02:55 PM None None None \n", - "\n", - " weathersit temp atemp hum windspeed holiday workingday step \\\n", - "0 0.231 NaN NaN 0.062 0.012 0.275 0.593 3.0 \n", - "1 NaN NaN NaN NaN NaN NaN NaN NaN \n", - "2 0.231 NaN NaN 0.062 0.012 0.275 0.593 3.0 \n", - "3 0.155 0.399 0.537 0.684 0.611 0.588 0.699 2.0 \n", - "4 0.985 1.000 1.000 1.000 1.000 0.980 0.851 NaN \n", - "5 0.779 0.098 0.107 0.030 0.171 0.545 0.653 1.0 \n", - "\n", - " begin end \n", - "0 2011-02-15 00:00:00 2011-02-21 23:00:00 \n", - "1 None None \n", - "2 2011-02-15 00:00:00 2011-02-21 23:00:00 \n", - "3 2011-02-07 00:00:00 2011-02-14 23:00:00 \n", - "4 2011-01-01 00:00:00 2011-01-29 23:00:00 \n", - "5 2011-01-29 00:00:00 2011-02-07 23:00:00 " - ], - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ExperimentrevtypCreatedparentStateExecutorweathersittempatemphumwindspeedholidayworkingdaystepbeginend
0NoneworkspacebaselineNoneNoneNoneNone0.231NaNNaN0.0620.0120.2750.5933.02011-02-15 00:00:002011-02-21 23:00:00
1Nonemasterbaseline02:55 PMNoneNoneNoneNaNNaNNaNNaNNaNNaNNaNNaNNoneNone
2elite-mobse4d6acdbranch_commit02:56 PMNoneNoneNone0.231NaNNaN0.0620.0120.2750.5933.02011-02-15 00:00:002011-02-21 23:00:00
3buxom-shes439f6e1branch_commit02:56 PMNoneNoneNone0.1550.3990.5370.6840.6110.5880.6992.02011-02-07 00:00:002011-02-14 23:00:00
4hammy-skipb5b80b5branch_commit02:55 PMNoneNoneNone0.9851.0001.0001.0001.0000.9800.851NaN2011-01-01 00:00:002011-01-29 23:00:00
5girly-sere2ba9568branch_base02:55 PMNoneNoneNone0.7790.0980.1070.0300.1710.5450.6531.02011-01-29 00:00:002011-02-07 23:00:00
\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "
\n", - "
\n" - ] - }, - "metadata": {}, - "execution_count": 19 - } - ] + "id": "VfVLDwfD39qO" + }, + "outputs": [], + "source": [ + "from dvclive import Live\n", + "\n", + "for step, date in enumerate(experiment_batches):\n", + " with Live() as live:\n", + " live.log_param(\"step\", step)\n", + " live.log_param(\"begin\", date[0])\n", + " live.log_param(\"end\", date[1])\n", + "\n", + " metrics = eval_drift(\n", + " df.loc[df.dteday.between(reference_dates[0], reference_dates[1])],\n", + " df.loc[df.dteday.between(date[0], date[1])],\n", + " column_mapping=data_columns,\n", + " )\n", + "\n", + " for feature in metrics:\n", + " live.log_metric(feature[0], round(feature[1], 3))" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 238 }, - { - "cell_type": "markdown", - "source": [ - "To explore the results from CLI:" - ], - "metadata": { - "id": "TQE5aBWl-sef" - } + "executionInfo": { + "elapsed": 433, + "status": "ok", + "timestamp": 1697468162078, + "user": { + "displayName": "Francesco Motoko", + "userId": "00974636158007469548" + }, + "user_tz": -120 }, + "id": "ijcN3PaZ6fM0", + "outputId": "2d26f834-604f-4e28-8924-f5d97ae92596" + }, + "outputs": [ { - "cell_type": "code", - "source": [ - "!dvc exp show" + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ExperimentrevtypCreatedparentStateExecutorweathersittempatemphumwindspeedholidayworkingdaystepbeginend
0NoneworkspacebaselineNoneNoneNoneNone0.231NaNNaN0.0620.0120.2750.5933.02011-02-15 00:00:002011-02-21 23:00:00
1Nonemasterbaseline02:55 PMNoneNoneNoneNaNNaNNaNNaNNaNNaNNaNNaNNoneNone
2elite-mobse4d6acdbranch_commit02:56 PMNoneNoneNone0.231NaNNaN0.0620.0120.2750.5933.02011-02-15 00:00:002011-02-21 23:00:00
3buxom-shes439f6e1branch_commit02:56 PMNoneNoneNone0.1550.3990.5370.6840.6110.5880.6992.02011-02-07 00:00:002011-02-14 23:00:00
4hammy-skipb5b80b5branch_commit02:55 PMNoneNoneNone0.9851.0001.0001.0001.0000.9800.851NaN2011-01-01 00:00:002011-01-29 23:00:00
5girly-sere2ba9568branch_base02:55 PMNoneNoneNone0.7790.0980.1070.0300.1710.5450.6531.02011-01-29 00:00:002011-02-07 23:00:00
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "
\n", + "
\n" ], - "metadata": { - "id": "oZtY-97bQj-Q", - "executionInfo": { - "status": "ok", - "timestamp": 1697468163295, - "user_tz": -120, - "elapsed": 1221, - "user": { - "displayName": "Francesco Motoko", - "userId": "00974636158007469548" - } - }, - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "14eb8d4c-c9ce-4bb8-caba-42e46d45bb65" - }, - "execution_count": 20, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - " ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── \n", - " Experiment Created weathersit temp atemp hum windspeed holiday workingday step begin end \n", - " ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── \n", - " workspace - 0.231 0 0 0.062 0.012 0.275 0.593 3 2011-02-15 00:00:00 2011-02-21 23:00:00 \n", - " master 02:55 PM - - - - - - - - - - \n", - " ├── e4d6acd [elite-mobs] 02:56 PM 0.231 0 0 0.062 0.012 0.275 0.593 3 2011-02-15 00:00:00 2011-02-21 23:00:00 \n", - " ├── 439f6e1 [buxom-shes] 02:56 PM 0.155 0.399 0.537 0.684 0.611 0.588 0.699 2 2011-02-07 00:00:00 2011-02-14 23:00:00 \n", - " ├── b5b80b5 [hammy-skip] 02:55 PM 0.985 1 1 1 1 0.98 0.851 0 2011-01-01 00:00:00 2011-01-29 23:00:00 \n", - " └── 2ba9568 [girly-sere] 02:55 PM 0.779 0.098 0.107 0.03 0.171 0.545 0.653 1 2011-01-29 00:00:00 2011-02-07 23:00:00 \n", - " ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── \n" - ] - } + "text/plain": [ + " Experiment rev typ Created parent State Executor \\\n", + "0 None workspace baseline None None None None \n", + "1 None master baseline 02:55 PM None None None \n", + "2 elite-mobs e4d6acd branch_commit 02:56 PM None None None \n", + "3 buxom-shes 439f6e1 branch_commit 
02:56 PM None None None \n", + "4 hammy-skip b5b80b5 branch_commit 02:55 PM None None None \n", + "5 girly-sere 2ba9568 branch_base 02:55 PM None None None \n", + "\n", + " weathersit temp atemp hum windspeed holiday workingday step \\\n", + "0 0.231 NaN NaN 0.062 0.012 0.275 0.593 3.0 \n", + "1 NaN NaN NaN NaN NaN NaN NaN NaN \n", + "2 0.231 NaN NaN 0.062 0.012 0.275 0.593 3.0 \n", + "3 0.155 0.399 0.537 0.684 0.611 0.588 0.699 2.0 \n", + "4 0.985 1.000 1.000 1.000 1.000 0.980 0.851 NaN \n", + "5 0.779 0.098 0.107 0.030 0.171 0.545 0.653 1.0 \n", + "\n", + " begin end \n", + "0 2011-02-15 00:00:00 2011-02-21 23:00:00 \n", + "1 None None \n", + "2 2011-02-15 00:00:00 2011-02-21 23:00:00 \n", + "3 2011-02-07 00:00:00 2011-02-14 23:00:00 \n", + "4 2011-01-01 00:00:00 2011-01-29 23:00:00 \n", + "5 2011-01-29 00:00:00 2011-02-07 23:00:00 " ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import dvc.api\n", + "\n", + "pd.DataFrame(dvc.api.exp_show())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TQE5aBWl-sef" + }, + "source": [ + "To explore the results from CLI:" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "executionInfo": { + "elapsed": 1221, + "status": "ok", + "timestamp": 1697468163295, + "user": { + "displayName": "Francesco Motoko", + "userId": "00974636158007469548" + }, + "user_tz": -120 + }, + "id": "oZtY-97bQj-Q", + "outputId": "14eb8d4c-c9ce-4bb8-caba-42e46d45bb65" + }, + "outputs": [ { - "cell_type": "code", - "source": [], - "metadata": { - "id": "QoYexufp-qw2", - "executionInfo": { - "status": "ok", - "timestamp": 1697468163757, - "user_tz": -120, - "elapsed": 464, - "user": { - "displayName": "Francesco Motoko", - "userId": "00974636158007469548" - } - } - }, - "execution_count": 20, - "outputs": [] + "name": "stdout", + "output_type": "stream", + "text": [ + " 
──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── \n", + " Experiment Created weathersit temp atemp hum windspeed holiday workingday step begin end \n", + " ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── \n", + " workspace - 0.231 0 0 0.062 0.012 0.275 0.593 3 2011-02-15 00:00:00 2011-02-21 23:00:00 \n", + " master 02:55 PM - - - - - - - - - - \n", + " ├── e4d6acd [elite-mobs] 02:56 PM 0.231 0 0 0.062 0.012 0.275 0.593 3 2011-02-15 00:00:00 2011-02-21 23:00:00 \n", + " ├── 439f6e1 [buxom-shes] 02:56 PM 0.155 0.399 0.537 0.684 0.611 0.588 0.699 2 2011-02-07 00:00:00 2011-02-14 23:00:00 \n", + " ├── b5b80b5 [hammy-skip] 02:55 PM 0.985 1 1 1 1 0.98 0.851 0 2011-01-01 00:00:00 2011-01-29 23:00:00 \n", + " └── 2ba9568 [girly-sere] 02:55 PM 0.779 0.098 0.107 0.03 0.171 0.545 0.653 1 2011-01-29 00:00:00 2011-02-07 23:00:00 \n", + " ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── \n" + ] } - ] + ], + "source": [ + "!dvc exp show" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "executionInfo": { + "elapsed": 464, + "status": "ok", + "timestamp": 1697468163757, + "user": { + "displayName": "Francesco Motoko", + "userId": "00974636158007469548" + }, + "user_tz": -120 + }, + "id": "QoYexufp-qw2" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "authorship_tag": "ABX9TyNJAdha/v4n9zLqIfGakg0E", + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 } diff --git a/examples/DVCLive-Fabric.ipynb b/examples/DVCLive-Fabric.ipynb index 
28bcffa9..cdeaccef 100644 --- a/examples/DVCLive-Fabric.ipynb +++ b/examples/DVCLive-Fabric.ipynb @@ -1,315 +1,334 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "QKSE19fW_Dnj" - }, - "source": [ - "# DVCLive and Lightning Fabric" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "q-C_4R_o_QGG" - }, - "source": [ - "## Install dvclive" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "-XFbvwq7TSwN", - "outputId": "15d0e3b5-bb4a-4b3e-d37f-21608d1822ed" - }, - "outputs": [], - "source": [ - "!pip install \"dvclive[lightning]\"" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "I6S6Uru1_Y0x" - }, - "source": [ - "## Initialize DVC Repository" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "WcbvUl2uTV0y", - "outputId": "aff9740c-26db-483d-ce30-cfef395f3cbb" - }, - "outputs": [], - "source": [ - "!git init -q\n", - "!git config --local user.email \"you@example.com\"\n", - "!git config --local user.name \"Your Name\"\n", - "!dvc init -q\n", - "!git commit -m \"DVC init\"" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "LmY4PLMh_cUk" - }, - "source": [ - "## Imports" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "85qErT5yTEbN" - }, - "outputs": [], - "source": [ - "import argparse\n", - "from os import path\n", - "from types import SimpleNamespace\n", - "\n", - "import torch\n", - "import torch.nn as nn\n", - "import torch.nn.functional as F\n", - "import torch.optim as optim\n", - "import torchvision.transforms as T\n", - "from lightning.fabric import Fabric, seed_everything\n", - "from lightning.fabric.utilities.rank_zero import rank_zero_only\n", - "from torch.optim.lr_scheduler import StepLR\n", - "from torchmetrics.classification import Accuracy\n", - "from 
torchvision.datasets import MNIST\n", - "\n", - "from dvclive.fabric import DVCLiveLogger\n", - "\n", - "DATASETS_PATH = (\"Datasets\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "UrmAHbhr_lgs" - }, - "source": [ - "## Setup model code\n", - "\n", - "Adapted from https://github.com/Lightning-AI/pytorch-lightning/blob/master/examples/fabric/image_classifier/train_fabric.py.\n", - "\n", - "Look for the `logger` statements where DVCLiveLogger calls were added." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "UCzTygUnTHM8" - }, - "outputs": [], - "source": [ - "class Net(nn.Module):\n", - " def __init__(self) -> None:\n", - " super().__init__()\n", - " self.conv1 = nn.Conv2d(1, 32, 3, 1)\n", - " self.conv2 = nn.Conv2d(32, 64, 3, 1)\n", - " self.dropout1 = nn.Dropout(0.25)\n", - " self.dropout2 = nn.Dropout(0.5)\n", - " self.fc1 = nn.Linear(9216, 128)\n", - " self.fc2 = nn.Linear(128, 10)\n", - "\n", - " def forward(self, x):\n", - " x = self.conv1(x)\n", - " x = F.relu(x)\n", - " x = self.conv2(x)\n", - " x = F.relu(x)\n", - " x = F.max_pool2d(x, 2)\n", - " x = self.dropout1(x)\n", - " x = torch.flatten(x, 1)\n", - " x = self.fc1(x)\n", - " x = F.relu(x)\n", - " x = self.dropout2(x)\n", - " x = self.fc2(x)\n", - " return F.log_softmax(x, dim=1)\n", - "\n", - "\n", - "def run(hparams):\n", - " # Create the DVCLive Logger\n", - " logger = DVCLiveLogger(report=\"notebook\")\n", - "\n", - " # Log dict of hyperparameters\n", - " logger.log_hyperparams(hparams.__dict__)\n", - "\n", - " # Create the Lightning Fabric object. The parameters like accelerator, strategy, devices etc. will be proided\n", - " # by the command line. 
See all options: `lightning run model --help`\n", - " fabric = Fabric()\n", - "\n", - " seed_everything(hparams.seed) # instead of torch.manual_seed(...)\n", - "\n", - " transform = T.Compose([T.ToTensor(), T.Normalize((0.1307,), (0.3081,))])\n", - "\n", - " # Let rank 0 download the data first, then everyone will load MNIST\n", - " with fabric.rank_zero_first(local=False): # set `local=True` if your filesystem is not shared between machines\n", - " train_dataset = MNIST(DATASETS_PATH, download=fabric.is_global_zero, train=True, transform=transform)\n", - " test_dataset = MNIST(DATASETS_PATH, download=fabric.is_global_zero, train=False, transform=transform)\n", - "\n", - " train_loader = torch.utils.data.DataLoader(\n", - " train_dataset,\n", - " batch_size=hparams.batch_size,\n", - " )\n", - " test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=hparams.batch_size)\n", - "\n", - " # don't forget to call `setup_dataloaders` to prepare for dataloaders for distributed training.\n", - " train_loader, test_loader = fabric.setup_dataloaders(train_loader, test_loader)\n", - "\n", - " model = Net() # remove call to .to(device)\n", - " optimizer = optim.Adadelta(model.parameters(), lr=hparams.lr)\n", - "\n", - " # don't forget to call `setup` to prepare for model / optimizer for distributed training.\n", - " # the model is moved automatically to the right device.\n", - " model, optimizer = fabric.setup(model, optimizer)\n", - "\n", - " scheduler = StepLR(optimizer, step_size=1, gamma=hparams.gamma)\n", - "\n", - " # use torchmetrics instead of manually computing the accuracy\n", - " test_acc = Accuracy(task=\"multiclass\", num_classes=10).to(fabric.device)\n", - "\n", - " # EPOCH LOOP\n", - " for epoch in range(1, hparams.epochs + 1):\n", - " # TRAINING LOOP\n", - " model.train()\n", - " for batch_idx, (data, target) in enumerate(train_loader):\n", - " # NOTE: no need to call `.to(device)` on the data, target\n", - " optimizer.zero_grad()\n", - " output = 
model(data)\n", - " loss = F.nll_loss(output, target)\n", - " fabric.backward(loss) # instead of loss.backward()\n", - "\n", - " optimizer.step()\n", - " if (batch_idx == 0) or ((batch_idx + 1) % hparams.log_interval == 0):\n", - " print(\n", - " \"Train Epoch: {} [{}/{} ({:.0f}%)]\\tLoss: {:.6f}\".format(\n", - " epoch,\n", - " batch_idx * len(data),\n", - " len(train_loader.dataset),\n", - " 100.0 * batch_idx / len(train_loader),\n", - " loss.item(),\n", - " )\n", - " )\n", - "\n", - " # Log dict of metrics\n", - " logger.log_metrics({\"loss\": loss.item()})\n", - "\n", - " if hparams.dry_run:\n", - " break\n", - "\n", - " scheduler.step()\n", - "\n", - " # TESTING LOOP\n", - " model.eval()\n", - " test_loss = 0\n", - " with torch.no_grad():\n", - " for data, target in test_loader:\n", - " # NOTE: no need to call `.to(device)` on the data, target\n", - " output = model(data)\n", - " test_loss += F.nll_loss(output, target, reduction=\"sum\").item()\n", - "\n", - " # WITHOUT TorchMetrics\n", - " # pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability\n", - " # correct += pred.eq(target.view_as(pred)).sum().item()\n", - "\n", - " # WITH TorchMetrics\n", - " test_acc(output, target)\n", - "\n", - " if hparams.dry_run:\n", - " break\n", - "\n", - " # all_gather is used to aggregated the value across processes\n", - " test_loss = fabric.all_gather(test_loss).sum() / len(test_loader.dataset)\n", - "\n", - " print(f\"\\nTest set: Average loss: {test_loss:.4f}, Accuracy: ({100 * test_acc.compute():.0f}%)\\n\")\n", - "\n", - " # log additional metrics\n", - " logger.log_metrics({\"test_loss\": test_loss, \"test_acc\": 100 * test_acc.compute()})\n", - "\n", - " test_acc.reset()\n", - "\n", - " if hparams.dry_run:\n", - " break\n", - "\n", - " # When using distributed training, use `fabric.save`\n", - " # to ensure the current process is allowed to save a checkpoint\n", - " if hparams.save_model:\n", - " fabric.save(\"mnist_cnn.pt\", 
model.state_dict())\n", - "\n", - " # `logger.experiment` provides access to the `dvclive.Live` instance where you can use additional logging methods.\n", - " # Check that `rank_zero_only.rank == 0` to avoid logging in other processes.\n", - " if rank_zero_only.rank == 0:\n", - " logger.experiment.log_artifact(\"mnist_cnn.pt\")\n", - "\n", - " # Call finalize to save final results as a DVC experiment\n", - " logger.finalize(\"success\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "o5_v9lRDAM7l" - }, - "source": [ - "## Train the model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 - }, - "id": "BbCXen1PTM4V", - "outputId": "b79c90eb-74cc-474d-c0dd-21245064bca8" - }, - "outputs": [], - "source": [ - "hparams = SimpleNamespace(batch_size=64, epochs=5, lr=1.0, gamma=0.7, dry_run=False, seed=1, log_interval=10, save_model=True)\n", - "run(hparams)" - ] + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "QKSE19fW_Dnj" + }, + "source": [ + "# DVCLive and Lightning Fabric" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "q-C_4R_o_QGG" + }, + "source": [ + "## Install dvclive" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "DnqCrlbLAopV" - }, - "outputs": [], - "source": [] - } - ], - "metadata": { + "id": "-XFbvwq7TSwN", + "outputId": "15d0e3b5-bb4a-4b3e-d37f-21608d1822ed" + }, + "outputs": [], + "source": [ + "!pip install \"dvclive[lightning]\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "I6S6Uru1_Y0x" + }, + "source": [ + "## Initialize DVC Repository" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { "colab": { - "provenance": [] + "base_uri": "https://localhost:8080/" }, - "kernelspec": { - 
"display_name": "Python 3", - "name": "python3" + "id": "WcbvUl2uTV0y", + "outputId": "aff9740c-26db-483d-ce30-cfef395f3cbb" + }, + "outputs": [], + "source": [ + "!git init -q\n", + "!git config --local user.email \"you@example.com\"\n", + "!git config --local user.name \"Your Name\"\n", + "!dvc init -q\n", + "!git commit -m \"DVC init\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LmY4PLMh_cUk" + }, + "source": [ + "## Imports" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "85qErT5yTEbN" + }, + "outputs": [], + "source": [ + "from types import SimpleNamespace\n", + "\n", + "import torch\n", + "from torch import nn\n", + "import torch.nn.functional as F\n", + "from torch import optim\n", + "import torchvision.transforms as T\n", + "from lightning.fabric import Fabric, seed_everything\n", + "from lightning.fabric.utilities.rank_zero import rank_zero_only\n", + "from torch.optim.lr_scheduler import StepLR\n", + "from torchmetrics.classification import Accuracy\n", + "from torchvision.datasets import MNIST\n", + "\n", + "from dvclive.fabric import DVCLiveLogger\n", + "\n", + "DATASETS_PATH = \"Datasets\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UrmAHbhr_lgs" + }, + "source": [ + "## Setup model code\n", + "\n", + "Adapted from https://github.com/Lightning-AI/pytorch-lightning/blob/master/examples/fabric/image_classifier/train_fabric.py.\n", + "\n", + "Look for the `logger` statements where DVCLiveLogger calls were added." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "UCzTygUnTHM8" + }, + "outputs": [], + "source": [ + "class Net(nn.Module):\n", + " def __init__(self) -> None:\n", + " super().__init__()\n", + " self.conv1 = nn.Conv2d(1, 32, 3, 1)\n", + " self.conv2 = nn.Conv2d(32, 64, 3, 1)\n", + " self.dropout1 = nn.Dropout(0.25)\n", + " self.dropout2 = nn.Dropout(0.5)\n", + " self.fc1 = nn.Linear(9216, 128)\n", + " self.fc2 = nn.Linear(128, 10)\n", + "\n", + " def forward(self, x):\n", + " x = self.conv1(x)\n", + " x = F.relu(x)\n", + " x = self.conv2(x)\n", + " x = F.relu(x)\n", + " x = F.max_pool2d(x, 2)\n", + " x = self.dropout1(x)\n", + " x = torch.flatten(x, 1)\n", + " x = self.fc1(x)\n", + " x = F.relu(x)\n", + " x = self.dropout2(x)\n", + " x = self.fc2(x)\n", + " return F.log_softmax(x, dim=1)\n", + "\n", + "\n", + "def run(hparams):\n", + " # Create the DVCLive Logger\n", + " logger = DVCLiveLogger(report=\"notebook\")\n", + "\n", + " # Log dict of hyperparameters\n", + " logger.log_hyperparams(hparams.__dict__)\n", + "\n", + " # Create the Lightning Fabric object. The parameters like accelerator, strategy, devices etc. will be proided\n", + " # by the command line. 
See all options: `lightning run model --help`\n", + " fabric = Fabric()\n", + "\n", + " seed_everything(hparams.seed) # instead of torch.manual_seed(...)\n", + "\n", + " transform = T.Compose([T.ToTensor(), T.Normalize((0.1307,), (0.3081,))])\n", + "\n", + " # Let rank 0 download the data first, then everyone will load MNIST\n", + " with fabric.rank_zero_first(\n", + " local=False\n", + " ): # set `local=True` if your filesystem is not shared between machines\n", + " train_dataset = MNIST(\n", + " DATASETS_PATH,\n", + " download=fabric.is_global_zero,\n", + " train=True,\n", + " transform=transform,\n", + " )\n", + " test_dataset = MNIST(\n", + " DATASETS_PATH,\n", + " download=fabric.is_global_zero,\n", + " train=False,\n", + " transform=transform,\n", + " )\n", + "\n", + " train_loader = torch.utils.data.DataLoader(\n", + " train_dataset,\n", + " batch_size=hparams.batch_size,\n", + " )\n", + " test_loader = torch.utils.data.DataLoader(\n", + " test_dataset, batch_size=hparams.batch_size\n", + " )\n", + "\n", + " # don't forget to call `setup_dataloaders` to prepare for dataloaders for distributed training.\n", + " train_loader, test_loader = fabric.setup_dataloaders(train_loader, test_loader)\n", + "\n", + " model = Net() # remove call to .to(device)\n", + " optimizer = optim.Adadelta(model.parameters(), lr=hparams.lr)\n", + "\n", + " # don't forget to call `setup` to prepare for model / optimizer for distributed training.\n", + " # the model is moved automatically to the right device.\n", + " model, optimizer = fabric.setup(model, optimizer)\n", + "\n", + " scheduler = StepLR(optimizer, step_size=1, gamma=hparams.gamma)\n", + "\n", + " # use torchmetrics instead of manually computing the accuracy\n", + " test_acc = Accuracy(task=\"multiclass\", num_classes=10).to(fabric.device)\n", + "\n", + " # EPOCH LOOP\n", + " for epoch in range(1, hparams.epochs + 1):\n", + " # TRAINING LOOP\n", + " model.train()\n", + " for batch_idx, (data, target) in 
enumerate(train_loader):\n", + " # NOTE: no need to call `.to(device)` on the data, target\n", + " optimizer.zero_grad()\n", + " output = model(data)\n", + " loss = F.nll_loss(output, target)\n", + " fabric.backward(loss) # instead of loss.backward()\n", + "\n", + " optimizer.step()\n", + " if (batch_idx == 0) or ((batch_idx + 1) % hparams.log_interval == 0):\n", + " print(\n", + " f\"Train Epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)} ({100.0 * batch_idx / len(train_loader):.0f}%)]\\tLoss: {loss.item():.6f}\"\n", + " )\n", + "\n", + " # Log dict of metrics\n", + " logger.log_metrics({\"loss\": loss.item()})\n", + "\n", + " if hparams.dry_run:\n", + " break\n", + "\n", + " scheduler.step()\n", + "\n", + " # TESTING LOOP\n", + " model.eval()\n", + " test_loss = 0\n", + " with torch.no_grad():\n", + " for data, target in test_loader:\n", + " # NOTE: no need to call `.to(device)` on the data, target\n", + " output = model(data)\n", + " test_loss += F.nll_loss(output, target, reduction=\"sum\").item()\n", + "\n", + " # WITHOUT TorchMetrics\n", + " # pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability\n", + " # correct += pred.eq(target.view_as(pred)).sum().item()\n", + "\n", + " # WITH TorchMetrics\n", + " test_acc(output, target)\n", + "\n", + " if hparams.dry_run:\n", + " break\n", + "\n", + " # all_gather is used to aggregate the value across processes\n", + " test_loss = fabric.all_gather(test_loss).sum() / len(test_loader.dataset)\n", + "\n", + " print(\n", + " f\"\\nTest set: Average loss: {test_loss:.4f}, Accuracy: ({100 * test_acc.compute():.0f}%)\\n\"\n", + " )\n", + "\n", + " # log additional metrics\n", + " logger.log_metrics(\n", + " {\"test_loss\": test_loss, \"test_acc\": 100 * test_acc.compute()}\n", + " )\n", + "\n", + " test_acc.reset()\n", + "\n", + " if hparams.dry_run:\n", + " break\n", + "\n", + " # When using distributed training, use `fabric.save`\n", + " # to ensure the current process is
allowed to save a checkpoint\n", + " if hparams.save_model:\n", + " fabric.save(\"mnist_cnn.pt\", model.state_dict())\n", + "\n", + " # `logger.experiment` provides access to the `dvclive.Live` instance where you can use additional logging methods.\n", + " # Check that `rank_zero_only.rank == 0` to avoid logging in other processes.\n", + " if rank_zero_only.rank == 0:\n", + " logger.experiment.log_artifact(\"mnist_cnn.pt\")\n", + "\n", + " # Call finalize to save final results as a DVC experiment\n", + " logger.finalize(\"success\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "o5_v9lRDAM7l" + }, + "source": [ + "## Train the model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 }, - "language_info": { - "name": "python" - } + "id": "BbCXen1PTM4V", + "outputId": "b79c90eb-74cc-474d-c0dd-21245064bca8" + }, + "outputs": [], + "source": [ + "hparams = SimpleNamespace(\n", + " batch_size=64,\n", + " epochs=5,\n", + " lr=1.0,\n", + " gamma=0.7,\n", + " dry_run=False,\n", + " seed=1,\n", + " log_interval=10,\n", + " save_model=True,\n", + ")\n", + "run(hparams)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "DnqCrlbLAopV" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" }, - "nbformat": 4, - "nbformat_minor": 0 + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 } diff --git a/examples/DVCLive-HuggingFace.ipynb b/examples/DVCLive-HuggingFace.ipynb index 6f9a81dd..0d1946f2 100644 --- a/examples/DVCLive-HuggingFace.ipynb +++ b/examples/DVCLive-HuggingFace.ipynb @@ -106,11 +106,23 @@ "source": [ "tokenizer = AutoTokenizer.from_pretrained(\"distilbert-base-cased\")\n", "\n", + "\n", "def tokenize_function(examples):\n", " return tokenizer(examples[\"text\"], 
padding=\"max_length\", truncation=True)\n", "\n", - "small_train_dataset = dataset[\"train\"].shuffle(seed=42).select(range(2000)).map(tokenize_function, batched=True)\n", - "small_eval_dataset = dataset[\"test\"].shuffle(seed=42).select(range(200)).map(tokenize_function, batched=True)" + "\n", + "small_train_dataset = (\n", + " dataset[\"train\"]\n", + " .shuffle(seed=42)\n", + " .select(range(2000))\n", + " .map(tokenize_function, batched=True)\n", + ")\n", + "small_eval_dataset = (\n", + " dataset[\"test\"]\n", + " .shuffle(seed=42)\n", + " .select(range(200))\n", + " .map(tokenize_function, batched=True)\n", + ")" ] }, { @@ -138,6 +150,7 @@ "\n", "metric = evaluate.load(\"f1\")\n", "\n", + "\n", "def compute_metrics(eval_pred):\n", " logits, labels = eval_pred\n", " predictions = np.argmax(logits, axis=-1)\n", @@ -178,7 +191,9 @@ "from transformers.integrations import DVCLiveCallback\n", "from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer\n", "\n", - "model = AutoModelForSequenceClassification.from_pretrained(\"distilbert-base-cased\", num_labels=2)\n", + "model = AutoModelForSequenceClassification.from_pretrained(\n", + " \"distilbert-base-cased\", num_labels=2\n", + ")\n", "for param in model.base_model.parameters():\n", " param.requires_grad = False\n", "\n", @@ -224,7 +239,6 @@ "outputs": [], "source": [ "from dvclive import Live\n", - "from transformers.integrations import DVCLiveCallback\n", "\n", "lr = 1e-4\n", "\n", @@ -273,13 +287,13 @@ "import dvc.api\n", "import pandas as pd\n", "\n", - "columns = [\"Experiment\", \"epoch\", \"eval.f1\"]\n", + "columns = [\"Experiment\", \"epoch\", \"eval.f1\"]\n", "\n", "df = pd.DataFrame(dvc.api.exp_show(), columns=columns)\n", "\n", "df.dropna(inplace=True)\n", "df.reset_index(drop=True, inplace=True)\n", - "df\n" + "df" ] }, { @@ -302,7 +316,8 @@ "outputs": [], "source": [ "from IPython.display import HTML\n", - "HTML(filename='./dvc_plots/index.html')" + "\n", + 
"HTML(filename=\"./dvc_plots/index.html\")" ] } ], diff --git a/examples/DVCLive-PyTorch-Lightning.ipynb b/examples/DVCLive-PyTorch-Lightning.ipynb index 1e140e48..8e152b74 100644 --- a/examples/DVCLive-PyTorch-Lightning.ipynb +++ b/examples/DVCLive-PyTorch-Lightning.ipynb @@ -1,273 +1,276 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "A812CVYi_B2b" - }, - "source": [ - "\"Open" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "gPh2FiPo_B2e" - }, - "source": [ - "# DVCLive and PyTorch Lightning" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "m0XW9Ml7_B2e" - }, - "source": [ - "## Setup" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "QivH1_cU_B2f" - }, - "outputs": [], - "source": [ - "!pip install \"dvclive[lightning]\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "pn_5GW1f_B2g" - }, - "outputs": [], - "source": [ - "!git init -q\n", - "!git config --local user.email \"you@example.com\"\n", - "!git config --local user.name \"Your Name\"\n", - "!dvc init -q\n", - "!git commit -m \"DVC init\"" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "zC9hk7kibFTX" - }, - "source": [ - "### Define LightningModule" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "t5PxdljP_B2h" - }, - "outputs": [], - "source": [ - "import lightning.pytorch as pl\n", - "import torch\n", - "\n", - "class LitAutoEncoder(pl.LightningModule):\n", - " def __init__(self, encoder_size=64, lr=1e-3):\n", - " super().__init__()\n", - " self.save_hyperparameters()\n", - " self.encoder = torch.nn.Sequential(\n", - " torch.nn.Linear(28 * 28, encoder_size),\n", - " torch.nn.ReLU(),\n", - " torch.nn.Linear(encoder_size, 3)\n", - " )\n", - " self.decoder = torch.nn.Sequential(\n", - " torch.nn.Linear(3, encoder_size),\n", - " torch.nn.ReLU(),\n", - " torch.nn.Linear(encoder_size, 28 * 28)\n", - " )\n", - "\n", - " 
def training_step(self, batch, batch_idx):\n", - " x, y = batch\n", - " x = x.view(x.size(0), -1)\n", - " z = self.encoder(x)\n", - " x_hat = self.decoder(z)\n", - " train_mse = torch.nn.functional.mse_loss(x_hat, x)\n", - " self.log(\"train_mse\", train_mse)\n", - " return train_mse\n", - "\n", - " def validation_step(self, batch, batch_idx):\n", - " x, y = batch\n", - " x = x.view(x.size(0), -1)\n", - " z = self.encoder(x)\n", - " x_hat = self.decoder(z)\n", - " val_mse = torch.nn.functional.mse_loss(x_hat, x)\n", - " self.log(\"val_mse\", val_mse)\n", - " return val_mse\n", - "\n", - " def configure_optimizers(self):\n", - " optimizer = torch.optim.Adam(self.parameters(), lr=self.hparams.lr)\n", - " return optimizer" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "St0ElX9obqRS" - }, - "source": [ - "### Dataset and loaders" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "T5s53qgr_B2h" - }, - "outputs": [], - "source": [ - "from torchvision.datasets import MNIST\n", - "import torchvision.transforms as transforms\n", - "\n", - "transform = transforms.ToTensor()\n", - "train_set = MNIST(root=\"MNIST\", download=True, train=True, transform=transform)\n", - "validation_set = MNIST(root=\"MNIST\", download=True, train=False, transform=transform)\n", - "train_loader = torch.utils.data.DataLoader(train_set)\n", - "validation_loader = torch.utils.data.DataLoader(validation_set)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ttiwwreH_B2i" - }, - "source": [ - "# Tracking experiments with DVCLive" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "sE6qj6BMoDkn" - }, - "outputs": [], - "source": [ - "from dvclive.lightning import DVCLiveLogger" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "XDqNY8pL_B2i" - }, - "outputs": [], - "source": [ - "for encoder_size in (64, 128):\n", - " for lr in (1e-3, 0.1):\n", - " model = 
LitAutoEncoder(encoder_size=encoder_size, lr=lr)\n", - " trainer = pl.Trainer(\n", - " limit_train_batches=200,\n", - " limit_val_batches=100,\n", - " max_epochs=5,\n", - " logger=DVCLiveLogger(log_model=True, report=\"notebook\"),\n", - " )\n", - " trainer.fit(model, train_loader, validation_loader)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "7zEi0BXp_B2i" - }, - "source": [ - "## Comparing results" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "1aHmLHmf_B2i" - }, - "outputs": [], - "source": [ - "import dvc.api\n", - "import pandas as pd\n", - "\n", - "columns = [\"Experiment\", \"encoder_size\", \"lr\", \"train.mse\", \"val.mse\"]\n", - "\n", - "df = pd.DataFrame(dvc.api.exp_show(), columns=columns)\n", - "\n", - "df.dropna(inplace=True)\n", - "df.reset_index(drop=True, inplace=True)\n", - "df\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "db42qeHEGqTA" - }, - "outputs": [], - "source": [ - "from plotly.express import parallel_coordinates\n", - "fig = parallel_coordinates(df, columns, color=\"val.mse\")\n", - "fig.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "3cfvi0Uk_B2j" - }, - "outputs": [], - "source": [ - "!dvc plots diff $(dvc exp list --names-only)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Zx5n2zbn_B2j" - }, - "outputs": [], - "source": [ - "from IPython.display import HTML\n", - "HTML(filename='./dvc_plots/index.html')" - ] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "gpuType": "T4", - "provenance": [], - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - 
"version": "3.9.16" - } + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "A812CVYi_B2b" + }, + "source": [ + "\"Open" + ] }, - "nbformat": 4, - "nbformat_minor": 0 + { + "cell_type": "markdown", + "metadata": { + "id": "gPh2FiPo_B2e" + }, + "source": [ + "# DVCLive and PyTorch Lightning" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "m0XW9Ml7_B2e" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QivH1_cU_B2f" + }, + "outputs": [], + "source": [ + "!pip install \"dvclive[lightning]\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pn_5GW1f_B2g" + }, + "outputs": [], + "source": [ + "!git init -q\n", + "!git config --local user.email \"you@example.com\"\n", + "!git config --local user.name \"Your Name\"\n", + "!dvc init -q\n", + "!git commit -m \"DVC init\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zC9hk7kibFTX" + }, + "source": [ + "### Define LightningModule" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "t5PxdljP_B2h" + }, + "outputs": [], + "source": [ + "import lightning.pytorch as pl\n", + "import torch\n", + "\n", + "\n", + "class LitAutoEncoder(pl.LightningModule):\n", + " def __init__(self, encoder_size=64, lr=1e-3):\n", + " super().__init__()\n", + " self.save_hyperparameters()\n", + " self.encoder = torch.nn.Sequential(\n", + " torch.nn.Linear(28 * 28, encoder_size),\n", + " torch.nn.ReLU(),\n", + " torch.nn.Linear(encoder_size, 3),\n", + " )\n", + " self.decoder = torch.nn.Sequential(\n", + " torch.nn.Linear(3, encoder_size),\n", + " torch.nn.ReLU(),\n", + " torch.nn.Linear(encoder_size, 28 * 28),\n", + " )\n", + "\n", + " def training_step(self, batch, batch_idx):\n", + " x, y = batch\n", + " x = x.view(x.size(0), -1)\n", + " z = self.encoder(x)\n", + " x_hat = self.decoder(z)\n", + " train_mse = torch.nn.functional.mse_loss(x_hat, x)\n", + " 
self.log(\"train_mse\", train_mse)\n", + " return train_mse\n", + "\n", + " def validation_step(self, batch, batch_idx):\n", + " x, y = batch\n", + " x = x.view(x.size(0), -1)\n", + " z = self.encoder(x)\n", + " x_hat = self.decoder(z)\n", + " val_mse = torch.nn.functional.mse_loss(x_hat, x)\n", + " self.log(\"val_mse\", val_mse)\n", + " return val_mse\n", + "\n", + " def configure_optimizers(self):\n", + " optimizer = torch.optim.Adam(self.parameters(), lr=self.hparams.lr)\n", + " return optimizer" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "St0ElX9obqRS" + }, + "source": [ + "### Dataset and loaders" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "T5s53qgr_B2h" + }, + "outputs": [], + "source": [ + "from torchvision.datasets import MNIST\n", + "from torchvision import transforms\n", + "\n", + "transform = transforms.ToTensor()\n", + "train_set = MNIST(root=\"MNIST\", download=True, train=True, transform=transform)\n", + "validation_set = MNIST(root=\"MNIST\", download=True, train=False, transform=transform)\n", + "train_loader = torch.utils.data.DataLoader(train_set)\n", + "validation_loader = torch.utils.data.DataLoader(validation_set)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ttiwwreH_B2i" + }, + "source": [ + "# Tracking experiments with DVCLive" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "sE6qj6BMoDkn" + }, + "outputs": [], + "source": [ + "from dvclive.lightning import DVCLiveLogger" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "XDqNY8pL_B2i" + }, + "outputs": [], + "source": [ + "for encoder_size in (64, 128):\n", + " for lr in (1e-3, 0.1):\n", + " model = LitAutoEncoder(encoder_size=encoder_size, lr=lr)\n", + " trainer = pl.Trainer(\n", + " limit_train_batches=200,\n", + " limit_val_batches=100,\n", + " max_epochs=5,\n", + " logger=DVCLiveLogger(log_model=True, report=\"notebook\"),\n", + 
" )\n", + " trainer.fit(model, train_loader, validation_loader)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7zEi0BXp_B2i" + }, + "source": [ + "## Comparing results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1aHmLHmf_B2i" + }, + "outputs": [], + "source": [ + "import dvc.api\n", + "import pandas as pd\n", + "\n", + "columns = [\"Experiment\", \"encoder_size\", \"lr\", \"train.mse\", \"val.mse\"]\n", + "\n", + "df = pd.DataFrame(dvc.api.exp_show(), columns=columns)\n", + "\n", + "df.dropna(inplace=True)\n", + "df.reset_index(drop=True, inplace=True)\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "db42qeHEGqTA" + }, + "outputs": [], + "source": [ + "from plotly.express import parallel_coordinates\n", + "\n", + "fig = parallel_coordinates(df, columns, color=\"val.mse\")\n", + "fig.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3cfvi0Uk_B2j" + }, + "outputs": [], + "source": [ + "!dvc plots diff $(dvc exp list --names-only)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Zx5n2zbn_B2j" + }, + "outputs": [], + "source": [ + "from IPython.display import HTML\n", + "\n", + "HTML(filename=\"./dvc_plots/index.html\")" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + } + }, + "nbformat": 4, + "nbformat_minor": 0 } diff --git a/examples/DVCLive-Quickstart.ipynb b/examples/DVCLive-Quickstart.ipynb index 00026152..b628c4e3 100644 --- a/examples/DVCLive-Quickstart.ipynb +++ 
b/examples/DVCLive-Quickstart.ipynb @@ -65,11 +65,10 @@ "metadata": {}, "outputs": [], "source": [ - "#@title Training helpers. { display-mode: \"form\" }\n", + "# @title Training helpers. { display-mode: \"form\" }\n", "\n", "import numpy as np\n", "import torch\n", - "import torch.nn.functional as F\n", "import torchvision\n", "\n", "from dvclive import Live\n", @@ -79,15 +78,14 @@ "\n", "def transform(dataset):\n", " \"\"\"Get inputs and targets from dataset.\"\"\"\n", - " x = dataset.data.reshape(len(dataset.data), 1, 28, 28)/255\n", + " x = dataset.data.reshape(len(dataset.data), 1, 28, 28) / 255\n", " y = dataset.targets\n", " return x.to(device), y.to(device)\n", "\n", "\n", "def train_one_epoch(model, criterion, x, y, lr, weight_decay):\n", " model.train()\n", - " optimizer = torch.optim.Adam(\n", - " model.parameters(), lr=lr, weight_decay=weight_decay)\n", + " optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)\n", " y_pred = model(x)\n", " loss = criterion(y_pred, y)\n", " optimizer.zero_grad()\n", @@ -108,7 +106,7 @@ " metrics = {}\n", " criterion = torch.nn.CrossEntropyLoss()\n", " metrics[\"loss\"] = criterion(y_pred, y).item()\n", - " metrics[\"acc\"] = (y_pred_label == y).sum().item()/len(y)\n", + " metrics[\"acc\"] = (y_pred_label == y).sum().item() / len(y)\n", " return metrics\n", "\n", "\n", @@ -123,6 +121,7 @@ "\n", " return metrics, actual, predicted\n", "\n", + "\n", "def get_missclassified_image(actual, predicted, dataset):\n", " confusion = {}\n", " for n, (a, p) in enumerate(zip(actual, predicted)):\n", @@ -130,20 +129,23 @@ " confusion[(a, p)] = image\n", "\n", " max_i, max_j = 0, 0\n", - " for (i, j) in confusion:\n", - " if i > max_i:\n", - " max_i = i\n", - " if j > max_j:\n", - " max_j = j\n", + " for i, j in confusion:\n", + " max_i = max(i, max_i)\n", + " max_j = max(j, max_j)\n", "\n", " frame_size = 30\n", " image_shape = (28, 28)\n", " incorrect_color = np.array((255, 100, 100), dtype=\"uint8\")\n", 
" label_color = np.array((100, 100, 240), dtype=\"uint8\")\n", "\n", - " out_matrix = np.ones(shape=((max_i+2) * frame_size, (max_j+2) * frame_size, 3), dtype=\"uint8\") * 240\n", + " out_matrix = (\n", + " np.ones(\n", + " shape=((max_i + 2) * frame_size, (max_j + 2) * frame_size, 3), dtype=\"uint8\"\n", + " )\n", + " * 240\n", + " )\n", "\n", - " for i in range(max_i+1):\n", + " for i in range(max_i + 1):\n", " if (i, i) in confusion:\n", " image = confusion[(i, i)]\n", " xs = (i + 1) * frame_size + 1\n", @@ -154,14 +156,14 @@ " out_matrix[xs:xe, ys:ye, c] = (1 - image) * label_color[c]\n", " out_matrix[ys:ye, xs:xe, c] = (1 - image) * label_color[c]\n", "\n", - " for (i, j) in confusion:\n", + " for i, j in confusion:\n", " image = confusion[(i, j)]\n", " assert image.shape == image_shape\n", " xs = (i + 1) * frame_size + 1\n", " xe = (i + 2) * frame_size - 1\n", " ys = (j + 1) * frame_size + 1\n", " ye = (j + 2) * frame_size - 1\n", - " assert (xe-xs, ye-ys) == image_shape\n", + " assert (xe - xs, ye - ys) == image_shape\n", " if i != j:\n", " for c in range(3):\n", " out_matrix[xs:xe, ys:ye, c] = (1 - image) * incorrect_color[c]\n", @@ -175,7 +177,7 @@ "metadata": {}, "outputs": [], "source": [ - "#@title Initialize model and dataset. { display-mode: \"form\" }\n", + "# @title Initialize model and dataset. 
{ display-mode: \"form\" }\n", "\n", "model = torch.nn.Sequential(\n", " torch.nn.Flatten(),\n", @@ -212,40 +214,33 @@ "source": [ "# You can modify these parameters to see how they affect the training\n", "# And run the cell several times\n", - "params = {\n", - " \"epochs\": 5,\n", - " \"lr\": 0.003,\n", - " \"weight_decay\": 0\n", - "}\n", + "params = {\"epochs\": 5, \"lr\": 0.003, \"weight_decay\": 0}\n", "\n", "best_test_acc = 0\n", "\n", "with Live(report=\"notebook\") as live:\n", - "\n", " live.log_params(params)\n", "\n", " for _ in range(params[\"epochs\"]):\n", - "\n", " train_one_epoch(\n", " model, criterion, x_train, y_train, params[\"lr\"], params[\"weight_decay\"]\n", " )\n", "\n", " # Train Evaluation\n", - " metrics_train, acual_train, predicted_train = evaluate(\n", - " model, x_train, y_train)\n", + " metrics_train, acual_train, predicted_train = evaluate(model, x_train, y_train)\n", "\n", " for k, v in metrics_train.items():\n", " live.log_metric(f\"train/{k}\", v)\n", "\n", " live.log_sklearn_plot(\n", - " \"confusion_matrix\", \n", - " acual_train, predicted_train, \n", - " name=\"train/confusion_matrix\"\n", + " \"confusion_matrix\",\n", + " acual_train,\n", + " predicted_train,\n", + " name=\"train/confusion_matrix\",\n", " )\n", "\n", " # Test Evaluation\n", - " metrics_test, actual, predicted = evaluate(\n", - " model, x_test, y_test)\n", + " metrics_test, actual, predicted = evaluate(model, x_test, y_test)\n", "\n", " for k, v in metrics_test.items():\n", " live.log_metric(f\"test/{k}\", v)\n", @@ -255,8 +250,7 @@ " )\n", "\n", " live.log_image(\n", - " \"misclassified.jpg\", \n", - " get_missclassified_image(actual, predicted, mnist_test)\n", + " \"misclassified.jpg\", get_missclassified_image(actual, predicted, mnist_test)\n", " )\n", "\n", " # Save best model\n", @@ -291,7 +285,7 @@ "\n", "df.dropna(inplace=True)\n", "df.reset_index(drop=True, inplace=True)\n", - "df\n" + "df" ] }, { @@ -322,7 +316,8 @@ "outputs": [], "source": [ 
"from IPython.display import HTML\n", - "HTML(filename='./dvc_plots/index.html')" + "\n", + "HTML(filename=\"./dvc_plots/index.html\")" ] } ], diff --git a/examples/DVCLive-YOLO.ipynb b/examples/DVCLive-YOLO.ipynb index 4c25a4f6..ed69e5ce 100644 --- a/examples/DVCLive-YOLO.ipynb +++ b/examples/DVCLive-YOLO.ipynb @@ -29,6 +29,7 @@ "source": [ "%pip install dvclive ultralytics\n", "import ultralytics\n", + "\n", "ultralytics.checks()" ] }, @@ -104,6 +105,7 @@ "outputs": [], "source": [ "from plotly.express import parallel_coordinates\n", + "\n", "fig = parallel_coordinates(df, columns, color=\"metrics.mAP50-95(B)\")\n", "fig.show()" ] @@ -114,7 +116,7 @@ "metadata": {}, "outputs": [], "source": [ - "!dvc plots diff $(dvc exp list --names-only) " + "!dvc plots diff $(dvc exp list --names-only)" ] }, { @@ -124,7 +126,8 @@ "outputs": [], "source": [ "from IPython.display import HTML\n", - "HTML(filename='./dvc_plots/index.html')" + "\n", + "HTML(filename=\"./dvc_plots/index.html\")" ] } ], diff --git a/tests/conftest.py b/tests/conftest.py index 8b0e0dc7..ce7fc257 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -7,13 +7,13 @@ from dvclive.utils import rel_path -@pytest.fixture() +@pytest.fixture def tmp_dir(tmp_path, monkeypatch): monkeypatch.chdir(tmp_path) return tmp_path -@pytest.fixture() +@pytest.fixture def mocked_dvc_repo(tmp_dir, mocker): _dvc_repo = mocker.MagicMock() _dvc_repo.index.stages = [] @@ -28,13 +28,13 @@ def mocked_dvc_repo(tmp_dir, mocker): return _dvc_repo -@pytest.fixture() +@pytest.fixture def mocked_dvc_subrepo(tmp_dir, mocker, mocked_dvc_repo): mocked_dvc_repo.root_dir = tmp_dir / "subdir" return mocked_dvc_repo -@pytest.fixture() +@pytest.fixture def dvc_repo(tmp_dir): from dvc.repo import Repo from scmrepo.git import Git @@ -62,7 +62,7 @@ def _mocked_ci(monkeypatch): monkeypatch.setenv("CI", "false") -@pytest.fixture() +@pytest.fixture def mocked_studio_post(mocker, monkeypatch): valid_response = mocker.MagicMock() 
valid_response.status_code = 200 diff --git a/tests/frameworks/test_fastai.py b/tests/frameworks/test_fastai.py index 452e54cd..77bdfa76 100644 --- a/tests/frameworks/test_fastai.py +++ b/tests/frameworks/test_fastai.py @@ -21,7 +21,7 @@ pytest.skip("skipping fastai tests", allow_module_level=True) -@pytest.fixture() +@pytest.fixture def data_loader(): from pandas import DataFrame diff --git a/tests/frameworks/test_huggingface.py b/tests/frameworks/test_huggingface.py index 18b39901..a057d942 100644 --- a/tests/frameworks/test_huggingface.py +++ b/tests/frameworks/test_huggingface.py @@ -84,18 +84,18 @@ def forward(self, input_x, labels=None, **kwargs): return (loss, y, y) if self.double_output else (loss, y) -@pytest.fixture() +@pytest.fixture def data(): return RegressionDataset(), RegressionDataset() -@pytest.fixture() +@pytest.fixture def model(): config = RegressionModelConfig() return RegressionPreTrainedModel(config) -@pytest.fixture() +@pytest.fixture def args(): return TrainingArguments( "foo", diff --git a/tests/frameworks/test_keras.py b/tests/frameworks/test_keras.py index 46239091..ceca0baa 100644 --- a/tests/frameworks/test_keras.py +++ b/tests/frameworks/test_keras.py @@ -12,7 +12,7 @@ pytest.skip("skipping keras tests", allow_module_level=True) -@pytest.fixture() +@pytest.fixture def xor_model(): import numpy as np import tensorflow as tf diff --git a/tests/frameworks/test_lgbm.py b/tests/frameworks/test_lgbm.py index 250f355a..749365b2 100644 --- a/tests/frameworks/test_lgbm.py +++ b/tests/frameworks/test_lgbm.py @@ -17,12 +17,12 @@ pytest.skip("skipping lightgbm tests", allow_module_level=True) -@pytest.fixture() +@pytest.fixture def model_params(): return {"objective": "multiclass", "n_estimators": 5, "seed": 0} -@pytest.fixture() +@pytest.fixture def iris_data(): iris = datasets.load_iris() x = pd.DataFrame(iris["data"], columns=iris["feature_names"]) diff --git a/tests/frameworks/test_xgboost.py b/tests/frameworks/test_xgboost.py index 
0b375450..dff0ec28 100644 --- a/tests/frameworks/test_xgboost.py +++ b/tests/frameworks/test_xgboost.py @@ -18,12 +18,12 @@ pytest.skip("skipping xgboost tests", allow_module_level=True) -@pytest.fixture() +@pytest.fixture def train_params(): return {"objective": "multi:softmax", "num_class": 3, "seed": 0} -@pytest.fixture() +@pytest.fixture def iris_data(): iris = datasets.load_iris() x = pd.DataFrame(iris["data"], columns=iris["feature_names"]) @@ -31,7 +31,7 @@ def iris_data(): return xgb.DMatrix(x, y) -@pytest.fixture() +@pytest.fixture def iris_train_eval_data(): iris = datasets.load_iris() x_train, x_eval, y_train, y_eval = train_test_split( diff --git a/tests/plots/test_sklearn.py b/tests/plots/test_sklearn.py index 85b85dd9..26ccb464 100644 --- a/tests/plots/test_sklearn.py +++ b/tests/plots/test_sklearn.py @@ -8,7 +8,7 @@ from dvclive.plots.sklearn import SKLearnPlot -@pytest.fixture() +@pytest.fixture def y_true_y_pred_y_score(): from sklearn.datasets import make_classification from sklearn.ensemble import RandomForestClassifier diff --git a/tests/test_make_report.py b/tests/test_make_report.py index 4ccbc136..a5a2e7f8 100644 --- a/tests/test_make_report.py +++ b/tests/test_make_report.py @@ -104,7 +104,7 @@ def test_make_report(tmp_dir, mode): last_report = current_report -@pytest.mark.vscode() +@pytest.mark.vscode def test_make_report_open(tmp_dir, mocker, monkeypatch): mocked_open = mocker.patch("webbrowser.open") live = Live() diff --git a/tests/test_monitor_system.py b/tests/test_monitor_system.py index d704f00b..28d175bb 100644 --- a/tests/test_monitor_system.py +++ b/tests/test_monitor_system.py @@ -89,7 +89,7 @@ def mock_pynvml(mocker, num_gpus=2): mocker.patch(f"{prefix}.nvmlDeviceGetUtilizationRates", return_value=gpu_usage) -@pytest.fixture() +@pytest.fixture def cpu_metrics(): content = { METRIC_CPU_COUNT: 6, @@ -112,7 +112,7 @@ def _timeserie_schema(name, value): return [{name: str(value), "timestamp": str, "step": "0"}] -@pytest.fixture() 
+@pytest.fixture def cpu_timeseries(): return { f"{METRIC_CPU_USAGE_PERCENT}.tsv": _timeserie_schema( @@ -132,7 +132,7 @@ def cpu_timeseries(): } -@pytest.fixture() +@pytest.fixture def gpu_timeseries(): return { f"{METRIC_GPU_USAGE_PERCENT}/0.tsv": _timeserie_schema("0", 50.0), diff --git a/tests/test_post_to_studio.py b/tests/test_post_to_studio.py index a9ac8383..3f585eb0 100644 --- a/tests/test_post_to_studio.py +++ b/tests/test_post_to_studio.py @@ -211,7 +211,7 @@ def long_post(*args, **kwargs): assert metrics_file.read_text() == metrics_content -@pytest.mark.studio() +@pytest.mark.studio def test_post_to_studio_skip_start_and_done_on_env_var( tmp_dir, mocked_dvc_repo, mocked_studio_post, monkeypatch ): @@ -230,7 +230,7 @@ def test_post_to_studio_skip_start_and_done_on_env_var( assert "done" not in call_types -@pytest.mark.studio() +@pytest.mark.studio def test_post_to_studio_dvc_studio_config( tmp_dir, mocker, mocked_dvc_repo, mocked_studio_post, monkeypatch ): @@ -252,7 +252,7 @@ def test_post_to_studio_dvc_studio_config( assert mocked_post.call_args.kwargs["headers"]["Authorization"] == "token token" -@pytest.mark.studio() +@pytest.mark.studio def test_post_to_studio_skip_if_no_token( tmp_dir, mocker, @@ -297,7 +297,7 @@ def test_post_to_studio_shorten_names(tmp_dir, mocked_dvc_repo, mocked_studio_po ) -@pytest.mark.studio() +@pytest.mark.studio def test_post_to_studio_inside_dvc_exp( tmp_dir, mocker, monkeypatch, mocked_studio_post, mocked_dvc_repo ): @@ -318,7 +318,7 @@ def test_post_to_studio_inside_dvc_exp( assert "done" not in call_types -@pytest.mark.studio() +@pytest.mark.studio def test_post_to_studio_inside_subdir( tmp_dir, dvc_repo, mocker, monkeypatch, mocked_studio_post, mocked_dvc_repo ): @@ -346,7 +346,7 @@ def test_post_to_studio_inside_subdir( ) -@pytest.mark.studio() +@pytest.mark.studio def test_post_to_studio_inside_subdir_dvc_exp( tmp_dir, dvc_repo, monkeypatch, mocked_studio_post, mocked_dvc_repo ): @@ -468,7 +468,7 @@ def 
test_post_to_studio_if_done_skipped(tmp_dir, mocked_dvc_repo, mocked_studio_ assert "data" in call_types -@pytest.mark.studio() +@pytest.mark.studio def test_post_to_studio_no_repo(tmp_dir, monkeypatch, mocked_studio_post): monkeypatch.setenv(DVC_STUDIO_TOKEN, "STUDIO_TOKEN") monkeypatch.setenv(DVC_STUDIO_REPO_URL, "STUDIO_REPO_URL") @@ -524,7 +524,7 @@ def test_post_to_studio_no_repo(tmp_dir, monkeypatch, mocked_studio_post): ) -@pytest.mark.studio() +@pytest.mark.studio def test_post_to_studio_skip_if_no_repo_url( tmp_dir, mocker, diff --git a/tests/test_vscode.py b/tests/test_vscode.py index 7f56ab09..93b28565 100644 --- a/tests/test_vscode.py +++ b/tests/test_vscode.py @@ -6,7 +6,7 @@ from dvclive import Live, env -@pytest.mark.vscode() +@pytest.mark.vscode @pytest.mark.parametrize("dvc_root", [True, False]) def test_vscode_dvclive_step_completed_signal_file( tmp_dir, dvc_root, mocker, monkeypatch @@ -58,7 +58,7 @@ def test_vscode_dvclive_step_completed_signal_file( assert not os.path.exists(signal_file) -@pytest.mark.vscode() +@pytest.mark.vscode @pytest.mark.parametrize("dvc_root", [True, False]) def test_vscode_dvclive_only_signal_file(tmp_dir, dvc_root, mocker): signal_file = os.path.join(tmp_dir, ".dvc", "tmp", "exps", "run", "DVCLIVE_ONLY")