diff --git a/notebooks/utils.py b/notebooks/utils.py index 146a11fa..bcc581c6 100644 --- a/notebooks/utils.py +++ b/notebooks/utils.py @@ -50,7 +50,7 @@ def __init__(self, start_date=datetime(2018, 1, 1), end_date=datetime(2023, 12, continuous_update=False, readout=True, behaviour="drag", - layout={"width": "80%", "height": "100px", "margin": "0 auto 0 auto"}, + layout={"width": "90%", "height": "100px", "margin": "0 auto 0 auto"}, style={ "handle_color": "dodgerblue", }, @@ -83,34 +83,13 @@ def show_slider(self): """ ) - # Generate a list of dates for the ticks every 3 months - tick_dates = pd.date_range( - self.start_date, self.end_date + pd.DateOffset(months=3), freq="5ME" - ) - - # Create a list of tick labels in the format "Aug 2023" - tick_labels = [date.strftime("%b
%Y") for date in tick_dates] - - # Calculate the positions of the ticks to align them with the slider - total_days = (self.end_date - self.start_date).days - tick_positions = [ - ((date - self.start_date).days / total_days * 100) for date in tick_dates - ] - - # Create a text widget to display the tick labels with calculated positions - tick_widget_html = "
" - for label, position in zip(tick_labels, tick_positions): - tick_widget_html += f"
{label}
" - tick_widget_html += "
" - - tick_widget = widgets.HTML( - value=tick_widget_html, layout={"width": "80%", "margin": "0 auto 0 auto"} - ) - # Arrange the text widget, interval slider, and tick widget using VBox vbox_with_ticks = widgets.VBox( - [descr_widget, self.interval_slider, tick_widget], - layout={"height": "200px"}, + [ + descr_widget, + self.interval_slider, + ], + layout={"height": "150px"}, ) display(vbox_with_ticks) diff --git a/notebooks/worldcereal_v1_demo_custom_cropland.ipynb b/notebooks/worldcereal_v1_demo_custom_cropland.ipynb index 623fbad8..ace37c3a 100644 --- a/notebooks/worldcereal_v1_demo_custom_cropland.ipynb +++ b/notebooks/worldcereal_v1_demo_custom_cropland.ipynb @@ -38,7 +38,8 @@ "source": [ "### Before you start\n", "\n", - "In order to run WorldCereal crop mapping jobs from this notebook, you need to create an account on the Copernicus Data Space Ecosystem (CDSE) registering [here](https://dataspace.copernicus.eu/). This is free of charge and will grant you a number of free openEO processing credits to continue this demo." + "In order to run WorldCereal crop mapping jobs from this notebook, you need to create an account on the [Copernicus Data Space Ecosystem](https://dataspace.copernicus.eu/).\n", + "This is free of charge and will grant you a number of free openEO processing credits to continue this demo." 
] }, { @@ -100,7 +101,7 @@ "polygon = map.get_polygon_latlon()\n", "\n", "# Query our public database of training data\n", - "public_df = query_public_extractions(polygon, filter_cropland=False)\n", + "public_df = query_public_extractions(polygon, filter_cropland=False, buffer=250000)\n", "public_df.year.value_counts()" ] }, @@ -285,9 +286,8 @@ "import os\n", "from pathlib import Path\n", "\n", - "# Specify the local directory where the resulting maps should be downloaded to.\n", - "run = get_input(\"model run\")\n", - "output_dir = Path(os.getcwd()) / f'CROPLAND_{modelname}_{run}'\n", + "# The output directory is named after the model\n", + "output_dir = Path(os.getcwd()) / f'CROPLAND_{modelname}'\n", "print(f\"Output directory: {output_dir}\")" ] }, @@ -329,6 +329,26 @@ ")" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# The results contain the openeo job id...\n", + "print(f\"Job id: {job_results.job_id}\")\n", + "print(f\"Location of metadata: {job_results.metadata}\")\n", + "# ... a list of products that were downloaded...\n", + "print(f\"Products: {job_results.products.keys()}\")\n", + "# ... 
for each product:\n", + "print('-- For each product --')\n", + "print(f\"Type: {job_results.products['cropland']['type']}\")\n", + "print(f\"Temporal extent: {job_results.products['cropland']['temporal_extent']}\")\n", + "print(f\"Look-up table: {job_results.products['cropland']['lut']}\")\n", + "print(f\"URL: {job_results.products['cropland']['url']}\")\n", + "print(f\"Local path: {job_results.products['cropland']['path']}\")" + ] + }, { "cell_type": "markdown", "metadata": {}, diff --git a/notebooks/worldcereal_v1_demo_custom_cropland_extended.ipynb b/notebooks/worldcereal_v1_demo_custom_cropland_extended.ipynb deleted file mode 100644 index f55abeac..00000000 --- a/notebooks/worldcereal_v1_demo_custom_cropland_extended.ipynb +++ /dev/null @@ -1,443 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "![](./resources/System_v1_custom_cropland.png)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Introduction\n", - "\n", - "This notebook guides you through the process of training a custom cropland classification model using publicly available and harmonized in-situ reference data for your area of interest. Afterwards, the model can be applied to your area and season of interest to generate a cropland extent map." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Content\n", - " \n", - "- [Before you start](###-Before-you-start)\n", - "- [1. Define your region of interest](#1.-Define-your-region-of-interest)\n", - "- [2. Extract public reference data](#2.-Extract-public-reference-data)\n", - "- [3. Create your custom cropland class](#3.-Create-your-custom-cropland-class)\n", - "- [4. Prepare training features](#4.-Prepare-training-features)\n", - "- [5. Train custom classification model](#5.-Train-custom-classification-model)\n", - "- [6. Deploy your custom model](#6.-Deploy-your-custom-model)\n", - "- [7. 
Generate a map](#7.-Generate-a-map)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Before you start\n", - "\n", - "In order to run WorldCereal crop mapping jobs from this notebook, you need to create an account on the Copernicus Data Space Ecosystem (CDSE) registering [here](https://dataspace.copernicus.eu/). This is free of charge and will grant you a number of free openEO processing credits to continue this demo." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 1. Define your region of interest\n", - "\n", - "When running the code snippet below, an interactive map will be visualized.\n", - "Click the Rectangle button on the left hand side of the map to start drawing your region of interest.\n", - "The widget will automatically store the coordinates of the last rectangle you drew on the map.\n", - "\n", - "
\n", - "Processing area limitation:
\n", - "Processing areas beyond 2500 km² are currently not supported to avoid excessive credit usage and long processing times.
\n", - "Upon exceeding this limit, an error will be shown, and you will need to draw a new rectangle.\n", - "\n", - "For testing purposes, we recommend you to select a small area (< 250 km²) in order to limit processing time and credit usage.\n", - "\n", - "A run of 250 km² will typically consume 40 credits and last around 20 mins.
\n", - "A run of 750 km² will typically consume 90 credits and last around 50 mins.
\n", - "A run of 2500 km² will typically consume 250 credits and last around 1h 40 mins.\n", - "
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from worldcereal.utils.map import ui_map\n", - "\n", - "map = ui_map()\n", - "map.show_map()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 2. Extract public reference data\n", - "\n", - "Here we query existing reference data that have already been processed by WorldCereal and are ready to use.\n", - "To increase the number of hits, we expand the search area by 250 km in all directions.\n", - "\n", - "We print the number of training samples retrieved per year." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from worldcereal.utils.refdata import query_public_extractions\n", - "\n", - "# retrieve the polygon you just drew\n", - "polygon = map.get_polygon_latlon()\n", - "\n", - "# Query our public database of training data\n", - "public_df = query_public_extractions(polygon, filter_cropland=False)\n", - "public_df.year.value_counts()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 3. Create your custom cropland class\n", - "\n", - "Run the next cell and select all land cover classes you would like to include in your \"cropland\" class. All classes that are not selected will be grouped under the \"other\" category. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# from utils import pick_croptypes\n", - "from utils import select_landcover\n", - "from IPython.display import display\n", - "\n", - "checkbox, checkbox_widgets = select_landcover(public_df)\n", - "display(checkbox)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Based on your selection, a custom target label is now generated for each sample. Verify that only land cover classes of your choice are appearing in the `downstream_class`, all others will fall under `other`." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from utils import get_custom_cropland_labels\n", - "\n", - "public_df = get_custom_cropland_labels(public_df, checkbox_widgets)\n", - "public_df[\"downstream_class\"].value_counts()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4. Prepare training features\n", - "\n", - "Using a deep learning framework (Presto), we derive classification features for each sample. The resulting `encodings` and `targets` will be used for model training." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from utils import prepare_training_dataframe\n", - "\n", - "training_dataframe = prepare_training_dataframe(public_df, task_type=\"cropland\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 5. Train custom classification model\n", - "We train a catboost model for the selected land cover classes. Class weights are automatically determined to balance the individual classes." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from utils import train_cropland_classifier\n", - "\n", - "custom_model, report, confusion_matrix = train_cropland_classifier(training_dataframe)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Before training, the available training data has been automatically split into a calibration and validation part. By executing the next cell, you get an idea of how well the model performs on the independent validation set." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Print the classification report\n", - "print(report)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 6. 
Deploy your custom model\n", - "\n", - "Once trained, we have to upload our model to the cloud so it can be used by OpenEO for inference. Note that these models are only kept in cloud storage for a limited amount of time.\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from worldcereal.utils.upload import deploy_model\n", - "from openeo_gfmap.backend import cdse_connection\n", - "from utils import get_input\n", - "\n", - "modelname = get_input(\"model\")\n", - "model_url = deploy_model(cdse_connection(), custom_model, pattern=modelname)\n", - "print(f\"Your model can be downloaded from: {model_url}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 7. Generate a map\n", - "\n", - "Using our custom model, we generate a map for our region and season of interest.\n", - "To determine your season of interest, you can consult the WorldCereal crop calendars (by executing the next cell), or check out the [USDA crop calendars](https://ipad.fas.usda.gov/ogamaps/cropcalendar.aspx)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from utils import retrieve_worldcereal_seasons\n", - "\n", - "spatial_extent = map.get_processing_extent()\n", - "seasons = retrieve_worldcereal_seasons(spatial_extent)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now use the slider to select your processing period. Note that the length of the period is always fixed to a year.\n", - "Just make sure your season of interest is fully captured within the period you select." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from utils import date_slider\n", - "\n", - "slider = date_slider()\n", - "slider.show_slider()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Set some other customization options:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from worldcereal.job import PostprocessParameters\n", - "import os\n", - "from pathlib import Path\n", - "\n", - "# Choose whether or not you want to spatially clean the classification results\n", - "postprocess_result = True\n", - "# Choose the postprocessing method you want to use [\"smooth_probabilities\", \"majority_vote\"]\n", - "# (\"smooth_probabilities will do limited spatial cleaning,\n", - "# while \"majority_vote\" will do more aggressive spatial cleaning, depending on the value of kernel_size)\n", - "postprocess_method = \"majority_vote\"\n", - "# Additional parameter for the majority vote method \n", - "# (the higher the value, the more aggressive the spatial cleaning,\n", - "# should be an odd number, not larger than 25, default = 5)\n", - "kernel_size = 5\n", - "# Do you want to save the intermediate results (before applying the postprocessing)\n", - "save_intermediate = True\n", - "# Do you want to save all class probabilities in the final product?\n", - "keep_class_probs = True\n", - "\n", - "postprocess_parameters = PostprocessParameters(enable=postprocess_result,\n", - " method=postprocess_method,\n", - " kernel_size=kernel_size,\n", - " save_intermediate=save_intermediate,\n", - " keep_class_probs=keep_class_probs)\n", - "\n", - "# Specify the local directory where the resulting maps should be downloaded to.\n", - "run = get_input(\"model run\")\n", - "output_dir = Path(os.getcwd()) / f'CROPLAND_{modelname}_{run}'\n", - "print(f\"Output directory: {output_dir}\")" - ] - }, - { - "cell_type": "markdown", - 
"metadata": {}, - "source": [ - "We now have all information we need to generate our map!
\n", - "The next cell will submit a map inference job on CDSE through OpenEO.
\n", - "The first time you run this, you will be asked to authenticate with your CDSE account by clicking the link provided below the cell.
\n", - "Then sit back and wait untill your map is ready..." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from worldcereal.job import generate_map, CropLandParameters\n", - "\n", - "# Initializes default parameters\n", - "parameters = CropLandParameters()\n", - "\n", - "# Change the URL to your custom classification model\n", - "parameters.classifier_parameters.classifier_url = model_url\n", - "\n", - "# Get processing period and area\n", - "processing_period = slider.get_processing_period()\n", - "processing_extent = map.get_processing_extent()\n", - "\n", - "# Launch the job\n", - "job_results = generate_map(\n", - " processing_extent,\n", - " processing_period,\n", - " output_dir=output_dir,\n", - " cropland_parameters=parameters,\n", - " postprocess_parameters=postprocess_parameters,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The classification results will be automatically downloaded to your *output_dir* in .tif format.\n", - "\n", - "For a model with two classes, you get a raster file containing a maximum of four bands:\n", - "1. The label of the winning class\n", - "2. The probability of the winning class [0 - 100]\n", - "3. and beyond (optional,depending on settings): Class probabilities of each class, ordered according to the look-up table. The look-up table for each product can be consulted in the 'results' object as produced by the 'generate_map' function.\n", - "\n", - "Using the function below, we split this information into separate .tif files, thereby adding metadata and a color map, to ease interpretation and visualization:\n", - "- \"xxx_classification_start-date_end-date.tif\" --> contains the classification labels. 
A class look-up table is included in the .tif metadata.\n", - "- \"xxx_probability_start-date_end-date.tif\" --> contains the probability associated with the prediction [0 - 100]\n", - "\n", - "In case you chose to store the original per-class probabilities, these are NOT written to a separate file and need to be consulted in the original result downloaded from OpenEO." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from utils import prepare_visualization\n", - "\n", - "rasters = prepare_visualization(job_results)\n", - "print(rasters)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The resulting raster files can be visualized in QGIS.\n", - "\n", - "
\n", - "WARNING:
\n", - "In case you run this notebook through the Terrascope environment, ALWAYS make sure you download the resulting files to your local system!
\n", - "The Terrascope environment will be cleaned automatically upon exit!\n", - "
\n", - "\n", - "In case you are running this script on your local environment, you can alternatively use the following cells to visualize the outputs directly in this notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from utils import visualize_products\n", - "\n", - "visualize_products(rasters, port=8887)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from utils import show_color_legend\n", - "\n", - "show_color_legend(rasters, \"cropland\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "worldcereal", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.0" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/notebooks/worldcereal_v1_demo_custom_croptype.ipynb b/notebooks/worldcereal_v1_demo_custom_croptype.ipynb index c9c308bd..86abbc24 100644 --- a/notebooks/worldcereal_v1_demo_custom_croptype.ipynb +++ b/notebooks/worldcereal_v1_demo_custom_croptype.ipynb @@ -24,12 +24,13 @@ " \n", "- [Before you start](###-Before-you-start)\n", "- [1. Define your region of interest](#1.-Define-your-region-of-interest)\n", - "- [2. Extract public reference data](#2.-Extract-public-reference-data)\n", - "- [3. Select your desired crop types](#3.-Select-your-desired-crop-types)\n", - "- [4. Prepare training features](#4.-Prepare-training-features)\n", - "- [5. Train custom classification model](#5.-Train-custom-classification-model)\n", - "- [6. Deploy your custom model](#6.-Deploy-your-custom-model)\n", - "- [7. Generate a map](#7.-Generate-a-map)\n" + "- [2. Define your temporal extent](#2.-Define-your-temporal-extent)\n", + "- [3. 
Extract public reference data](#3.-Extract-public-reference-data)\n", + "- [4. Select your desired crop types](#4.-Select-your-desired-crop-types)\n", + "- [5. Prepare training features](#5.-Prepare-training-features)\n", + "- [6. Train custom classification model](#6.-Train-custom-classification-model)\n", + "- [7. Deploy your custom model](#7.-Deploy-your-custom-model)\n", + "- [8. Generate a map](#8.-Generate-a-map)\n" ] }, { @@ -38,7 +39,8 @@ "source": [ "### Before you start\n", "\n", - "In order to run WorldCereal crop mapping jobs from this notebook, you need to create an account on the Copernicus Data Space Ecosystem (CDSE) registering [here](https://dataspace.copernicus.eu/). This is free of charge and will grant you a number of free openEO processing credits to continue this demo." + "In order to run WorldCereal crop mapping jobs from this notebook, you need to create an account on the [Copernicus Data Space Ecosystem](https://dataspace.copernicus.eu/).\n", + "This is free of charge and will grant you a number of free openEO processing credits to continue this demo." ] }, { @@ -80,7 +82,48 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### 2. Extract public reference data\n", + "### 2. Define your temporal extent\n", + "\n", + "To determine your season of interest, you can consult the WorldCereal crop calendars (by executing the next cell), or check out the [USDA crop calendars](https://ipad.fas.usda.gov/ogamaps/cropcalendar.aspx)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from utils import retrieve_worldcereal_seasons\n", + "\n", + "spatial_extent = map.get_processing_extent()\n", + "seasons = retrieve_worldcereal_seasons(spatial_extent)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now use the slider to select your processing period. 
Note that the length of the period is always fixed to a year.\n", + "Just make sure your season of interest is fully captured within the period you select." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from utils import date_slider\n", + "\n", + "slider = date_slider()\n", + "slider.show_slider()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3. Extract public reference data\n", "\n", "Here we query existing reference data that have already been processed by WorldCereal and are ready to use.\n", "To increase the number of hits, we expand the search area by 250 km in all directions.\n", @@ -96,11 +139,14 @@ "source": [ "from worldcereal.utils.refdata import query_public_extractions\n", "\n", - "# retrieve the polygon you just drew\n", + "# Retrieve the polygon you just drew\n", "polygon = map.get_polygon_latlon()\n", "\n", + "# Retrieve the date range you just selected\n", + "processing_period = slider.get_processing_period()\n", + "\n", "# Query our public database of training data\n", - "public_df = query_public_extractions(polygon)\n", + "public_df = query_public_extractions(polygon, processing_period=processing_period, buffer=250000)\n", "public_df.year.value_counts()" ] }, @@ -108,7 +154,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### 3. Select your desired crop types\n", + "### 4. Select your desired crop types\n", "\n", "Run the next cell and select all crop types you wish to include in your model. All the crops that are not selected will be grouped under the \"other\" category." ] @@ -149,9 +195,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### 4. Prepare training features\n", + "### 5. Prepare training features\n", "\n", - "Using a deep learning framework (Presto), we derive classification features for each sample. The resulting `encodings` and `targets` will be used for model training." 
+ "Using a deep learning framework (Presto), we derive classification features for each sample in the dataframe resulting from your query. Presto was pre-trained on millions of unlabeled samples around the world and finetuned on global labelled land cover and crop type data from the WorldCereal reference database. The resulting *embeddings* and the *target* labels to train on will be returned as a training dataframe which we will use for downstream model training." ] }, { @@ -169,8 +215,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### 5. Train custom classification model\n", - "We train a catboost model for the selected crop types. Class weights are automatically determined to balance the individual classes." + "### 6. Train custom classification model\n", + "We train a catboost model for the selected crop types. By default, no class weighting is done. You could opt to enable this by setting `balance_classes=True`, however, depending on the class distribution this may lead to undesired results. There is no golden rule here." ] }, { @@ -181,7 +227,7 @@ "source": [ "from utils import train_classifier\n", "\n", - "custom_model, report, confusion_matrix = train_classifier(training_dataframe)" + "custom_model, report, confusion_matrix = train_classifier(training_dataframe, balance_classes=False)" ] }, { @@ -205,7 +251,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### 6. Deploy your custom model\n", + "### 7. Deploy your custom model\n", "\n", "Once trained, we have to upload our model to the cloud so it can be used by OpenEO for inference. Note that these models are only kept in cloud storage for a limited amount of time.\n" ] @@ -229,48 +275,10 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### 7. Generate a map\n", + "### 8. 
Generate a map\n", "\n", "Using our custom model, we generate a map for our region and season of interest.\n", - "To determine your season of interest, you can consult the WorldCereal crop calendars (by executing the next cell), or check out the [USDA crop calendars](https://ipad.fas.usda.gov/ogamaps/cropcalendar.aspx)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from utils import retrieve_worldcereal_seasons\n", - "\n", - "spatial_extent = map.get_processing_extent()\n", - "seasons = retrieve_worldcereal_seasons(spatial_extent)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now use the slider to select your processing period. Note that the length of the period is always fixed to a year.\n", - "Just make sure your season of interest is fully captured within the period you select." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from utils import date_slider\n", "\n", - "slider = date_slider()\n", - "slider.show_slider()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ "Set some other customization options:" ] }, @@ -283,9 +291,8 @@ "import os\n", "from pathlib import Path\n", "\n", - "# Specify the local directory where the resulting maps should be downloaded to.\n", - "run = get_input(\"model run\")\n", - "output_dir = Path(os.getcwd()) / f'CROPTYPE_{modelname}_{run}'\n", + "# The output directory is named after the model\n", + "output_dir = Path(os.getcwd()) / f'CROPTYPE_{modelname}'\n", "print(f\"Output directory: {output_dir}\")" ] }, @@ -329,6 +336,26 @@ ")" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# The results contain the openeo job id...\n", + "print(f\"Job id: {job_results.job_id}\")\n", + "print(f\"Location of metadata: {job_results.metadata}\")\n", + "# ... 
a list of products that were downloaded...\n", + "print(f\"Products: {job_results.products.keys()}\")\n", + "# ... for each product:\n", + "print('-- For each product --')\n", + "print(f\"Type: {job_results.products['croptype']['type']}\")\n", + "print(f\"Temporal extent: {job_results.products['croptype']['temporal_extent']}\")\n", + "print(f\"Look-up table: {job_results.products['croptype']['lut']}\")\n", + "print(f\"URL: {job_results.products['croptype']['url']}\")\n", + "print(f\"Local path: {job_results.products['croptype']['path']}\")" + ] + }, { "cell_type": "markdown", "metadata": {}, diff --git a/notebooks/worldcereal_v1_demo_custom_croptype_extended.ipynb b/notebooks/worldcereal_v1_demo_custom_croptype_extended.ipynb index b0bb999d..305c1c37 100644 --- a/notebooks/worldcereal_v1_demo_custom_croptype_extended.ipynb +++ b/notebooks/worldcereal_v1_demo_custom_croptype_extended.ipynb @@ -24,12 +24,13 @@ " \n", "- [Before you start](###-Before-you-start)\n", "- [1. Define your region of interest](#1.-Define-your-region-of-interest)\n", - "- [2. Extract public reference data](#2.-Extract-public-reference-data)\n", - "- [3. Select your desired crop types](#3.-Select-your-desired-crop-types)\n", - "- [4. Prepare training features](#4.-Prepare-training-features)\n", - "- [5. Train custom classification model](#5.-Train-custom-classification-model)\n", - "- [6. Deploy your custom model](#6.-Deploy-your-custom-model)\n", - "- [7. Generate a map](#7.-Generate-a-map)\n" + "- [2. Define your temporal extent](#2.-Define-your-temporal-extent)\n", + "- [3. Extract public reference data](#3.-Extract-public-reference-data)\n", + "- [4. Select your desired crop types](#4.-Select-your-desired-crop-types)\n", + "- [5. Prepare training features](#5.-Prepare-training-features)\n", + "- [6. Train custom classification model](#6.-Train-custom-classification-model)\n", + "- [7. Deploy your custom model](#7.-Deploy-your-custom-model)\n", + "- [8. 
Generate a map](#8.-Generate-a-map)\n" ] }, { @@ -38,7 +39,8 @@ "source": [ "### Before you start\n", "\n", - "In order to run WorldCereal crop mapping jobs from this notebook, you need to create an account on the Copernicus Data Space Ecosystem (CDSE) registering [here](https://dataspace.copernicus.eu/). This is free of charge and will grant you a number of free openEO processing credits to continue this demo." + "In order to run WorldCereal crop mapping jobs from this notebook, you need to create an account on the [Copernicus Data Space Ecosystem](https://dataspace.copernicus.eu/).\n", + "This is free of charge and will grant you a number of free openEO processing credits to continue this demo." ] }, { @@ -64,6 +66,19 @@ "" ] }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# TEMPORARY CELL\n", + "import sys\n", + "sys.path.append('/home/jeroendegerickx/git/worldcereal/worldcereal-classification/notebooks')\n", + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, { "cell_type": "code", "execution_count": null, @@ -80,7 +95,48 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### 2. Extract public reference data\n", + "### 2. Define your temporal extent\n", + "\n", + "To determine your season of interest, you can consult the WorldCereal crop calendars (by executing the next cell), or check out the [USDA crop calendars](https://ipad.fas.usda.gov/ogamaps/cropcalendar.aspx)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from utils import retrieve_worldcereal_seasons\n", + "\n", + "spatial_extent = map.get_processing_extent()\n", + "seasons = retrieve_worldcereal_seasons(spatial_extent)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now use the slider to select your processing period. 
Note that the length of the period is always fixed to a year.\n", + "Just make sure your season of interest is fully captured within the period you select." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from utils import date_slider\n", + "\n", + "slider = date_slider()\n", + "slider.show_slider()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3. Extract public reference data\n", "\n", "Here we query existing reference data that have already been processed by WorldCereal and are ready to use.\n", "To increase the number of hits, we expand the search area by 250 km in all directions.\n", @@ -96,11 +152,14 @@ "source": [ "from worldcereal.utils.refdata import query_public_extractions\n", "\n", - "# retrieve the polygon you just drew\n", + "# Retrieve the polygon you just drew\n", "polygon = map.get_polygon_latlon()\n", "\n", + "# Retrieve the date range you just selected\n", + "processing_period = slider.get_processing_period()\n", + "\n", "# Query our public database of training data\n", - "public_df = query_public_extractions(polygon)\n", + "public_df = query_public_extractions(polygon, processing_period=processing_period, buffer=250000)\n", "public_df.year.value_counts()" ] }, @@ -108,7 +167,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### 3. Select your desired crop types\n", + "### 4. Select your desired crop types\n", "\n", "Run the next cell and select all crop types you wish to include in your model. All the crops that are not selected will be grouped under the \"other\" category." 
] @@ -122,7 +181,7 @@ "from utils import pick_croptypes\n", "from IPython.display import display\n", "\n", - "checkbox, checkbox_widgets = pick_croptypes(public_df, samples_threshold=100)\n", + "checkbox, checkbox_widgets, updated_class_map = pick_croptypes(public_df, samples_threshold=100)\n", "display(checkbox)" ] }, @@ -141,7 +200,7 @@ "source": [ "from utils import get_custom_croptype_labels\n", "\n", - "public_df = get_custom_croptype_labels(public_df, checkbox_widgets)\n", + "public_df = get_custom_croptype_labels(public_df, checkbox_widgets, updated_class_map)\n", "public_df[\"downstream_class\"].value_counts()" ] }, @@ -149,9 +208,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### 4. Prepare training features\n", + "### 5. Prepare training features\n", "\n", - "Using a deep learning framework (Presto), we derive classification features for each sample. The resulting `encodings` and `targets` will be used for model training." + "Using a deep learning framework (Presto), we derive classification features for each sample in the dataframe resulting from your query. Presto was pre-trained on millions of unlabelled samples around the world and fine-tuned on global labelled land cover and crop type data from the WorldCereal reference database. The resulting *embeddings* and the *target* labels to train on will be returned as a training dataframe, which we will use for downstream model training." ] }, { @@ -169,8 +228,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### 5. Train custom classification model\n", - "We train a catboost model for the selected crop types. Class weights are automatically determined to balance the individual classes." + "### 6. Train custom classification model\n", + "We train a CatBoost model for the selected crop types. By default, no class weighting is done. You can enable it by setting `balance_classes=True`; however, depending on the class distribution, this may lead to undesired results. 
There is no golden rule here." ] }, { @@ -181,7 +240,7 @@ "source": [ "from utils import train_classifier\n", "\n", - "custom_model, report, confusion_matrix = train_classifier(training_dataframe)" + "custom_model, report, confusion_matrix = train_classifier(training_dataframe, balance_classes=False)" ] }, { @@ -205,7 +264,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### 6. Deploy your custom model\n", + "### 7. Deploy your custom model\n", "\n", "Once trained, we have to upload our model to the cloud so it can be used by OpenEO for inference. Note that these models are only kept in cloud storage for a limited amount of time.\n" ] @@ -229,48 +288,10 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### 7. Generate a map\n", + "### 8. Generate a map\n", "\n", "Using our custom model, we generate a map for our region and season of interest.\n", - "To determine your season of interest, you can consult the WorldCereal crop calendars (by executing the next cell), or check out the [USDA crop calendars](https://ipad.fas.usda.gov/ogamaps/cropcalendar.aspx)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from utils import retrieve_worldcereal_seasons\n", - "\n", - "spatial_extent = map.get_processing_extent()\n", - "seasons = retrieve_worldcereal_seasons(spatial_extent)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now use the slider to select your processing period. Note that the length of the period is always fixed to a year.\n", - "Just make sure your season of interest is fully captured within the period you select." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from utils import date_slider\n", "\n", - "slider = date_slider()\n", - "slider.show_slider()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ "Set some other customization options:" ] }, @@ -308,9 +329,8 @@ " save_intermediate=save_intermediate,\n", " keep_class_probs=keep_class_probs)\n", "\n", - "# Specify the local directory where the resulting maps should be downloaded to.\n", - "run = get_input(\"model run\")\n", - "output_dir = Path(os.getcwd()) / f'CROPTYPE_{modelname}_{run}'\n", + "# The output directory is named after the model\n", + "output_dir = Path(os.getcwd()) / f'CROPTYPE_{modelname}'\n", "print(f\"Output directory: {output_dir}\")" ] }, @@ -354,6 +374,26 @@ ")" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# The results contain the openEO job ID...\n", + "print(f\"Job id: {job_results.job_id}\")\n", + "print(f\"Location of metadata: {job_results.metadata}\")\n", + "# ... a list of products that were downloaded...\n", + "print(f\"Products: {job_results.products.keys()}\")\n", + "# ... 
for each product:\n", + "print('-- For each product --')\n", + "print(f\"Type: {job_results.products['croptype']['type']}\")\n", + "print(f\"Temporal extent: {job_results.products['croptype']['temporal_extent']}\")\n", + "print(f\"Look-up table: {job_results.products['croptype']['lut']}\")\n", + "print(f\"URL: {job_results.products['croptype']['url']}\")\n", + "print(f\"Local path: {job_results.products['croptype']['path']}\")" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -384,8 +424,19 @@ "source": [ "from utils import prepare_visualization\n", "\n", - "rasters = prepare_visualization(job_results)\n", - "print(rasters)" + "filepaths = prepare_visualization(job_results)\n", + "print(filepaths)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from utils import visualize_classification\n", + "\n", + "visualize_classification(filepaths, \"croptype\")" ] }, { @@ -411,7 +462,7 @@ "source": [ "from utils import visualize_products\n", "\n", - "visualize_products(rasters, port=8887)" + "visualize_products(filepaths, port=8887)" ] }, { @@ -422,7 +473,7 @@ "source": [ "from utils import show_color_legend\n", "\n", - "show_color_legend(rasters, \"croptype\")" + "show_color_legend(filepaths, \"croptype\")" ] } ], diff --git a/notebooks/worldcereal_v1_demo_default_cropland.ipynb b/notebooks/worldcereal_v1_demo_default_cropland.ipynb index 4c74728f..801c2e1e 100644 --- a/notebooks/worldcereal_v1_demo_default_cropland.ipynb +++ b/notebooks/worldcereal_v1_demo_default_cropland.ipynb @@ -43,8 +43,7 @@ "source": [ "### Before you start\n", "\n", - "In order to run WorldCereal crop mapping jobs from this notebook, you need to create an account on the Copernicus Data Space Ecosystem,\n", - "by completing the form [here](https://dataspace.copernicus.eu/).\n", + "In order to run WorldCereal crop mapping jobs from this notebook, you need to create an account on the [Copernicus Data Space 
Ecosystem](https://dataspace.copernicus.eu/).\n", "This is free of charge and will grant you a number of free openEO processing credits to continue this demo." ] }, @@ -195,6 +194,26 @@ ")" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# The results contain the openEO job ID...\n", + "print(f\"Job id: {results.job_id}\")\n", + "print(f\"Location of metadata: {results.metadata}\")\n", + "# ... a list of products that were downloaded...\n", + "print(f\"Products: {results.products.keys()}\")\n", + "# ... and the following details for each product:\n", + "print('-- For each product --')\n", + "print(f\"Type: {results.products['cropland']['type']}\")\n", + "print(f\"Temporal extent: {results.products['cropland']['temporal_extent']}\")\n", + "print(f\"Look-up table: {results.products['cropland']['lut']}\")\n", + "print(f\"URL: {results.products['cropland']['url']}\")\n", + "print(f\"Local path: {results.products['cropland']['path']}\")" + ] + }, { "cell_type": "markdown", "metadata": {}, diff --git a/notebooks/worldcereal_v1_demo_default_cropland_extended.ipynb b/notebooks/worldcereal_v1_demo_default_cropland_extended.ipynb index 314309c4..37f0d254 100644 --- a/notebooks/worldcereal_v1_demo_default_cropland_extended.ipynb +++ b/notebooks/worldcereal_v1_demo_default_cropland_extended.ipynb @@ -43,8 +43,7 @@ "source": [ "### Before you start\n", "\n", - "In order to run WorldCereal crop mapping jobs from this notebook, you need to create an account on the Copernicus Data Space Ecosystem,\n", - "by completing the form [here](https://dataspace.copernicus.eu/).\n", + "In order to run WorldCereal crop mapping jobs from this notebook, you need to create an account on the [Copernicus Data Space Ecosystem](https://dataspace.copernicus.eu/).\n", "This is free of charge and will grant you a number of free openEO processing credits to continue this demo." 
] }, @@ -217,6 +216,26 @@ ")" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# The results contain the openEO job ID...\n", + "print(f\"Job id: {results.job_id}\")\n", + "print(f\"Location of metadata: {results.metadata}\")\n", + "# ... a list of products that were downloaded...\n", + "print(f\"Products: {results.products.keys()}\")\n", + "# ... and the following details for each product:\n", + "print('-- For each product --')\n", + "print(f\"Type: {results.products['cropland']['type']}\")\n", + "print(f\"Temporal extent: {results.products['cropland']['temporal_extent']}\")\n", + "print(f\"Look-up table: {results.products['cropland']['lut']}\")\n", + "print(f\"URL: {results.products['cropland']['url']}\")\n", + "print(f\"Local path: {results.products['cropland']['path']}\")" + ] + }, { "cell_type": "markdown", "metadata": {},