From 0fb5b031e8a91fda6558a53dfebef9a90163c455 Mon Sep 17 00:00:00 2001 From: aderrien7 Date: Tue, 27 Aug 2024 09:32:06 +0000 Subject: [PATCH 01/13] Adding data to fromat tags to pangeo-fish format THe notebook data_formating.ipynb showcase how to convert from raw files to pangeo-fish format. The file data_conversion.py contains all the functions needed to extract informations on the files. --- docs/data_conversion.py | 410 +++++++++++++++++++++++++++++++++++ docs/data_formating.ipynb | 443 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 853 insertions(+) create mode 100644 docs/data_conversion.py create mode 100644 docs/data_formating.ipynb diff --git a/docs/data_conversion.py b/docs/data_conversion.py new file mode 100644 index 0000000..7f8981e --- /dev/null +++ b/docs/data_conversion.py @@ -0,0 +1,410 @@ +import pandas as pd +import numpy as np +from datetime import datetime +import csv +import json +import os +import pytz +import s3fs + + +def show_data_csv(chemin_fichier): + with open(chemin_fichier, newline="", encoding="latin-1") as csvfile: + # Créer un lecteur CSV + lecteur_csv = csv.reader(csvfile, delimiter=",") + + for ligne in lecteur_csv: + if ligne != []: ### Removes empty spaces + print(ligne) + + +def create_metadata_file(file_path, destination_path, remote=False): + """ + Create a metadata JSON file based on the provided data path. + + Args: + file_path (str): The path from which to extract the tag name. + destination_path (str): The path where you want the metadata file to be written. + remote (bool): If True, save the file to an S3 path. If False, save it locally. + + Returns: + None + """ + # Extract the tag name from the file path + tag_id = extract_name(file_path) + + # Create the metadata dictionary + metadata = { + "pit_tag_id": tag_id, + "scientific_name": "Dicentrarchus labrax", + "common_name": "European seabass", + "project": "BARGIP", + } + + # Set the filename for the metadata file + filename = "metadata.json" + + if remote: + # If remote is True, save the file to an S3 path + s3 = s3fs.S3FileSystem( + anon=False, + client_kwargs={ + "endpoint_url": "https://s3.gra.perf.cloud.ovh.net", # S3 endpoint for OVH + }, + ) + full_destination_path = os.path.join(destination_path, filename) + with s3.open(full_destination_path, "w") as f: + json.dump(metadata, f) + else: + # If remote is False, save the file locally + full_destination_path = os.path.join(destination_path, filename) + with open(full_destination_path, "w") as f: + json.dump(metadata, f) + + +def extract_name(file_path): + """ + Extracts the filename without extension from the given path. + + Args: + path (str): The file path. + + Returns: + str: The filename without extension. + """ + # Use os.path.basename to get the filename + file_name = os.path.basename(file_path) + # Use os.path.splitext to separate the filename from its extension and get the first element + file_name_without_extension = os.path.splitext(file_name)[0] + return file_name_without_extension + + +def convert_to_utc_with_formatting(date, time_zone): + """ + Convert the given date string to UTC time, with flexible date format parsing. + + Parameters: + date (str): A string representing the date and time. + Supports various formats including '%d/%m/%Y %H:%M', '%d/%m/%y %H:%M', + '%d/%m/%Y %H:%M:%S', '%d/%m/%y %H:%M:%S', '%y-%m-%d %H:%M:%S', '%Y-%m-%d %H:%M:%S'. + time_zone (str): A string representing the time zone, e.g., 'America/New_York', 'Europe/London', etc. 
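+
+    Example (illustrative; assumes the date matches one of the supported formats above):
+        >>> convert_to_utc_with_formatting("27/08/2024 10:30", "Europe/Paris")
+        '2024-08-27T08:30:00Z'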
+ + Returns: + str: A string representing the converted date and time in UTC in the format 'yyyy-mm-ddThh:mm:ssZ'. + + Raises: + ValueError: If the input date string is not in any of the supported formats or the time zone is invalid. + """ + # Define possible date formats + possible_formats = [ + "%d/%m/%Y %H:%M", + "%d/%m/%y %H:%M", + "%d/%m/%Y %H:%M:%S", + "%d/%m/%y %H:%M:%S", + "%y-%m-%d %H:%M:%S", + "%Y-%m-%d %H:%M:%S", + ] + + # Try parsing the date with different formats + for fmt in possible_formats: + try: + # Attempt to parse the date with the current format + parsed_date = datetime.strptime(date, fmt) + # Convert the parsed date to the specified time zone + tz = pytz.timezone(time_zone) + localized_time = tz.localize(parsed_date) + # Convert the localized time to UTC + utc_time = localized_time.astimezone(pytz.utc) + # Format the UTC time as a string and return it + return utc_time.strftime("%Y-%m-%dT%H:%M:%SZ") + except ValueError: + # If parsing fails with the current format, try the next one + pass + + # If none of the formats work, raise a ValueError + raise ValueError("Invalid date format: {}".format(date)) + + +def format_date(date): + """ + Convert the date to the accurate ISO8601 time format + """ + possible_formats = [ + "%d/%m/%Y %H:%M", + "%d/%m/%y %H:%M", + "%d/%m/%Y %H:%M:%S", + "%d/%m/%y %H:%M:%S", + "%y-%m-%d %H:%M:%S", + "%Y-%m-%d %H:%M:%S", + ] + + # Try parsing the date with different formats + for fmt in possible_formats: + try: + # Attempt to parse the date with the current format + parsed_date = datetime.strptime(date, fmt) + # Convert the parsed date to ISO8601 format and return it + return parsed_date.strftime("%Y-%m-%dT%H:%M:%SZ") + except ValueError: + # If parsing fails with the current format, try the next one + pass + + # If none of the formats work, raise a ValueError + raise ValueError("Invalid date format: {}".format(date)) + + +def format_coord(coordinate_str): + """ + Convert a coordinate string into a numeric value. + + Parameters: + coordinate_str (str): A string representing a coordinate in the format "value direction", + where direction is either 'E' or 'W' for longitude, or 'N' or 'S' for latitude. + + Returns: + float: The numeric value of the coordinate. Positive if the direction is 'E' or 'N', + negative if the direction is 'W' or 'S'. + """ + # Split the numeric value and direction (E or W, N or S) + val, direction = coordinate_str.split() + + # Convert the value to float64 + val = np.float64(val) + + # Check the direction and adjust the value accordingly + if direction.upper() == "W" or direction.upper() == "S": + val = -val + + return val + + +def extract_tagging_events(file_path, time_zone="Europe/Paris", remote=False): + """ + Extracts releasing date and presumed date of fish death from a CSV file stored locally or on S3. + + Args: + file_path (str): The path to the CSV file. For remote files, provide the S3 URI. + time_zone (str): The time zone to use for date conversion. + remote (bool): If True, fetch the file from S3. If False, read the file locally. + + Returns: + pd.DataFrame: A DataFrame containing event names, times, longitudes, and latitudes. 
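+
+    Example (illustrative; the file path is a placeholder for a raw tag CSV):
+        >>> events = extract_tagging_events("raw/NO_A12667.CSV", time_zone="Europe/Paris")
+        >>> list(events.columns)
+        ['event_name', 'time', 'longitude', 'latitude']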
+ """ + release_date = None + fish_death = None + lon = [] + lat = [] + + if remote: + # Use s3fs to connect to the S3-compatible storage + s3 = s3fs.S3FileSystem( + anon=False, + client_kwargs={ + "endpoint_url": "https://s3.gra.perf.cloud.ovh.net", # S3 endpoint for OVH + }, + ) + # Open the file from S3 + csvfile = s3.open(file_path, mode="r", encoding="latin-1") + else: + # Open the file locally + csvfile = open(file_path, newline="", encoding="latin-1") + + try: + # Create a CSV reader + csv_reader = csv.reader(csvfile, delimiter=",") + + # Read each line of the CSV file + for line in csv_reader: + if line: + # Extract the release date and convert it to UTC + if line[0] == "releasing date ": + release_date = convert_to_utc_with_formatting( + line[1], time_zone=time_zone + ) + + # Extract the presumed date of fish death and convert it to UTC + if line[0] == "presumed date of fish death ": + fish_death = convert_to_utc_with_formatting( + line[1], time_zone=time_zone + ) + + # Extract the fish release position (latitude and longitude) + if line[0] == "fish release position ": + if line[1] != "unknown": + lat.append(format_coord(line[1])) + lon.append(format_coord(line[2])) + else: + lat.append(np.nan) # Use NaN if the position is unknown + lon.append(np.nan) + + # Extract the fish recapture position (latitude and longitude) + if line[0] == "fish recapture position ": + if line[1] != "unknown": + lat.append(format_coord(line[1])) + lon.append(format_coord(line[2])) + else: + lat.append(np.nan) # Use NaN if the position is unknown + lon.append(np.nan) + + # Combine the extracted data into a DataFrame + dates = [release_date, fish_death] + data = { + "event_name": ["release", "fish_death"], + "time": dates, + "longitude": lon, + "latitude": lat, + } + events = pd.DataFrame(data) + + finally: + # Close the file after reading + csvfile.close() + + # Return the DataFrame containing the extracted events + return events + + +def extract_DST(file_path, time_zone, remote=False): + """ + Extracts time, pressure, and temperature data from a CSV file containing time series data. + + Args: + file_path (str): The path to the CSV file. For remote files, provide the S3 URI. + time_zone (str): The time zone for date conversion. + remote (bool): If True, fetch the file from S3. If False, read the file locally. + + Returns: + pandas.DataFrame: A DataFrame containing the extracted data. 
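+
+    Example (illustrative; the file path is a placeholder for a raw tag CSV):
+        >>> dst = extract_DST("raw/NO_A12667.CSV", "Europe/Paris")
+        Extraction for tag NO_A12667 complete, no missing data
+        >>> list(dst.columns)
+        ['time', 'temperature', 'pressure']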
+ """ + # List to store all the data + all_data = [] + expected_length = 0 + + # Extracting tag ID from the file path + tag_id = extract_name(file_path) + + if remote: + # Use s3fs to connect to the S3-compatible storage + s3 = s3fs.S3FileSystem( + anon=False, + client_kwargs={ + "endpoint_url": "https://s3.gra.perf.cloud.ovh.net", # S3 endpoint for OVH + }, + ) + # Open the file from S3 + csvfile = s3.open(file_path, mode="r", encoding="latin-1") + else: + # Open the file locally + csvfile = open(file_path, newline="", encoding="latin-1") + + try: + # Create a CSV reader + csv_reader = csv.reader(csvfile, delimiter=",") + + # Variables to store data for the current block + data = [] + reached_target_line = False + + # Read each line of the CSV file + for line in csv_reader: + # If the line is not empty and contains information about the expected length of data + if line and "Data points available =" in line[0]: + expected_length += int(line[0].split(sep="=")[1]) + + # Check if the current line is the target line + if not reached_target_line: + if line == ["Date/Time Stamp", "Pressure", "Temp"]: + reached_target_line = True + else: + # If the line is empty, add the data of the current block to the total and reset the data of the block + if not line: + if data: + all_data.extend(data) + data = [] + reached_target_line = False + else: + # Otherwise, add the line of data to the current block + line[0] = convert_to_utc_with_formatting( + line[0], time_zone + ) # Format date to ISO8601 and convert to UTC + line[1] = np.float64( + line[1] + ) # Convert data type from str to float64 + line[2] = np.float64( + line[2] + ) # Convert data type from str to float64 + + data.append(line) + + finally: + # Close the file after reading + csvfile.close() + + # Convert all the data into a pandas DataFrame + df = pd.DataFrame(all_data, columns=["time", "pressure", "temperature"])[ + ["time", "temperature", "pressure"] + ] + + # Check if the expected length matches the actual length of data extracted + if expected_length == df.shape[0]: + print("Extraction for tag {} complete, no missing data".format(tag_id)) + else: + print("Extraction for tag {} might be incomplete, be careful".format(tag_id)) + + return df + + +def compat_checking(check_filepath, ref_filepath): + """ + Check the compatibility between a generated file and a reference file. + + Args: + check_filepath (str): Path to the generated file that needs to be checked. + ref_filepath (str): Path to the reference file already in a pangeo-fish compatible format. + + Returns: + None + + """ + # Load the generated file + generated_df = pd.read_csv(check_filepath) + + # Load the reference file + reference_df = pd.read_csv(ref_filepath) + + print("tests:") + # Test 1: Check if the columns are the same + if list(generated_df.columns) == list(reference_df.columns): + print("- Column names match.") + else: + print("- Column names do not match.") + + # Test 2: Check if the data types are the same + if generated_df.dtypes.equals(reference_df.dtypes): + print("- Data types match.") + else: + print("- Data types do not match.") + + +def save_dataframe_to_s3(dataframe, destination_path): + """ + Save a pandas DataFrame to a CSV file on an S3 path. + + Args: + dataframe (pd.DataFrame): The DataFrame to save. + destination_path (str): The S3 destination path where the CSV will be saved. 
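+
+    Example (illustrative; the bucket and key are placeholders):
+        >>> save_dataframe_to_s3(dst, "s3://my-bucket/tags/NO_A12667/dst.csv")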
+
+    Returns:
+        None
+    """
+    # Create an S3 filesystem object
+    s3 = s3fs.S3FileSystem(
+        anon=False,
+        client_kwargs={
+            "endpoint_url": "https://s3.gra.perf.cloud.ovh.net",  # S3 endpoint for OVH
+        },
+    )
+
+    # Save the DataFrame to the specified S3 path
+    with s3.open(destination_path, "w") as f:
+        dataframe.to_csv(f)
diff --git a/docs/data_formating.ipynb b/docs/data_formating.ipynb
new file mode 100644
index 0000000..10b1720
--- /dev/null
+++ b/docs/data_formating.ipynb
@@ -0,0 +1,443 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "13b2162c-86a6-4830-a706-32a2b3052564",
+   "metadata": {},
+   "source": [
+    "# **This notebook aims to extract data from a correctly formatted CSV file and adapt it to the pangeo-fish format**"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9b00244c-fe99-4c46-b651-aab892497506",
+   "metadata": {},
+   "source": [
+    "### **Necessary imports**\n",
+    "___"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cf0e3fa4-a1ec-4df6-93e8-d0318869c065",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from data_conversion import extract_tagging_events\n",
+    "from data_conversion import create_metadata_file\n",
+    "from data_conversion import extract_name\n",
+    "from data_conversion import extract_DST\n",
+    "\n",
+    "import os\n",
+    "from tqdm import tqdm"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "348073dc-2c82-4c7f-9e33-ddb3eb3f31c4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "remote = True"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "30ec15c7-0045-4124-b40e-4e294ed7b96e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "### Test with the tag NO_A12667\n",
+    "### These two paths will be used as an example to see if the full data extraction works correctly\n",
+    "\n",
+    "csv_path = \"s3://gfts-ifremer/tags/bargip/raw/AD_A11791.CSV\" # Path to the raw csv file, where the code will extract data from. Update with yours to adapt\n",
+    "destination = \"s3://gfts-ifremer/tags/bargip/clean_demo/AD_A11791/\" # Folder where you want to write your the different files. Update with yours to adapt\n",
+    "\n",
+    "if not remote:\n",
+    "    os.makedirs(destination, exist_ok=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f9febac8-6c0e-433f-a637-4eb1dd3569ac",
+   "metadata": {},
+   "source": [
+    "___\n",
+    "### 1. **Extracting the tagging events**\n",
+    "In this section, we extract the information needed for the tagging events (i.e., time and position for release, fish death and, when available, recapture).\n",
+    "___"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cf5549ae-3722-40a3-8dc6-4b86c7958d02",
+   "metadata": {},
+   "source": [
+    "See below the steps that the extract_tagging_events function performs:\n",
+    "\n",
+    "- **Purpose**:\n",
+    "  - Extracts releasing date, presumed fish death date, and fish release/recapture positions from a CSV file.\n",
+    "\n",
+    "- **Initialization**:\n",
+    "  - Initializes variables for storing dates (`release_date`, `fish_death`) and coordinates (`lon`, `lat`).\n",
+    "\n",
+    "- **Processing CSV**:\n",
+    "  - Opens the CSV file and iterates through each line.\n",
+    "\n",
+    "- **Data Extraction**:\n",
+    "  - Extracts the releasing date and the presumed fish death date.\n",
+    "  - Formats latitude and longitude coordinates for fish release/recapture positions.\n",
+    "\n",
+    "- **DataFrame Creation**:\n",
+    "  - Constructs a DataFrame with event names, dates, longitude, and latitude.\n",
+    "  - Returns the DataFrame containing tagging events data."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fd1185b8-504e-442a-81f2-d2660e7f7e0d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import s3fs\n",
+    "\n",
+    "s3 = s3fs.S3FileSystem(\n",
+    "    anon=False,\n",
+    "    client_kwargs={\n",
+    "        \"endpoint_url\": \"https://s3.gra.perf.cloud.ovh.net\",\n",
+    "    },\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d497b3ca-87cb-4269-9ea2-a57d775404a3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "### See the function tagging_events in the file data_conversion.py for further information\n",
+    "tagging_events = extract_tagging_events(\n",
+    "    csv_path, time_zone=\"Europe/Paris\", remote=remote\n",
+    ")\n",
+    "te_save_path = destination + \"tagging_events.csv\"\n",
+    "tagging_events.to_csv(te_save_path, index=False)\n",
+    "tagging_events"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cbd639af-9ae5-4acf-a6cc-80c98c864f7a",
+   "metadata": {},
+   "source": [
+    "___\n",
+    "### 2. **Creating the metadata JSON file**\n",
+    "In this section, we extract the information needed for the metadata.json file.\n",
+    "___\n",
+    "- **Purpose**:\n",
+    "  - Creates a metadata JSON file based on the provided data path and destination path.\n",
+    "\n",
+    "- **Initialization**:\n",
+    "  - Retrieves the tag name from the provided file path using a helper function.\n",
+    "  - Initializes metadata with tag ID, scientific name, common name, and project information.\n",
+    "\n",
+    "- **Metadata Construction**:\n",
+    "  - Constructs a dictionary (`metadata`) containing the tag ID, scientific name (\"Dicentrarchus labrax\"), common name (\"European seabass\"), and project name (\"BARGIP\").\n",
+    "\n",
+    "- **File Writing**:\n",
+    "  - Specifies the filename as \"metadata.json\" and constructs the full destination path.\n",
+    "  - Writes the metadata dictionary to a JSON file at the destination path.\n",
+    "\n",
+    "- **Result**:\n",
+    "  - No return value; a metadata JSON file is created at the specified destination path."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c1a9ba62-2d65-4d3d-bc26-9d52a0cc85ee",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "### See data_conversion.py for more information about the create_metadata_file function\n",
+    "create_metadata_file(csv_path, destination, remote=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2b9855de-f094-4db2-9131-88430115ad44",
+   "metadata": {},
+   "source": [
+    "___\n",
+    "### 3. **Creating the dst.csv file**\n",
+    "In this section, we will create the dst file that contains the pressure, temperature and time data.\n",
+    "See below the steps that the extract_DST function performs:\n",
+    "___"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7096f963-a919-4dd7-97d1-34eff6ebbf3f",
+   "metadata": {},
+   "source": [
+    "- **Opening the CSV File**:\n",
+    "  - Takes a file path to a CSV file containing time series data.\n",
+    "  - Opens the CSV file using the `csv.reader` object.\n",
+    "  \n",
+    "- **Iterating Through CSV Rows**:\n",
+    "  - Iterates through each row of the CSV file.\n",
+    "  \n",
+    "- **Extracting Tag ID**:\n",
+    "  - Extracts the tag ID from the file path using the `extract_name` function (defined in data_conversion.py).\n",
+    "  \n",
+    "- **Finding Target Line**:\n",
+    "  - Searches for the line that contains the headers for the data of interest (\"Date/Time Stamp\", \"Pressure\", \"Temp\").\n",
+    "  \n",
+    "- **Reading Data**:\n",
+    "  - Once the target line is found, starts reading data rows.\n",
+    "  \n",
+    "- **Formatting Date and Time**:\n",
+    "  - Formats the date and time column using the `convert_to_utc_with_formatting` function.\n",
+    "  - Converts the local time to UTC based on the specified time zone.\n",
+    "  \n",
+    "- **Converting Data Types**:\n",
+    "  - Converts the pressure and temperature data from strings to `numpy.float64` for numerical analysis.\n",
+    "  \n",
+    "- **Storing Data**:\n",
+    "  - Stores the formatted data into a list for further processing.\n",
+    "  \n",
+    "- **Creating DataFrame**:\n",
+    "  - After reading all data rows, converts the list of data into a Pandas DataFrame and reorders it to the columns ['time', 'temperature', 'pressure'].\n",
+    "  \n",
+    "- **Completeness Check**:\n",
+    "  - Checks if the number of data points extracted matches the expected length.\n",
+    "  - If they match, indicates completion; otherwise, warns about potential incompleteness.\n",
+    "  \n",
+    "- **Returning DataFrame**:\n",
+    "  - Finally, returns the DataFrame containing the extracted data.\n",
+    "\n",
+    "This function primarily focuses on extracting time, pressure, and temperature data from a CSV file, converting the date and time to UTC, and formatting the data for analysis. Check the function in the file data_conversion.py for further information."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d0c77590-3f0b-469a-89b4-a08252759068",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "time_zone = \"Europe/Paris\"\n",
+    "dst = extract_DST(csv_path, time_zone, remote=True)\n",
+    "dst"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "90417035-f1c9-4d72-80d3-384404c618be",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dst_save_path = destination + \"dst.csv\"\n",
+    "dst.to_csv(dst_save_path, index=True)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "fdf61e0d-4a6e-40ae-8c38-6cd016fa5c60",
+   "metadata": {},
+   "source": [
+    "___\n",
+    "### 4. **Convert and format everything under the raw_test folder to the cleaned folder**\n",
+    "This section serves as a test to check that the conversion works correctly for the different tags in the **raw_test** folder.\n",
+    "Afterwards, the purpose is to do the same operation on the **raw** folder.\n",
+    "___\n",
+    "#### Explanation of the code below:\n",
+    "- **Folders and Time Zone Setup**:\n",
+    "  - Defines folders (`raw_folder`, `destination_folder`) and time zone (`time_zone`).\n",
+    "\n",
+    "- **Destination Folder Creation**:\n",
+    "  - Checks if the destination folder exists; if not, creates it.\n",
+    "\n",
+    "- **Processing Raw Data**:\n",
+    "  - Iterates through raw files in the raw folder.\n",
+    "  - Extracts tag ID and constructs destination paths.\n",
+    "  - Creates tag-specific folders if they don't exist.\n",
+    "  - Extracts tagging events and DST data from raw files.\n",
+    "  - Saves extracted data to CSV files in respective tag folders.\n",
+    "  - Creates metadata files for each raw file.\n",
+    "\n",
+    "- **Handling Incorrect Raw Folder**:\n",
+    "  - Prints a message if the raw folder doesn't exist."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c2b345ec-d9f9-494c-a4c7-fc0fce417f87",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "%%time\n",
+    "\n",
+    "### Local\n",
+    "if not remote:\n",
+    "    raw_folder = \"../../all_raw/\" # Folder name to explore\n",
+    "    destination_folder = \"../../all_cleaned/\"\n",
+    "    time_zone = \"Europe/Paris\"\n",
+    "\n",
+    "    if not os.path.exists(destination_folder):\n",
+    "        os.mkdir(destination_folder)\n",
+    "\n",
+    "    # Check if the folder exists\n",
+    "    if os.path.exists(raw_folder):\n",
+    "        # Get list of files to iterate through\n",
+    "        files = [\n",
+    "            f\n",
+    "            for f in os.listdir(raw_folder)\n",
+    "            if os.path.isfile(os.path.join(raw_folder, f))\n",
+    "        ]\n",
+    "\n",
+    "        # Wrap files list with tqdm for progress bar\n",
+    "        for file_name in tqdm(files, desc=\"Processing files\"):\n",
+    "            raw_file = os.path.join(raw_folder, file_name)\n",
+    "\n",
+    "            # Extract filename without extension\n",
+    "            tag_id = extract_name(raw_file)\n",
+    "            destination_path = os.path.join(destination_folder, tag_id)\n",
+    "\n",
+    "            # Check if the folder for the tag exists, if not, create it\n",
+    "            if not os.path.exists(destination_path):\n",
+    "                # print(\"Creating folder for tag:\", tag_id)\n",
+    "                os.mkdir(destination_path)\n",
+    "\n",
+    "            ### Extracting tagging events from raw file\n",
+    "            tag_events = extract_tagging_events(raw_file)\n",
+    "            tagging_events_path = os.path.join(destination_path, \"tagging_events.csv\")\n",
+    "            tag_events.to_csv(\n",
+    "                tagging_events_path, index=False\n",
+    "            ) ### Saving them at the right path\n",
+    "\n",
+    "            ### Extracting DST from raw file\n",
+    "            tag_dst = extract_DST(raw_file, time_zone)\n",
+    "            dst_path = os.path.join(destination_path, \"dst.csv\")\n",
+    "            tag_dst.to_csv(dst_path, index=False) ### Saving them at the right path\n",
+    "\n",
+    "            ### Creating metadata files\n",
+    "            # print(\"creating_metadata\")\n",
+    "            create_metadata_file(raw_file, destination_path)\n",
+    "\n",
+    "    # else:\n",
+    "    # print(\"Wrong folder for raw files\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "19225a39-ed6e-49c9-a3a0-b35a78272fa5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%time\n",
+    "### Remote\n",
+    "if remote:\n",
+    "    s3 = s3fs.S3FileSystem(\n",
+    "        anon=False,\n",
+    "        client_kwargs={\n",
+    "            \"endpoint_url\": \"https://s3.gra.perf.cloud.ovh.net\",  # S3 endpoint for OVH\n",
+    "        },\n",
+    "    )\n",
+    "    raw_folder = \"s3://gfts-ifremer/tags/bargip/raw\" # Folder name to explore\n",
+    "    destination_folder = \"s3://gfts-ifremer/tags/bargip/clean_demo/\"\n",
+    "    time_zone = 
\"Europe/Paris\"\n", + "\n", + " # if not os.path.exists(destination_folder):\n", + " # os.mkdir(destination_folder)\n", + "\n", + " # Check if the folder exists\n", + " # if os.path.exists(raw_folder):\n", + "\n", + " # Get list of files to iterate through\n", + " files = [tag_id for tag_id in s3.ls(\"gfts-ifremer/tags/bargip/raw\")]\n", + "\n", + " # Wrap files list with tqdm for progress bar\n", + " for file_path in tqdm(files, desc=\"Processing files\"):\n", + " tag_id = file_path.replace(\"gfts-ifremer/tags/bargip/raw/\", \"\").replace(\n", + " \".CSV\", \"\"\n", + " )\n", + "\n", + " # # Extract filename without extension\n", + " destination = f\"{destination_folder}{tag_id}\"\n", + "\n", + " ### Extracting tagging events from raw file\n", + " tag_events = extract_tagging_events(file_path, remote=True)\n", + " te_save_path = f\"{destination}/tagging_events.csv\"\n", + " tag_events.to_csv(te_save_path, index=False)\n", + "\n", + " # ### Extracting DST from raw file\n", + " tag_dst = extract_DST(file_path, time_zone, remote=True)\n", + " dst_save_path = f\"{destination}/dst.csv\"\n", + " tag_dst.to_csv(dst_save_path, index=True)\n", + "\n", + " ###Creating metadata files\n", + " # print(\"creating_metadata\")\n", + " create_metadata_file(file_path, destination, remote=True)\n", + "\n", + "# else:\n", + "# print(\"Wrong folder for raw files\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a92a4b0-a25a-45af-b929-cfdb142ab2f5", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# Run this cell if you computed the tags locally and you want to put all tags from local to the bucket\n", + "\n", + "import s3fs\n", + "\n", + "s3 = s3fs.S3FileSystem(\n", + " anon=False,\n", + " client_kwargs={\n", + " \"endpoint_url\": \"https://s3.gra.perf.cloud.ovh.net\",\n", + " },\n", + ")\n", + "\n", + "if not remote:\n", + " s3.put(\"../../all_cleaned/\", \"gfts-ifremer/tags/bargip/cleaned\", recursive=True)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From e7cbd3071552a3df719b8033652ebfc44fd96064 Mon Sep 17 00:00:00 2001 From: aderrien7 Date: Thu, 29 Aug 2024 15:14:14 +0000 Subject: [PATCH 02/13] Adding notebooks for the data formatting and for the execution of multiple parametrized notebooks data_conversion.py : Contains all the functions for formatting files in pangeo-fish format data_formatting.ipynb : Notebook that showcase how the formatting works and ends with the routine to convert all the raw files pangeo-fish_papermill .ipynb: Template notebook executing all the pangeo algorithm through one file papermill_launcher.ipynb : Launcher that uses the template notebook with a given set of parameters --- docs/data_conversion.py | 26 +- docs/data_formating.ipynb | 226 ++- docs/pangeo-fish_papermill.ipynb | 2256 ++++++++++++++++++++++++++++++ docs/papermill_launcher.ipynb | 250 ++++ 4 files changed, 2730 insertions(+), 28 deletions(-) create mode 100644 docs/pangeo-fish_papermill.ipynb create mode 100644 docs/papermill_launcher.ipynb diff --git a/docs/data_conversion.py b/docs/data_conversion.py index 7f8981e..e3499fd 100644 --- a/docs/data_conversion.py +++ 
b/docs/data_conversion.py @@ -103,6 +103,7 @@ def convert_to_utc_with_formatting(date, time_zone): "%d/%m/%y %H:%M:%S", "%y-%m-%d %H:%M:%S", "%Y-%m-%d %H:%M:%S", + "%Y-%m-%dT%H:%M:%SZ", ] # Try parsing the date with different formats @@ -324,9 +325,7 @@ def extract_DST(file_path, time_zone, remote=False): reached_target_line = False else: # Otherwise, add the line of data to the current block - line[0] = convert_to_utc_with_formatting( - line[0], time_zone - ) # Format date to ISO8601 and convert to UTC + line[0] = format_date(line[0]) # Format date to ISO8601 line[1] = np.float64( line[1] ) # Convert data type from str to float64 @@ -345,6 +344,27 @@ def extract_DST(file_path, time_zone, remote=False): ["time", "temperature", "pressure"] ] + # Getting all the timestamps + time_stamps = pd.to_datetime(df["time"]) + + # Calculting time deltas + time_deltas = time_stamps - time_stamps.iloc[0] + + # Getting first timestamp and converting it to utc. + initial_time = time_stamps.iloc[0].strftime("%Y-%m-%dT%H:%M:%SZ") + time_utc = pd.to_datetime( + convert_to_utc_with_formatting(initial_time, "Europe/Paris") + ) + + # Calculating the new timestamps series and formatting it to ISO8601 + corrected_timestamps = time_deltas + time_utc + formatted_corrected_timestamps = corrected_timestamps.dt.strftime( + "%Y-%m-%dT%H:%M:%SZ" + ) + + # Replacing the in the dataframe + df["time"] = formatted_corrected_timestamps + # Check if the expected length matches the actual length of data extracted if expected_length == df.shape[0]: print("Extraction for tag {} complete, no missing data".format(tag_id)) diff --git a/docs/data_formating.ipynb b/docs/data_formating.ipynb index 10b1720..0620546 100644 --- a/docs/data_formating.ipynb +++ b/docs/data_formating.ipynb @@ -24,11 +24,16 @@ "metadata": {}, "outputs": [], "source": [ + "import pandas as pd\n", + "import numpy as np\n", "from data_conversion import extract_tagging_events\n", "from data_conversion import create_metadata_file\n", "from data_conversion import extract_name\n", + "from data_conversion import format_date\n", "from data_conversion import extract_DST\n", + "from data_conversion import convert_to_utc_with_formatting\n", "\n", + "import csv\n", "import os\n", "from tqdm import tqdm" ] @@ -52,9 +57,9 @@ "source": [ "### Test with the tag NO_A12667\n", "### These two paths will be used as an example to see if the full data extraction works correctly\n", - "\n", - "csv_path = \"s3://gfts-ifremer/tags/bargip/raw/AD_A11791.CSV\" # Path to the raw csv file, where the code will extract data from. Update with yours to adapt\n", - "destination = \"s3://gfts-ifremer/tags/bargip/clean_demo/AD_A11791/\" # Folder where you want to write your the different files. Update with yours to adapt\n", + "tag_id = \"DK_A10627\"\n", + "csv_path = f\"s3://gfts-ifremer/tags/bargip/raw/{tag_id}.CSV\" # Path to the raw csv file, where the code will extract data from. Update with yours to adapt\n", + "destination = f\"s3://gfts-ifremer/tags/bargip/clean_demo/{tag_id}/\" # Folder where you want to write your the different files. 
Update with yours to adapt\n", "\n", "if not remote:\n", " os.makedirs(destination, exist_ok=True)" @@ -125,7 +130,7 @@ " csv_path, time_zone=\"Europe/Paris\", remote=remote\n", ")\n", "te_save_path = destination + \"tagging_events.csv\"\n", - "tagging_events.to_csv(te_save_path, index=False)\n", + "# tagging_events.to_csv(te_save_path,index=False)\n", "tagging_events" ] }, @@ -226,7 +231,17 @@ { "cell_type": "code", "execution_count": null, - "id": "d0c77590-3f0b-469a-89b4-a08252759068", + "id": "7b7c3c74-08a2-4457-a80b-491e31489121", + "metadata": {}, + "outputs": [], + "source": [ + "csv_path" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "260fdf04-e86f-4e92-a8f0-21f03b4a0758", "metadata": {}, "outputs": [], "source": [ @@ -341,9 +356,137 @@ { "cell_type": "code", "execution_count": null, - "id": "19225a39-ed6e-49c9-a3a0-b35a78272fa5", + "id": "b4e015ee-f9dc-4faf-a603-a3ae819c64d4", + "metadata": {}, + "outputs": [], + "source": [ + "csv_path" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cfff5e2f-0ae1-4da0-945b-786db22c973d", "metadata": {}, "outputs": [], + "source": [ + "def extract_DST(file_path, time_zone, remote=False):\n", + " \"\"\"\n", + " Extracts time, pressure, and temperature data from a CSV file containing time series data.\n", + "\n", + " Args:\n", + " file_path (str): The path to the CSV file. For remote files, provide the S3 URI.\n", + " time_zone (str): The time zone for date conversion.\n", + " remote (bool): If True, fetch the file from S3. If False, read the file locally.\n", + "\n", + " Returns:\n", + " pandas.DataFrame: A DataFrame containing the extracted data.\n", + " \"\"\"\n", + " # List to store all the data\n", + " all_data = []\n", + " expected_length = 0\n", + "\n", + " # Extracting tag ID from the file path\n", + " tag_id = extract_name(file_path)\n", + "\n", + " if remote:\n", + " # Use s3fs to connect to the S3-compatible storage\n", + " s3 = s3fs.S3FileSystem(\n", + " anon=False,\n", + " client_kwargs={\n", + " \"endpoint_url\": \"https://s3.gra.perf.cloud.ovh.net\", # S3 endpoint for OVH\n", + " },\n", + " )\n", + " # Open the file from S3\n", + " csvfile = s3.open(file_path, mode=\"r\", encoding=\"latin-1\")\n", + " else:\n", + " # Open the file locally\n", + " csvfile = open(file_path, newline=\"\", encoding=\"latin-1\")\n", + "\n", + " try:\n", + " # Create a CSV reader\n", + " csv_reader = csv.reader(csvfile, delimiter=\",\")\n", + "\n", + " # Variables to store data for the current block\n", + " data = []\n", + " reached_target_line = False\n", + "\n", + " # Read each line of the CSV file\n", + " for line in csv_reader:\n", + " # If the line is not empty and contains information about the expected length of data\n", + " if line and \"Data points available =\" in line[0]:\n", + " expected_length += int(line[0].split(sep=\"=\")[1])\n", + "\n", + " # Check if the current line is the target line\n", + " if not reached_target_line:\n", + " if line == [\"Date/Time Stamp\", \"Pressure\", \"Temp\"]:\n", + " reached_target_line = True\n", + " else:\n", + " # If the line is empty, add the data of the current block to the total and reset the data of the block\n", + " if not line:\n", + " if data:\n", + " all_data.extend(data)\n", + " data = []\n", + " reached_target_line = False\n", + " else:\n", + " # Otherwise, add the line of data to the current block\n", + " line[0] = format_date(line[0]) # Format date to ISO8601\n", + " line[1] = np.float64(\n", + " line[1]\n", + " ) # Convert data type from str to 
float64\n", + " line[2] = np.float64(\n", + " line[2]\n", + " ) # Convert data type from str to float64\n", + "\n", + " data.append(line)\n", + "\n", + " finally:\n", + " # Close the file after reading\n", + " csvfile.close()\n", + "\n", + " # Convert all the data into a pandas DataFrame\n", + " df = pd.DataFrame(all_data, columns=[\"time\", \"pressure\", \"temperature\"])[\n", + " [\"time\", \"temperature\", \"pressure\"]\n", + " ]\n", + "\n", + " # Getting all the timestamps\n", + " time_stamps = pd.to_datetime(df[\"time\"])\n", + "\n", + " # Calculting time deltas\n", + " time_deltas = time_stamps - time_stamps.iloc[0]\n", + "\n", + " # Getting first timestamp and converting it to utc.\n", + " initial_time = time_stamps.iloc[0].strftime(\"%Y-%m-%dT%H:%M:%SZ\")\n", + " time_utc = pd.to_datetime(\n", + " convert_to_utc_with_formatting(initial_time, \"Europe/Paris\")\n", + " )\n", + "\n", + " # Calculating the new timestamps series and formatting it to ISO8601\n", + " corrected_timestamps = time_deltas + time_utc\n", + " formatted_corrected_timestamps = corrected_timestamps.dt.strftime(\n", + " \"%Y-%m-%dT%H:%M:%SZ\"\n", + " )\n", + "\n", + " # Replacing the in the dataframe\n", + " df[\"time\"] = formatted_corrected_timestamps\n", + "\n", + " # Check if the expected length matches the actual length of data extracted\n", + " if expected_length == df.shape[0]:\n", + " print(\"Extraction for tag {} complete, no missing data\".format(tag_id))\n", + " else:\n", + " print(\"Extraction for tag {} might be incomplete, be careful\".format(tag_id))\n", + "\n", + " return df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "19225a39-ed6e-49c9-a3a0-b35a78272fa5", + "metadata": { + "scrolled": true + }, + "outputs": [], "source": [ "%%time\n", "### Remote\n", @@ -365,35 +508,68 @@ " # if os.path.exists(raw_folder):\n", "\n", " # Get list of files to iterate through\n", - " files = [tag_id for tag_id in s3.ls(\"gfts-ifremer/tags/bargip/raw\")]\n", + " tag_ids = [\n", + " tag_id.replace(\"gfts-ifremer/tags/bargip/raw/\", \"\").replace(\".CSV\", \"\")\n", + " for tag_id in s3.ls(\"gfts-ifremer/tags/bargip/raw\")\n", + " ]\n", "\n", " # Wrap files list with tqdm for progress bar\n", - " for file_path in tqdm(files, desc=\"Processing files\"):\n", - " tag_id = file_path.replace(\"gfts-ifremer/tags/bargip/raw/\", \"\").replace(\n", - " \".CSV\", \"\"\n", - " )\n", - "\n", - " # # Extract filename without extension\n", - " destination = f\"{destination_folder}{tag_id}\"\n", + " for tag_id in tqdm(tag_ids, desc=\"Processing files\"):\n", + " print(tag_id)\n", + " try:\n", + " file_path = f\"{raw_folder}/{tag_id}.CSV\"\n", "\n", - " ### Extracting tagging events from raw file\n", - " tag_events = extract_tagging_events(file_path, remote=True)\n", - " te_save_path = f\"{destination}/tagging_events.csv\"\n", - " tag_events.to_csv(te_save_path, index=False)\n", + " # # Extract filename without extension\n", + " destination = f\"{destination_folder}{tag_id}\"\n", "\n", - " # ### Extracting DST from raw file\n", - " tag_dst = extract_DST(file_path, time_zone, remote=True)\n", - " dst_save_path = f\"{destination}/dst.csv\"\n", - " tag_dst.to_csv(dst_save_path, index=True)\n", + " ### Extracting tagging events from raw file\n", + " tag_events = extract_tagging_events(file_path, remote=True)\n", + " te_save_path = f\"{destination}/tagging_events.csv\"\n", + " tag_events.to_csv(te_save_path, index=False)\n", "\n", - " ###Creating metadata files\n", - " # print(\"creating_metadata\")\n", - " 
create_metadata_file(file_path, destination, remote=True)\n", + " # ### Extracting DST from raw file\n", + " tag_dst = extract_DST(file_path, time_zone, remote=True)\n", + " dst_save_path = f\"{destination}/dst.csv\"\n", + " tag_dst.to_csv(dst_save_path, index=False)\n", "\n", + " ###Creating metadata files\n", + " # print(\"creating_metadata\")\n", + " create_metadata_file(file_path, destination, remote=True)\n", + " except Exception as e:\n", + " print(f\"Error for {tag_id}\")\n", + " print(e)\n", "# else:\n", "# print(\"Wrong folder for raw files\")" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "184e7b1d-f994-4243-94e3-d7b396efc5f5", + "metadata": {}, + "outputs": [], + "source": [ + "toto = pd.read_csv(\"s3://gfts-ifremer/tags/bargip/cleaned/DK_A10625/dst.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "98b674e6-8a77-430a-9b02-da561140e4fa", + "metadata": {}, + "outputs": [], + "source": [ + "pd.read_csv(\"s3://gfts-ifremer/tags/bargip/raw/DK_A10625.CSV\", sep=\";\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "89a1ce27-de2d-46ed-b980-23ccdea64cca", + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "code", "execution_count": null, diff --git a/docs/pangeo-fish_papermill.ipynb b/docs/pangeo-fish_papermill.ipynb new file mode 100644 index 0000000..2516acc --- /dev/null +++ b/docs/pangeo-fish_papermill.ipynb @@ -0,0 +1,2256 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "86e6f639-2455-4f5d-a557-b78b1d821ecf", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "# **Example Usage of Pangeo-Fish Software**\n", + "\n", + "\n", + "**Overview:**\n", + "This Jupyter notebook demonstrates the usage of the Pangeo-Fish software, a tool designed for analyzing biologging data in reference to Earth Observation (EO) data. Specifically, it utilizes data employed in the study conducted by M. Gonze et al. titled \"Combining acoustic telemetry with archival tagging to investigate the spatial dynamics of the understudied pollack *Pollachius pollachius*,\" accepted for publication in the Journal of Fish Biology.\n", + "\n", + "We showcase the application using the biologging tag 'A19124' attached to a pollack fish, along with reference EO data from the European Union Copernicus Marine Service Information (CMEMS) product 'NORTHWESTSHELF_ANALYSIS_FORECAST_PHY_004_013'. The biologging data consist of Data Storage Tag (DST) and teledetection by acoustic signals, along with release and recapture time and location of the species in question. Both biologging data and the reference EO data are accessible with https and the access methods are incropolated in this notebook. \n", + "\n", + "\n", + "\n", + "**Purpose:**\n", + "By executing this notebook, users will learn how to set up a workflow for utilizing the Pangeo-Fish software. The workflow consists of 8 steps which are described below:\n", + "\n", + "1. **Configure the Notebook:** Prepare the notebook environment for analysis.\n", + "2. **Compare Reference Model with DST Information:** Analyze and compare data from the reference model with information from the biologging data of the species in question. \n", + "3. **Regrid the Grid from Reference Model Grid to Healpix Grid:** Transform the grid from the reference model to the Healpix grid for further analysis.\n", + "4. **Construct Emission Matrix:** Create an emission matrix based on the transformed grid.\n", + "5. 
**Combine and Normalize Emission Matrix:** Merge the emission matrix and normalize it for further processing.\n",
+    "6. **Estimate Model Parameters:** Determine the parameters of the model based on the normalized emission matrix.\n",
+    "7. **Compute State Probabilities and Tracks:** Calculate the probability distribution of the species in question and compute the tracks.\n",
+    "8. **Visualization:** Visualize the results of the analysis for interpretation and insight.\n",
+    "\n",
+    "Throughout this notebook, users will gain practical experience in setting up and executing a workflow using Pangeo-Fish, enabling them to apply similar methodologies to their own biologging data analysis tasks.\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c535925e-793d-41be-a989-4fae4cdaaa67",
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "source": [
+    "## 1. **Configure the Notebook:** Prepare the notebook environment for analysis.\n",
+    "\n",
+    "In this step, we set up the notebook environment for analysis. It includes installing necessary packages, importing required libraries, setting up parameters, and configuring the cluster for distributed computing. It also retrieves the tag data needed for analysis.\n",
+    "\n",
+    " "
+   ]
+  },
+  {
+   "cell_type": "raw",
+   "id": "195dbd56-b0d7-4659-849b-2e5db4591d2f",
+   "metadata": {
+    "editable": true,
+    "raw_mimetype": "",
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "source": [
+    "!pip install ../../git/pangeo-fish/"
+   ]
+  },
+  {
+   "cell_type": "raw",
+   "id": "19981551-2f17-4ac9-872c-5631edf9c0d5",
+   "metadata": {
+    "editable": true,
+    "raw_mimetype": "",
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "source": [
+    "!pip install copernicusmarine"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "682ef19d-ea85-49c9-a1ee-1f22d055b580",
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "outputs": [],
var script_attrs = bk_div.children[0].attributes;\n", + " for (var i = 0; i < script_attrs.length; i++) {\n", + " toinsert[toinsert.length - 1].childNodes[1].setAttribute(script_attrs[i].name, script_attrs[i].value);\n", + " }\n", + " // store reference to server id on output_area\n", + " output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n", + " }\n", + "}\n", + "\n", + "/**\n", + " * Handle when an output is cleared or removed\n", + " */\n", + "function handle_clear_output(event, handle) {\n", + " var id = handle.cell.output_area._hv_plot_id;\n", + " var server_id = handle.cell.output_area._bokeh_server_id;\n", + " if (((id === undefined) || !(id in PyViz.plot_index)) && (server_id !== undefined)) { return; }\n", + " var comm = window.PyViz.comm_manager.get_client_comm(\"hv-extension-comm\", \"hv-extension-comm\", function () {});\n", + " if (server_id !== null) {\n", + " comm.send({event_type: 'server_delete', 'id': server_id});\n", + " return;\n", + " } else if (comm !== null) {\n", + " comm.send({event_type: 'delete', 'id': id});\n", + " }\n", + " delete PyViz.plot_index[id];\n", + " if ((window.Bokeh !== undefined) & (id in window.Bokeh.index)) {\n", + " var doc = window.Bokeh.index[id].model.document\n", + " doc.clear();\n", + " const i = window.Bokeh.documents.indexOf(doc);\n", + " if (i > -1) {\n", + " window.Bokeh.documents.splice(i, 1);\n", + " }\n", + " }\n", + "}\n", + "\n", + "/**\n", + " * Handle kernel restart event\n", + " */\n", + "function handle_kernel_cleanup(event, handle) {\n", + " delete PyViz.comms[\"hv-extension-comm\"];\n", + " window.PyViz.plot_index = {}\n", + "}\n", + "\n", + "/**\n", + " * Handle update_display_data messages\n", + " */\n", + "function handle_update_output(event, handle) {\n", + " handle_clear_output(event, {cell: {output_area: handle.output_area}})\n", + " handle_add_output(event, handle)\n", + "}\n", + "\n", + "function register_renderer(events, OutputArea) {\n", + " function append_mime(data, metadata, element) {\n", + " // create a DOM node to render to\n", + " var toinsert = this.create_output_subarea(\n", + " metadata,\n", + " CLASS_NAME,\n", + " EXEC_MIME_TYPE\n", + " );\n", + " this.keyboard_manager.register_events(toinsert);\n", + " // Render to node\n", + " var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n", + " render(props, toinsert[0]);\n", + " element.append(toinsert);\n", + " return toinsert\n", + " }\n", + "\n", + " events.on('output_added.OutputArea', handle_add_output);\n", + " events.on('output_updated.OutputArea', handle_update_output);\n", + " events.on('clear_output.CodeCell', handle_clear_output);\n", + " events.on('delete.Cell', handle_clear_output);\n", + " events.on('kernel_ready.Kernel', handle_kernel_cleanup);\n", + "\n", + " OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n", + " safe: true,\n", + " index: 0\n", + " });\n", + "}\n", + "\n", + "if (window.Jupyter !== undefined) {\n", + " try {\n", + " var events = require('base/js/events');\n", + " var OutputArea = require('notebook/js/outputarea').OutputArea;\n", + " if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n", + " register_renderer(events, OutputArea);\n", + " }\n", + " } catch(err) {\n", + " }\n", + "}\n" + ], + "application/vnd.holoviews_load.v0+json": "\nif ((window.PyViz === undefined) || (window.PyViz instanceof HTMLElement)) {\n window.PyViz = {comms: {}, comm_status:{}, kernels:{}, receivers: {}, plot_index: []}\n}\n\n\n function JupyterCommManager() {\n }\n\n 
JupyterCommManager.prototype.register_target = function(plot_id, comm_id, msg_handler) {\n if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n var comm_manager = window.comm_manager || Jupyter.notebook.kernel.comm_manager;\n comm_manager.register_target(comm_id, function(comm) {\n comm.on_msg(msg_handler);\n });\n } else if ((plot_id in window.PyViz.kernels) && (window.PyViz.kernels[plot_id])) {\n window.PyViz.kernels[plot_id].registerCommTarget(comm_id, function(comm) {\n comm.onMsg = msg_handler;\n });\n } else if (typeof google != 'undefined' && google.colab.kernel != null) {\n google.colab.kernel.comms.registerTarget(comm_id, (comm) => {\n var messages = comm.messages[Symbol.asyncIterator]();\n function processIteratorResult(result) {\n var message = result.value;\n console.log(message)\n var content = {data: message.data, comm_id};\n var buffers = []\n for (var buffer of message.buffers || []) {\n buffers.push(new DataView(buffer))\n }\n var metadata = message.metadata || {};\n var msg = {content, buffers, metadata}\n msg_handler(msg);\n return messages.next().then(processIteratorResult);\n }\n return messages.next().then(processIteratorResult);\n })\n }\n }\n\n JupyterCommManager.prototype.get_client_comm = function(plot_id, comm_id, msg_handler) {\n if (comm_id in window.PyViz.comms) {\n return window.PyViz.comms[comm_id];\n } else if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n var comm_manager = window.comm_manager || Jupyter.notebook.kernel.comm_manager;\n var comm = comm_manager.new_comm(comm_id, {}, {}, {}, comm_id);\n if (msg_handler) {\n comm.on_msg(msg_handler);\n }\n } else if ((plot_id in window.PyViz.kernels) && (window.PyViz.kernels[plot_id])) {\n var comm = window.PyViz.kernels[plot_id].connectToComm(comm_id);\n comm.open();\n if (msg_handler) {\n comm.onMsg = msg_handler;\n }\n } else if (typeof google != 'undefined' && google.colab.kernel != null) {\n var comm_promise = google.colab.kernel.comms.open(comm_id)\n comm_promise.then((comm) => {\n window.PyViz.comms[comm_id] = comm;\n if (msg_handler) {\n var messages = comm.messages[Symbol.asyncIterator]();\n function processIteratorResult(result) {\n var message = result.value;\n var content = {data: message.data};\n var metadata = message.metadata || {comm_id};\n var msg = {content, metadata}\n msg_handler(msg);\n return messages.next().then(processIteratorResult);\n }\n return messages.next().then(processIteratorResult);\n }\n }) \n var sendClosure = (data, metadata, buffers, disposeOnDone) => {\n return comm_promise.then((comm) => {\n comm.send(data, metadata, buffers, disposeOnDone);\n });\n };\n var comm = {\n send: sendClosure\n };\n }\n window.PyViz.comms[comm_id] = comm;\n return comm;\n }\n window.PyViz.comm_manager = new JupyterCommManager();\n \n\n\nvar JS_MIME_TYPE = 'application/javascript';\nvar HTML_MIME_TYPE = 'text/html';\nvar EXEC_MIME_TYPE = 'application/vnd.holoviews_exec.v0+json';\nvar CLASS_NAME = 'output';\n\n/**\n * Render data to the DOM node\n */\nfunction render(props, node) {\n var div = document.createElement(\"div\");\n var script = document.createElement(\"script\");\n node.appendChild(div);\n node.appendChild(script);\n}\n\n/**\n * Handle when a new output is added\n */\nfunction handle_add_output(event, handle) {\n var output_area = handle.output_area;\n var output = handle.output;\n if ((output.data == undefined) || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n return\n }\n var id 
= output.metadata[EXEC_MIME_TYPE][\"id\"];\n var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n if (id !== undefined) {\n var nchildren = toinsert.length;\n var html_node = toinsert[nchildren-1].children[0];\n html_node.innerHTML = output.data[HTML_MIME_TYPE];\n var scripts = [];\n var nodelist = html_node.querySelectorAll(\"script\");\n for (var i in nodelist) {\n if (nodelist.hasOwnProperty(i)) {\n scripts.push(nodelist[i])\n }\n }\n\n scripts.forEach( function (oldScript) {\n var newScript = document.createElement(\"script\");\n var attrs = [];\n var nodemap = oldScript.attributes;\n for (var j in nodemap) {\n if (nodemap.hasOwnProperty(j)) {\n attrs.push(nodemap[j])\n }\n }\n attrs.forEach(function(attr) { newScript.setAttribute(attr.name, attr.value) });\n newScript.appendChild(document.createTextNode(oldScript.innerHTML));\n oldScript.parentNode.replaceChild(newScript, oldScript);\n });\n if (JS_MIME_TYPE in output.data) {\n toinsert[nchildren-1].children[1].textContent = output.data[JS_MIME_TYPE];\n }\n output_area._hv_plot_id = id;\n if ((window.Bokeh !== undefined) && (id in Bokeh.index)) {\n window.PyViz.plot_index[id] = Bokeh.index[id];\n } else {\n window.PyViz.plot_index[id] = null;\n }\n } else if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n var bk_div = document.createElement(\"div\");\n bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n var script_attrs = bk_div.children[0].attributes;\n for (var i = 0; i < script_attrs.length; i++) {\n toinsert[toinsert.length - 1].childNodes[1].setAttribute(script_attrs[i].name, script_attrs[i].value);\n }\n // store reference to server id on output_area\n output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n }\n}\n\n/**\n * Handle when an output is cleared or removed\n */\nfunction handle_clear_output(event, handle) {\n var id = handle.cell.output_area._hv_plot_id;\n var server_id = handle.cell.output_area._bokeh_server_id;\n if (((id === undefined) || !(id in PyViz.plot_index)) && (server_id !== undefined)) { return; }\n var comm = window.PyViz.comm_manager.get_client_comm(\"hv-extension-comm\", \"hv-extension-comm\", function () {});\n if (server_id !== null) {\n comm.send({event_type: 'server_delete', 'id': server_id});\n return;\n } else if (comm !== null) {\n comm.send({event_type: 'delete', 'id': id});\n }\n delete PyViz.plot_index[id];\n if ((window.Bokeh !== undefined) & (id in window.Bokeh.index)) {\n var doc = window.Bokeh.index[id].model.document\n doc.clear();\n const i = window.Bokeh.documents.indexOf(doc);\n if (i > -1) {\n window.Bokeh.documents.splice(i, 1);\n }\n }\n}\n\n/**\n * Handle kernel restart event\n */\nfunction handle_kernel_cleanup(event, handle) {\n delete PyViz.comms[\"hv-extension-comm\"];\n window.PyViz.plot_index = {}\n}\n\n/**\n * Handle update_display_data messages\n */\nfunction handle_update_output(event, handle) {\n handle_clear_output(event, {cell: {output_area: handle.output_area}})\n handle_add_output(event, handle)\n}\n\nfunction register_renderer(events, OutputArea) {\n function append_mime(data, metadata, element) {\n // create a DOM node to render to\n var toinsert = this.create_output_subarea(\n metadata,\n CLASS_NAME,\n EXEC_MIME_TYPE\n );\n this.keyboard_manager.register_events(toinsert);\n // Render to node\n var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n render(props, toinsert[0]);\n element.append(toinsert);\n return toinsert\n }\n\n events.on('output_added.OutputArea', handle_add_output);\n 
events.on('output_updated.OutputArea', handle_update_output);\n events.on('clear_output.CodeCell', handle_clear_output);\n events.on('delete.Cell', handle_clear_output);\n events.on('kernel_ready.Kernel', handle_kernel_cleanup);\n\n OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n safe: true,\n index: 0\n });\n}\n\nif (window.Jupyter !== undefined) {\n try {\n var events = require('base/js/events');\n var OutputArea = require('notebook/js/outputarea').OutputArea;\n if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n register_renderer(events, OutputArea);\n }\n } catch(err) {\n }\n}\n" + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.holoviews_exec.v0+json": "", + "text/html": [ + "
\n", + "
\n", + "
\n", + "" + ] + }, + "metadata": { + "application/vnd.holoviews_exec.v0+json": { + "id": "p1002" + } + }, + "output_type": "display_data" + } + ], + "source": [ + "# Import necessary libraries and modules.\n", + "import xarray as xr\n", + "from pint_xarray import unit_registry as ureg\n", + "from pangeo_fish.io import open_tag\n", + "import intake" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cde0c569-0b64-407f-b167-bb9fe7ee4349", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "#\n", + "# Set up execution parameters for the analysis.\n", + "#\n", + "# Note: This cell is tagged as parameters, allowing automatic updates when configuring with papermil.\n", + "\n", + "# tag_name corresponds to the name of the biologging tag name (DST identification number),\n", + "# which is also a path for storing all the information for the specific fish tagged with tag_name.\n", + "# tag_name = \"AD_A11849\"\n", + "# tag_name = \"SV_A11957\"\n", + "\n", + "\n", + "tag_list = [\n", + " \"NO_A12710\",\n", + " \"CB_A11036\",\n", + " \"LT_A11385\",\n", + " \"SQ_A10684\",\n", + " \"AD_A11177\",\n", + " \"PB_A12063\",\n", + " \"NO_A12742\",\n", + " \"DK_A10642\",\n", + " \"CB_A11071\",\n", + "]\n", + "tag_name = tag_list[8]\n", + "tag_name = \"DK_A10531\"\n", + "tag_name = \"AD_A11146\"\n", + "\n", + "cloud_root = \"s3://gfts-ifremer/tags/bargip\"\n", + "\n", + "# tag_root specifies the root URL for tag data used for this computation.\n", + "tag_root = f\"{cloud_root}/cleaned\"\n", + "\n", + "# catalog_url specifies the URL for the catalog for reference data used.\n", + "catalog_url = \"s3://gfts-ifremer/copernicus_catalogs/master.yml\"\n", + "\n", + "# scratch_root specifies the root directory for storing output files.\n", + "scratch_root = f\"{cloud_root}/tracks\"\n", + "\n", + "\n", + "# storage_options specifies options for the filesystem storing output files.\n", + "storage_options = {\n", + " \"anon\": False,\n", + " # 'profile' : \"gfts\",\n", + " \"client_kwargs\": {\n", + " \"endpoint_url\": \"https://s3.gra.perf.cloud.ovh.net\",\n", + " \"region_name\": \"gra\",\n", + " },\n", + "}\n", + "\n", + "# if you are using local file system, activate following two lines\n", + "scratch_root = \"./papermill_test\"\n", + "storage_options = None\n", + "\n", + "# Default chunk value for time dimension. This values depends on the configuration of your dask cluster.\n", + "chunk_time = 24\n", + "\n", + "#\n", + "# Parameters for step 2. **Compare Reference Model with DST Information:**\n", + "#\n", + "# bbox, bounding box, defines the latitude and longitude range for the analysis area.\n", + "bbox = {\"latitude\": [42, 56], \"longitude\": [-13, 5]}\n", + "\n", + "# relative_depth_threshold defines the acceptable fish depth relative to the maximum tag depth.\n", + "# It determines whether the fish can be considered to be in a certain location based on depth.\n", + "relative_depth_threshold = 0.8\n", + "\n", + "#\n", + "# Parameters for step 3. 
+    "#\n",
+    "# Parameters for step 3. **Regrid the Grid from Reference Model Grid to Healpix Grid:**\n",
+    "#\n",
+    "# nside defines the resolution of the healpix grid used for regridding.\n",
+    "nside = 4096  # *2\n",
+    "\n",
+    "# rot defines the rotation angles for the healpix grid.\n",
+    "rot = {\"lat\": 0, \"lon\": 30}\n",
+    "\n",
+    "# min_vertices sets the minimum number of valid source vertices required for an interpolated cell during regridding.\n",
+    "min_vertices = 1\n",
+    "\n",
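+    "# For orientation (a hedged note, not used by the pipeline): a HEALPix grid with\n",
+    "# nside = 4096 has 12 * nside**2 (about 2.0e8) cells, i.e. a mean cell spacing of\n",
+    "# roughly sqrt(4 * pi / (12 * nside**2)) = 2.5e-4 rad, or ~0.86 arcmin (~1.6 km).\n",
+    "\n",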
+    "#\n",
+    "# Parameters for step 4. **Construct Emission Matrix:**\n",
+    "#\n",
+    "# differences_std sets the standard deviation for scipy.stats.norm.pdf.\n",
+    "# It expresses the estimated certainty of the field of difference.\n",
+    "differences_std = 0.75\n",
+    "\n",
+    "# recapture_std sets the covariance for the recapture event.\n",
+    "# It expresses the certainty of the final recapture area, if it is known.\n",
+    "recapture_std = 1e-2\n",
+    "\n",
+    "# earth_radius defines the radius of the Earth used for distance calculations.\n",
+    "earth_radius = ureg.Quantity(6371, \"km\")\n",
+    "\n",
+    "# maximum_speed sets the maximum allowable speed for the tagged fish.\n",
+    "maximum_speed = ureg.Quantity(60, \"km / day\")\n",
+    "\n",
+    "# adjustment_factor relaxes the parameters for a fuzzier search.\n",
+    "# It scales the maximum allowed displacement of the fish.\n",
+    "adjustment_factor = 5\n",
+    "\n",
+    "# truncate sets the truncation factor for the computed maximum allowed sigma of the convolution process.\n",
+    "truncate = 4\n",
+    "\n",
+    "#\n",
+    "# Parameters for step 5. **Compute Additional Emission Probability Matrix:**\n",
+    "#\n",
+    "# receiver_buffer sets the maximum allowed detection distance for acoustic receivers.\n",
+    "receiver_buffer = ureg.Quantity(1000, \"m\")\n",
+    "\n",
+    "#\n",
+    "# Parameters for step 7. **Estimate Model Parameters:**\n",
+    "#\n",
+    "# tolerance sets the tolerance level for the optimised parameter search.\n",
+    "tolerance = 1e-3\n",
+    "\n",
+    "#\n",
+    "# Parameters for step 8. **Compute State Probabilities and Tracks:**\n",
+    "#\n",
+    "# track_modes defines the modes for track calculation.\n",
+    "track_modes = [\"mean\", \"mode\"]\n",
+    "\n",
+    "# additional_track_quantities sets the quantities to compute for the tracks using MovingPandas.\n",
+    "additional_track_quantities = [\"speed\", \"distance\"]\n",
+    "\n",
+    "\n",
+    "#\n",
+    "# Parameters for step 9. **Visualization:**\n",
+    "#\n",
+    "# time_step defines the stride (in time steps) at which the state and emission matrices are visualized.\n",
+    "time_step = 3\n",
+    "\n",
+    "\n",
+    "# Define target root directories for storing analysis results.\n",
+    "target_root = f\"{scratch_root}/{tag_name}\"\n",
+    "\n",
+    "# Define the default chunk sizes for optimisation.\n",
+    "default_chunk = {\"time\": chunk_time, \"lat\": -1, \"lon\": -1}\n",
+    "default_chunk_xy = {\"time\": chunk_time, \"x\": -1, \"y\": -1}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "653051cb-1868-43a4-a8b9-7d985ca95dcb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Redefine target_root after papermill has injected the parameters.\n",
+    "target_root = f\"{scratch_root}/{tag_name}\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "45612cff-e622-4a3b-9879-2fac50c8cfe5",
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "tag_root"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1268b5c0-b1e8-4d12-b6c9-b3b7aa54f99b",
+   "metadata": {
+    "editable": true,
+    "scrolled": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Set up a local cluster for distributed computing.\n",
+    "from distributed import LocalCluster\n",
+    "\n",
+    "cluster = LocalCluster()\n",
+    "client = cluster.get_client()\n",
+    "client"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "206aeb3c-9684-4eac-80e8-e94939529747",
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Open and retrieve the tag data required for the analysis\n",
+    "tag = open_tag(tag_root, tag_name)\n",
+    "tag"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "524fe17c-43b2-498b-a06b-91ddfba27b81",
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "source": [
+    "## 2. **Compare Reference Model with DST Tag Information:** Analyze and compare data from the reference model with information from the biologging data of the species in question.\n",
+    "\n",
+    "In this step, we compare the reference model data with the Data Storage Tag information.\n",
+    "The process involves reading and cleaning the reference model, aligning time, converting depth units, subtracting tag data from the model, and saving the results.\n",
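+    "\n",
+    "A minimal NumPy sketch of the comparison (hedged: the real pipeline uses `diff_z` with proper time binning; the shapes and values here are purely illustrative):\n",
+    "\n",
+    "```python\n",
+    "import numpy as np\n",
+    "\n",
+    "model_temp = np.random.rand(3, 5, 4, 4)  # (time, depth, y, x) stand-in for TEMP\n",
+    "tag_depth_idx = np.array([1, 3, 2])  # observed depth bin per time step\n",
+    "tag_temp = np.array([11.2, 10.8, 11.0])  # observed temperature per time step\n",
+    "\n",
+    "# Difference between the model profile at the observed depth and the tag reading:\n",
+    "diff = model_temp[np.arange(3), tag_depth_idx] - tag_temp[:, None, None]\n",
+    "print(diff.shape)  # (3, 4, 4): one difference field per time step\n",
+    "```"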
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4afd36b3-2121-45ec-9ffc-d03b6bda9d24",
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Import necessary libraries\n",
+    "from pangeo_fish.cf import bounds_to_bins\n",
+    "from pangeo_fish.diff import diff_z\n",
+    "from pangeo_fish.tags import adapt_model_time, reshape_by_bins, to_time_slice\n",
+    "\n",
+    "# Drop data outside the reference interval\n",
+    "time_slice = to_time_slice(tag[\"tagging_events/time\"])\n",
+    "time = tag[\"dst\"].ds.time\n",
+    "cond = (time <= time_slice.stop) & (time >= time_slice.start)\n",
+    "\n",
+    "tag_log = tag[\"dst\"].ds.where(cond, drop=True)\n",
+    "\n",
+    "min_ = tag_log.time[0]\n",
+    "max_ = tag_log.time[-1]\n",
+    "\n",
+    "time_slice = slice(min_.data, max_.data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "da5b25fe-6028-4daf-97b8-7f0e00a1c581",
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "def get_copernicus_zarr(product_id=\"IBI_MULTIYEAR_PHY_005_002\"):\n",
+    "    master_cat = intake.open_catalog(catalog_url)\n",
+    "    if product_id == \"IBI_MULTIYEAR_PHY_005_002\":\n",
+    "        # Open necessary datasets\n",
+    "        sub_cat = master_cat[product_id]\n",
+    "        thetao = sub_cat[\"cmems_mod_ibi_phy_my_0.083deg-3D_P1D-m\"](\n",
+    "            chunk=\"time\"\n",
+    "        ).to_dask()[[\"thetao\"]]\n",
+    "        zos = (\n",
+    "            sub_cat[\"cmems_mod_ibi_phy_my_0.083deg-3D_P1D-m\"](chunk=\"time\")\n",
+    "            .to_dask()\n",
+    "            .zos\n",
+    "        )\n",
+    "        deptho = sub_cat[\"cmems_mod_ibi_phy_my_0.083deg-3D_static\"].to_dask().deptho\n",
+    "\n",
+    "        # Assign latitude from thetao to deptho\n",
+    "        deptho[\"latitude\"] = thetao[\"latitude\"]\n",
+    "\n",
+    "        # Create mask for deptho\n",
+    "        mask = deptho.isnull()\n",
+    "\n",
+    "        # Merge datasets and assign relevant variables\n",
+    "        ds = (\n",
+    "            thetao.rename({\"thetao\": \"TEMP\"}).assign(\n",
+    "                {\n",
+    "                    \"XE\": zos,\n",
+    "                    \"H0\": deptho,\n",
+    "                    \"mask\": mask,\n",
+    "                }\n",
+    "            )\n",
+    "        ).rename({\"latitude\": \"lat\", \"longitude\": \"lon\", \"elevation\": \"depth\"})\n",
+    "\n",
+    "        # Ensure depth is positive\n",
+    "        ds[\"depth\"] = abs(ds[\"depth\"])\n",
+    "\n",
+    "        # Rearrange depth coordinates and assign dynamic depth and bathymetry\n",
+    "        ds = (\n",
+    "            ds.isel(depth=slice(None, None, -1))\n",
+    "            .assign(\n",
+    "                {\n",
+    "                    \"dynamic_depth\": lambda ds: (ds[\"depth\"] + ds[\"XE\"]).assign_attrs(\n",
+    "                        {\"units\": \"m\", \"positive\": \"down\"}\n",
+    "                    ),\n",
+    "                    \"dynamic_bathymetry\": lambda ds: (ds[\"H0\"] + ds[\"XE\"]).assign_attrs(\n",
+    "                        {\"units\": \"m\", \"positive\": \"down\"}\n",
+    "                    ),\n",
+    "                }\n",
+    "            )\n",
+    "            .pipe(broadcast_variables, {\"lat\": \"latitude\", \"lon\": \"longitude\"})\n",
+    "        )\n",
+    "    return ds"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "56dc60d0-52b7-47f9-b18d-0e7b5ada0f01",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "(tag_log).hvplot(x=\"time\", y=\"temperature\", color=\"red\", size=5, width=1000, height=500)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c4952515-7750-4fee-ad7d-a5a0032878f1",
"metadata": {}, + "outputs": [], + "source": [ + "abs(tag_log.temperature.diff(dim=\"time\")).hvplot(\n", + " x=\"time\", y=\"temperature\", color=\"red\", size=5, width=1000, height=500\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "53c69ffd-b396-4f4f-905f-a65e160476bc", + "metadata": {}, + "outputs": [], + "source": [ + "abs(tag_log.temperature.diff(dim=\"time\")).hvplot(\n", + " x=\"time\", y=\"temperature\", color=\"red\", size=5, width=1000, height=500\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "14b39398-0758-4213-b019-3927daf90050", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify the data\n", + "from pangeo_fish.io import save_html_hvplot\n", + "\n", + "plot = (\n", + " (-tag[\"dst\"].pressure).hvplot(width=1000, height=500, color=\"blue\")\n", + " * (-tag_log).hvplot.scatter(\n", + " x=\"time\", y=\"pressure\", color=\"red\", size=5, width=1000, height=500\n", + " )\n", + " * (\n", + " (tag[\"dst\"].temperature).hvplot(width=1000, height=500, color=\"blue\")\n", + " * (tag_log).hvplot.scatter(\n", + " x=\"time\", y=\"temperature\", color=\"red\", size=5, width=1000, height=500\n", + " )\n", + " )\n", + ")\n", + "filepath = f\"{target_root}/tags.html\"\n", + "\n", + "save_html_hvplot(plot, filepath, storage_options)\n", + "\n", + "plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e1f02606-3999-445e-84c0-0c287502c7e9", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "from pangeo_fish.io import broadcast_variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "427b5862-9e01-4dd6-bd61-0a9563213dce", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "model = get_copernicus_zarr()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "db1959b6-6293-48b9-8f52-1a1e2664b702", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bab1c2d9-d4cb-4392-a07c-9442b8f05f46", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Subset the reference_model by\n", + "# - align model time with the time of tag_log, also\n", + "# - drop data for depth later that are unlikely due to the observed pressure from tag_log\n", + "# - defined latitude and longitude of bbox.\n", + "#\n", + "reference_model = (\n", + " model.sel(time=adapt_model_time(time_slice))\n", + " .sel(lat=slice(*bbox[\"latitude\"]), lon=slice(*bbox[\"longitude\"]))\n", + " .pipe(\n", + " lambda ds: ds.sel(\n", + " depth=slice(None, (tag_log[\"pressure\"].max() - ds[\"XE\"].min()).compute())\n", + " )\n", + " )\n", + ")\n", + "reference_model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3ca01800-cc83-471f-9e8e-c0851f006f1f", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "%%time\n", + "# Reshape the tag log, so that it bins to the time step of reference_model\n", + "reshaped_tag = reshape_by_bins(\n", + " tag_log,\n", + " dim=\"time\",\n", + " bins=(\n", + " reference_model.cf.add_bounds([\"time\"], output_dim=\"bounds\")\n", + " .pipe(bounds_to_bins, bounds_dim=\"bounds\")\n", + " 
.get(\"time_bins\")\n", + " ),\n", + " bin_dim=\"bincount\",\n", + " other_dim=\"obs\",\n", + ").chunk({\"time\": chunk_time})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "46d89294-10b1-4fde-861b-c69576f217dc", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "reshaped_tag" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "46574d23-54f8-45e1-8619-06e05292f1c2", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Subtract the time_bined tag_log from the reference_model.\n", + "# Here, for each time_bin, each observed value are compared with the correspoindng depth of reference_model using diff_z function.\n", + "#\n", + "\n", + "diff = (\n", + " diff_z(\n", + " reference_model.chunk(dict(depth=-1)),\n", + " reshaped_tag,\n", + " depth_threshold=relative_depth_threshold,\n", + " )\n", + " .assign_attrs({\"tag_id\": tag_name})\n", + " .assign(\n", + " {\n", + " \"H0\": reference_model[\"H0\"],\n", + " \"ocean_mask\": reference_model[\"H0\"].notnull(),\n", + " }\n", + " )\n", + ")\n", + "\n", + "# Persist the diff data\n", + "diff = diff.chunk(default_chunk).persist()\n", + "diff" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "74efae4e-53d2-4852-9b20-c5e3028e0c63", + "metadata": { + "editable": true, + "scrolled": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "%%time\n", + "# Verify the data\n", + "# diff[\"diff\"].count([\"lat\",\"lon\"]).plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecb9a957-8df3-4c22-b6ca-f709c785b17f", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "target_root" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d4c0325b-523c-4d58-8319-151183bb1376", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "%%time\n", + "# Save snapshot to disk\n", + "diff.to_zarr(f\"{target_root}/diff.zarr\", mode=\"w\", storage_options=storage_options)\n", + "\n", + "# Cleanup\n", + "del tag_log, model, reference_model, reshaped_tag, diff" + ] + }, + { + "cell_type": "markdown", + "id": "788d3e03-0e98-4355-a725-a2cce85115cf", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## 3. **Regrid the Grid from Reference Model Grid to Healpix Grid:** Transform the grid from the reference model to the Healpix grid for further analysis.\n", + "\n", + "In this step, we regrid the data from the reference model grid to a Healpix grid. 
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5d7dcccb-dbf3-435c-9094-4bd73497ef22",
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Import necessary libraries\n",
+    "import numpy as np\n",
+    "from xarray_healpy import HealpyGridInfo, HealpyRegridder\n",
+    "from pangeo_fish.grid import center_longitude"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f08ff2ef-fc82-449d-90b7-c3bae7dc5d1b",
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "%%time\n",
+    "\n",
+    "# Open the diff data and perform cleaning operations to prepare it for regridding.\n",
+    "\n",
+    "ds = (\n",
+    "    xr.open_dataset(\n",
+    "        f\"{target_root}/diff.zarr\",\n",
+    "        engine=\"zarr\",\n",
+    "        chunks={},\n",
+    "        storage_options=storage_options,\n",
+    "    )\n",
+    "    .pipe(lambda ds: ds.merge(ds[[\"latitude\", \"longitude\"]].compute()))\n",
+    "    .swap_dims({\"lat\": \"yi\", \"lon\": \"xi\"})\n",
+    "    .drop_vars([\"lat\", \"lon\"])\n",
+    ")\n",
+    "ds"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6778d294-bfae-4cdc-844c-724400ffe7b1",
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "%%time\n",
+    "# Define the target Healpix grid information\n",
+    "grid = HealpyGridInfo(level=int(np.log2(nside)), rot=rot)\n",
+    "target_grid = grid.target_grid(ds).pipe(center_longitude, 0)\n",
+    "target_grid"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "14fd5c95-744c-4adb-b16f-49b49bb03d5c",
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "grid"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7078ae4b-fd5d-4021-9aad-096d4a392199",
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "%%time\n",
+    "# Compute the interpolation weights for regridding the diff data\n",
+    "regridder = HealpyRegridder(\n",
+    "    ds[[\"longitude\", \"latitude\", \"ocean_mask\"]],\n",
+    "    target_grid,\n",
+    "    method=\"bilinear\",\n",
+    "    interpolation_kwargs={\"mask\": \"ocean_mask\", \"min_vertices\": min_vertices},\n",
+    ")\n",
+    "regridder"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1bc8c737-7aa4-4177-8aa4-929e3e917de9",
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "%%time\n",
+    "# Perform the regridding operation using the computed interpolation weights.\n",
+    "regridded = regridder.regrid_ds(ds)\n",
+    "regridded"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bd14c62b-7a41-4dc9-a5b8-3d19742147c8",
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "%%time\n",
+    "# Reshape the regridded data to 2D\n",
+    "reshaped = grid.to_2d(regridded).pipe(center_longitude, 0)\n",
+    "reshaped = reshaped.persist()\n",
+    "reshaped"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dcce5655-488a-4531-b0d8-8454250da579",
+   "metadata": {
+    "editable": true,
+    "slideshow": {
"slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# This cell verifies the regridded data by plotting the count of non-NaN values.\n", + "# reshaped[\"diff\"].count([\"x\", \"y\"]).plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dbf59a24-980a-4a60-8dab-4fd4f8da13b8", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "%%time\n", + "# This cell saves the regridded data to Zarr format, then cleans up unnecessary variables to free up memory after the regridding process.\n", + "reshaped.chunk(default_chunk_xy).to_zarr(\n", + " f\"{target_root}/diff-regridded.zarr\",\n", + " mode=\"w\",\n", + " consolidated=True,\n", + " compute=True,\n", + " storage_options=storage_options,\n", + ")\n", + "# Cleanup unnecessary variables to free up memory\n", + "del ds, grid, target_grid, regridder, regridded, reshaped" + ] + }, + { + "cell_type": "markdown", + "id": "5826c0a3-33ea-469d-a675-c681f2eaf05f", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## 4. **Construct Emission Matrix:** Create an emission matrix based on the transformed grid.\n", + "\n", + "In this step, we construct the emission probability matrix based on the differences between the observed tag temperature and the reference sea temperature computed in Workflow 2 and regridded in Workflow 3. The emission probability matrix represents the likelihood of observing a specific temperature difference given the model parameters and configurations.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac5febe6-f6cd-4071-a437-ced8a3727220", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Import necessary libraries\n", + "from toolz.dicttoolz import valfilter\n", + "from pangeo_fish.distributions import create_covariances, normal_at\n", + "from pangeo_fish.pdf import normal" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77e66335-933a-4a4b-869b-d261aefec110", + "metadata": { + "editable": true, + "scrolled": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "%%time\n", + "# Open the regridded diff data\n", + "differences = xr.open_dataset(\n", + " f\"{target_root}/diff-regridded.zarr\",\n", + " engine=\"zarr\",\n", + " chunks={},\n", + " storage_options=storage_options,\n", + ").pipe(lambda ds: ds.merge(ds[[\"latitude\", \"longitude\"]].compute()))\n", + "differences" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a74aa0be-55c2-48c9-ac3b-b9b631c91d91", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "%%time\n", + "# Compute initial and final position\n", + "grid = differences[[\"latitude\", \"longitude\"]].compute()\n", + "\n", + "initial_position = tag[\"tagging_events\"].ds.sel(event_name=\"release\")\n", + "cov = create_covariances(1e-6, coord_names=[\"latitude\", \"longitude\"])\n", + "initial_probability = normal_at(\n", + " grid, pos=initial_position, cov=cov, normalize=True, axes=[\"latitude\", \"longitude\"]\n", + ")\n", + "\n", + "final_position = tag[\"tagging_events\"].ds.sel(event_name=\"fish_death\")\n", + "if final_position[[\"longitude\", \"latitude\"]].to_dataarray().isnull().all():\n", + " final_probability = None\n", + "else:\n", + " cov = 
create_covariances(recapture_std**2, coord_names=[\"latitude\", \"longitude\"])\n", + " final_probability = normal_at(\n", + " grid,\n", + " pos=final_position,\n", + " cov=cov,\n", + " normalize=True,\n", + " axes=[\"latitude\", \"longitude\"],\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "780a9ae3-6dab-4e8f-9148-de93f4ab9dce", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "%%time\n", + "# compute emission probability matrix\n", + "\n", + "emission_pdf = (\n", + " normal(differences[\"diff\"], mean=0, std=differences_std, dims=[\"y\", \"x\"])\n", + " .to_dataset(name=\"pdf\")\n", + " .assign(\n", + " valfilter(\n", + " lambda x: x is not None,\n", + " {\n", + " \"initial\": initial_probability,\n", + " \"final\": final_probability,\n", + " \"mask\": differences[\"ocean_mask\"],\n", + " },\n", + " )\n", + " )\n", + " .assign_attrs(differences.attrs) # | {\"max_sigma\": max_sigma})\n", + ")\n", + "\n", + "emission_pdf = emission_pdf.chunk(default_chunk_xy).persist()\n", + "emission_pdf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c84436de-30ff-496c-9b16-fe4287517637", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Verify the data\n", + "# emission_pdf[\"pdf\"].count([\"x\", \"y\"]).plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cd184239-bccf-4e9b-8d13-54222dd57621", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# This cell saves the emission data to Zarr format, then cleans up unnecessary variables to free up memory.\n", + "\n", + "emission_pdf.to_zarr(\n", + " f\"{target_root}/emission.zarr\",\n", + " mode=\"w\",\n", + " consolidated=True,\n", + " storage_options=storage_options,\n", + ")\n", + "\n", + "\n", + "del differences, grid, initial_probability, final_probability, emission_pdf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "806935e6-efcb-47db-83db-1c91afb55c01", + "metadata": { + "editable": true, + "scrolled": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "%%time \n", + "# Import necessary libraries and open data and perform initial setup\n", + "from pangeo_fish import acoustic\n", + "\n", + "emission = xr.open_dataset(\n", + " f\"{target_root}/emission.zarr\",\n", + " engine=\"zarr\",\n", + " chunks={}, # \"x\": -1, \"y\": -1},\n", + " storage_options=storage_options,\n", + ")\n", + "emission" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec01a44c-7878-45e7-9fe8-dbdfcf10cfea", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Construct the emission probabilities based on acoustic detections\n", + "\n", + "acoustic_pdf = acoustic.emission_probability(\n", + " tag,\n", + " emission[[\"time\", \"cell_ids\", \"mask\"]].compute(),\n", + " receiver_buffer,\n", + " nondetections=\"mask\",\n", + ")\n", + "acoustic_pdf = acoustic_pdf.persist()\n", + "acoustic_pdf" + ] + }, + { + "cell_type": "raw", + "id": "7e23b1f9-1a19-4070-a629-fcd5a2c4d52c", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "# Verify the data and visualize the acoustic detections\n", + 
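+    "# (Hedged note) The scatter marks acoustic detections per receiver deployment;\n",
+    "# the second curve counts grid cells with non-zero acoustic probability at each\n",
+    "# time step, showing when detections constrain the fish's position.\n",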
"tag['acoustic'][\"deployment_id\"].hvplot.scatter(\n", + " c='red',marker='x')*(\n", + " acoustic_pdf['acoustic'] != 0).sum(dim=('y', 'x')).hvplot()" + ] + }, + { + "cell_type": "raw", + "id": "50798155-1d16-48cb-b920-1e1da8e5f0b5", + "metadata": {}, + "source": [ + "acoustic_pdf['acoustic'].count(dim=('y', 'x')).hvplot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c3784adc-06e4-4864-bcec-b97133e03854", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Merge and save the combined emission probability matrix with acoustic probabilities\n", + "\n", + "combined = emission.merge(acoustic_pdf)\n", + "combined" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7792e720-ca0c-4d36-9b2a-5e1df31e07a5", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# This cell saves the emission data to Zarr format, then cleans up unnecessary variables to free up memory.\n", + "\n", + "combined.to_zarr(\n", + " f\"{target_root}/emission_acoustic.zarr\",\n", + " mode=\"w\",\n", + " consolidated=True,\n", + " storage_options=storage_options,\n", + ")\n", + "# cleanup\n", + "\n", + "del emission, acoustic_pdf, combined" + ] + }, + { + "cell_type": "markdown", + "id": "3a60f229-7dc5-4ccd-b5d0-dd6910f81247", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## 6. **Combine and Normalize Emission Matrix:** Merge the emission matrix and normalize it for further processing.\n", + "\n", + "In this step, we combine the emission probability matrix constructed in Workflow 4 and 5 then normalize it to ensure that the probabilities sum up to one. 
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2bbf45e3-5129-4993-a8f4-b57387256a12",
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Import necessary libraries\n",
+    "from pangeo_fish.pdf import combine_emission_pdf"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ef911b87-2596-4ac1-9d86-1a61abeca0b3",
+   "metadata": {
+    "editable": true,
+    "scrolled": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Open and combine the emission probability matrix\n",
+    "\n",
+    "combined = (\n",
+    "    xr.open_dataset(\n",
+    "        f\"{target_root}/emission.zarr\",\n",
+    "        engine=\"zarr\",\n",
+    "        chunks=default_chunk_xy,\n",
+    "        inline_array=True,\n",
+    "        storage_options=storage_options,\n",
+    "    )\n",
+    "    .pipe(combine_emission_pdf)\n",
+    "    .chunk(default_chunk_xy)\n",
+    "    .persist()  # comment this line out if the emission matrix does *not* fit in memory\n",
+    ")\n",
+    "combined"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1cf452e4-72eb-4da6-a8e3-f49a8be69078",
+   "metadata": {
+    "editable": true,
+    "scrolled": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Verify the data and visualize the sum of probabilities\n",
+    "# combined[\"pdf\"].sum([\"x\", \"y\"]).hvplot(width=400)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d6d55f6a-f7bb-469b-8df9-6aba977d83a9",
+   "metadata": {
+    "editable": true,
+    "scrolled": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Save the combined and normalized emission matrix\n",
+    "combined.to_zarr(\n",
+    "    f\"{target_root}/combined.zarr\",\n",
+    "    mode=\"w\",\n",
+    "    consolidated=True,\n",
+    "    storage_options=storage_options,\n",
+    ")\n",
+    "del combined"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7f695094-640e-44b4-9654-2b8eb2b00efa",
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "source": [
+    "## 7. **Estimate Model Parameters:** Determine the parameters of the model based on the normalized emission matrix.\n",
+    "\n",
+    "This step first estimates the maximum allowed value of the model parameter `sigma`, max_sigma. Then we\n",
+    "create an optimizer with an expected parameter range, fitting the model to the normalized emission matrix. \n",
+    "The resulting optimized parameters are saved to a JSON file. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25abf847-e1e2-46d7-b54a-f25e57946e0c", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Import necessary libraries and modules for data analysis.\n", + "import xarray as xr\n", + "import pandas as pd\n", + "from pangeo_fish.hmm.estimator import EagerScoreEstimator\n", + "from pangeo_fish.hmm.optimize import EagerBoundsSearch\n", + "from pangeo_fish.utils import temporal_resolution\n", + "\n", + "# Open the data\n", + "emission = xr.open_dataset(\n", + " f\"{target_root}/combined.zarr\",\n", + " engine=\"zarr\",\n", + " chunks={},\n", + " inline_array=True,\n", + " storage_options=storage_options,\n", + ")\n", + "emission" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "64b7afa6-196c-4210-816e-4bbf9495be44", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Compute maximum displacement for each reference model time step\n", + "# and estimate maximum sigma value for limiting the optimisation step\n", + "\n", + "earth_radius_ = xr.DataArray(earth_radius, dims=None)\n", + "\n", + "timedelta = temporal_resolution(emission[\"time\"]).pint.quantify().pint.to(\"h\")\n", + "grid_resolution = earth_radius_ * emission[\"resolution\"].pint.quantify()\n", + "\n", + "maximum_speed_ = xr.DataArray(maximum_speed, dims=None).pint.to(\"km / h\")\n", + "max_grid_displacement = maximum_speed_ * timedelta * adjustment_factor / grid_resolution\n", + "max_sigma = max_grid_displacement.pint.to(\"dimensionless\").pint.magnitude / truncate\n", + "emission.attrs[\"max_sigma\"] = max_sigma\n", + "max_sigma" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "72d640c9-cc1c-4d27-8cb5-0d5f624a9a71", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Create and configure estimator and optimizer\n", + "emission = (\n", + " emission.compute()\n", + ") # Convert to comment if the emission matrix does *not* fit in memory\n", + "estimator = EagerScoreEstimator()\n", + "optimizer = EagerBoundsSearch(\n", + " estimator,\n", + " (1e-4, emission.attrs[\"max_sigma\"]),\n", + " optimizer_kwargs={\"disp\": 3, \"xtol\": tolerance},\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "92af7862-f1d4-43c5-b03d-e7d719035b3b", + "metadata": { + "editable": true, + "scrolled": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "%%time\n", + "# Fit the model parameter to the data\n", + "optimized = optimizer.fit(emission)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1d7d77ba-4728-48d5-91a4-1d2b46144950", + "metadata": {}, + "outputs": [], + "source": [ + "# Save the optimized parameters\n", + "params = optimized.to_dict()\n", + "pd.DataFrame.from_dict(params, orient=\"index\").to_json(\n", + " f\"{target_root}/parameters.json\", storage_options=storage_options\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "87887f9d-127a-449f-bbb5-4ee1bbcaff56", + "metadata": {}, + "outputs": [], + "source": [ + "# Cleanup\n", + "del optimized, emission" + ] + }, + { + "cell_type": "markdown", + "id": "4f1f9b9d-a8a8-4bfa-b9bf-d69a37e23d74", + "metadata": {}, + "source": [ + "## 8. 
**Compute State Probabilities and Tracks:** Calculate the probability distribution of the species in question and compute the tracks.\n",
+    "\n",
+    "This step involves predicting the state probabilities using the optimised parameter sigma from the previous step, together with the normalized emission matrix. A toy sketch of the filtering idea follows the saving step below."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "eedfdf60-f712-4ac3-83f8-6888143cbbd3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Import necessary libraries and modules for data analysis.\n",
+    "import xarray as xr\n",
+    "import pandas as pd\n",
+    "from pangeo_fish.hmm.estimator import EagerScoreEstimator\n",
+    "from pangeo_fish.io import save_trajectories\n",
+    "\n",
+    "# Recreate the estimator from the optimized parameters\n",
+    "params = pd.read_json(\n",
+    "    f\"{target_root}/parameters.json\", storage_options=storage_options\n",
+    ").to_dict()[0]\n",
+    "optimized = EagerScoreEstimator(**params)\n",
+    "optimized"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "361999bd-7c8e-4835-9129-252fa0b20209",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "%%time\n",
+    "# Load the data\n",
+    "emission = xr.open_dataset(\n",
+    "    f\"{target_root}/combined.zarr\",\n",
+    "    engine=\"zarr\",\n",
+    "    chunks=default_chunk_xy,\n",
+    "    inline_array=True,\n",
+    "    storage_options=storage_options,\n",
+    ").compute()\n",
+    "\n",
+    "# Predict the state probabilities\n",
+    "\n",
+    "states = optimized.predict_proba(emission)\n",
+    "states = states.to_dataset().chunk(default_chunk_xy).persist()\n",
+    "states"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3204e35f-4708-442b-a28a-f1748f101781",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Verify the data and visualize the sum of probabilities\n",
+    "# states.sum([\"x\", \"y\"]).hvplot() + states.count([\"x\", \"y\"]).hvplot()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d3b03df5-8c41-49b4-9cc8-a2d709be8553",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%time\n",
+    "# Save the probability distribution (state matrix).\n",
+    "states.chunk(default_chunk_xy).to_zarr(\n",
+    "    f\"{target_root}/states.zarr\",\n",
+    "    mode=\"w\",\n",
+    "    consolidated=True,\n",
+    "    storage_options=storage_options,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0d7ad49d-b52d-47ee-a2fe-bb5855a496ab",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "emission"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5e3dbcb0-6b74-4cbe-90f3-75836c86ad18",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "emission[\"pdf\"][0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "83da1a8f-ba47-4aa8-80fc-2f0a02c01f5a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%time\n",
+    "# Decode the tracks\n",
+    "\n",
+    "trajectories = optimized.decode(\n",
+    "    emission,\n",
+    "    states.fillna(0),\n",
+    "    mode=track_modes,\n",
+    "    progress=False,\n",
+    "    additional_quantities=additional_track_quantities,\n",
+    ")\n",
+    "trajectories"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3fabb13b-0f14-414a-bcc7-0622e15db294",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Save the trajectories.\n",
+    "# Here we can choose the 'parquet' format for loading the files from R,\n",
+    "# or the 'geoparquet' format for further analysis of the tracks with\n",
+    "# geopandas.\n",
+    "\n",
+    "save_trajectories(trajectories, target_root, storage_options, format=\"parquet\")"
+   ]
+  },
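+  {
+   "cell_type": "markdown",
+   "id": "a7b8c9d0-filter-sketch-md",
+   "metadata": {},
+   "source": [
+    "Before cleaning up, a 1-D toy recap of the filtering idea used by `predict_proba` above (a hedged sketch: the real model works on the 2-D Healpix grid with the optimised `sigma`; names and shapes below are illustrative only, and the cell is `raw` so it is not run by accident):"
+   ]
+  },
+  {
+   "cell_type": "raw",
+   "id": "a7b8c9d0-filter-sketch-code",
+   "metadata": {},
+   "source": [
+    "import numpy as np\n",
+    "from scipy.ndimage import gaussian_filter1d\n",
+    "\n",
+    "emission_toy = np.random.rand(4, 50)  # (time, cells) emission probabilities\n",
+    "state = emission_toy[0] / emission_toy[0].sum()  # initial distribution\n",
+    "for t in range(1, 4):\n",
+    "    # Gaussian convolution models the fish displacement between time steps,\n",
+    "    # then the emission reweights the distribution, which is renormalized.\n",
+    "    state = gaussian_filter1d(state, sigma=2.0) * emission_toy[t]\n",
+    "    state /= state.sum()\n",
+    "print(state.sum())  # ~1.0"
+   ]
+  },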
"execution_count": null, + "id": "92ca87cf-b1c3-4eb1-8edb-9dad954243c8", + "metadata": {}, + "outputs": [], + "source": [ + "# Cleanup\n", + "del optimized, emission, states, trajectories" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/papermill_launcher.ipynb b/docs/papermill_launcher.ipynb new file mode 100644 index 0000000..baf145f --- /dev/null +++ b/docs/papermill_launcher.ipynb @@ -0,0 +1,250 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "ef2daa25-4ec9-4046-81cd-2644e2fb048b", + "metadata": {}, + "source": [ + "# Executing multiple parameters notebooks with papermill\n", + "___\n" + ] + }, + { + "cell_type": "raw", + "id": "d8f4af88-0bb3-4ac0-bede-eaf6245f8435", + "metadata": {}, + "source": [ + "!pip install papermill" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b1283ae9-705a-4c0c-b68e-7f4986348f4f", + "metadata": {}, + "outputs": [], + "source": [ + "# Necessary imports\n", + "import papermill as pm\n", + "import s3fs\n", + "import numpy as np\n", + "from datetime import datetime\n", + "import shutil\n", + "import os\n", + "from tqdm import tqdm\n", + "import pandas as pd\n", + "import pytz" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "299a8a8d-d94b-41c3-a34a-f49c1d9b9576", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Connecting to the bucket\n", + "s3 = s3fs.S3FileSystem(\n", + " anon=False,\n", + " client_kwargs={\n", + " \"endpoint_url\": \"https://s3.gra.perf.cloud.ovh.net\",\n", + " },\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "183731a1-201f-4921-8f1a-b3c05243ee11", + "metadata": {}, + "outputs": [], + "source": [ + "### Parameters for the execution of the notebook\n", + "\n", + "# Tags repo is the s3like path to the data stored on the bucket.\n", + "tags_repo = \"gfts-ifremer/tags/bargip/clean_demo/\"\n", + "\n", + "# local_output is the path where the parametrized notebooks will be stored\n", + "local_output = \"papermill_output\"\n", + "\n", + "input_notebook = \"pangeo-fish_papermill.ipynb\"\n", + "\n", + "# cloud_root is the path to acces the files on remote\n", + "cloud_root = \"s3://gfts-ifremer/tags/bargip\"\n", + "\n", + "# folder name is the name of the folder where the result will be stored on the bucket\n", + "folder_name = \"tracks_test\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "177d6247-f01f-46f4-9775-852e472b31f1", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Setting up path for the s3 file access\n", + "tag_list = [tag.replace(tags_repo, \"\") for tag in s3.ls(tags_repo)]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a2f6756b-446d-4b02-b617-f8448f3ac133", + "metadata": {}, + "outputs": [], + "source": [ + "# Setting up parameters for the computation\n", + "remote = True\n", + "\n", + "if remote:\n", + " storage_options = {\n", + " \"anon\": False,\n", + " # 'profile' : \"gfts\",\n", + " \"client_kwargs\": {\n", + " 
\"endpoint_url\": \"https://s3.gra.perf.cloud.ovh.net\",\n", + " \"region_name\": \"gra\",\n", + " },\n", + " }\n", + " scratch_root = f\"{cloud_root}/{folder_name}\"\n", + "\n", + "else:\n", + " storage_options = None\n", + " scratch_root = f\"/home/jovyan/notebooks/papermill/{folder_name}\" # Update this path with your local path where you want to it to be stored" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1795cc81-65d1-4118-8343-721e870b4dc3", + "metadata": {}, + "outputs": [], + "source": [ + "# param is the dict passed as an argument to papermill\n", + "param = {\"storage_options\": storage_options, \"scratch_root\": scratch_root}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7aa7cf51-1fc1-4fbd-9fb2-51576245766a", + "metadata": {}, + "outputs": [], + "source": [ + "# Verifying the params for the location\n", + "param" + ] + }, + { + "cell_type": "markdown", + "id": "4172f475-70b0-4f7b-ab5d-8b4b116c3347", + "metadata": {}, + "source": [ + "![warning](data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAJwAAACUCAMAAABRNbASAAAAwFBMVEX////+AAIBAQH+AAEAAAD5AAD8///+iov9q6n5///2AAAjIyP39/fm5ub7+/v2///b29t+fn5jY2POzs6Pj4+lpaVtbW06OjrBwcHu7u75395bW1v7UlH5n5v2wb37//v5GBb1gn75y8n4s7D76OdOTk73dXQMDAwsLCz4bGe1tbVGRkaZmZn77+78u7n2RkP7kpH6X1z1OzX3LSP6LzH429H4jIP8OT34e3H4t6z409D2UUb2rKQYGBj0lpj7usBrXFrYAAAKpUlEQVR4nO1cCXebyg6GjkfxEIKdtbELNl7ilCR4i3uT5/am//9fPbBhNAyDWbyQc27UtKcnxiA0GunTJ4GmfcmXfMl/TqBuBXYJMMv3LdaqW4+U9DUYO97CtheeM/5kNmRWb2mQrVDjrWfBp9EPNN/TKaX6RoL/UM//NMqx4YrqhGx/tn9Xw89hO0tbu7HVYiHU/fUplIPxTFIt1I7MxnUrpoUB5F2220Y7YwD9unXTtLWRVm1ju3XtCwssXNR4N0QbYvszs2pXbqQTpeWChR1pNas3fkXdJC3pyme16gZzKupGwxDMdST/1Op1sKZx+A1im+0Mh4693Q2bXxm/6lTOmhDCk9bHxsPMpsFtRydWjV7XM2icEmgzcjBocuejtFebatB/jWMcMRaxf4G1iP2QkFVtXseaemw4QtG9YG3wPUGbdSnXXcURl9KzFtoIBjT+QF9160liMOeJiyZ1CLWOTVpPOIE14SrQZkIFGGG+pd06tAMPo+1KgpYM8wb1Tp8m+oF1MGIMZQV6Bi7s6PSmM9+MGH9Qz5Kvb3oIT5bmqXVrzXluIFQBeruYY+nHqWFn18V1myvqaAERUPe0e6JvTWLDELpUVQswfsM8MThphmXDEI1snYqOlIfAiMahmNDhKZWDGe7Ut4wKFZbodTN2uoWFJuYGus6IYwHWwzs4XYoFf4bXfc8s7QO/5I4580+lHDvDxPXazToKYLzie8I4O5VyY86M6FQII2YkqJ5D+XH6+DReZ3lxiU+MVRwk2nfnXO7akXL9Fc//5DQpFno6Xy06jTzuppGQm+jQKY/ExOidgFPswxuikcXGcJZ2Gyj0bfNn+0/jNtLOwzyxZCewncPDL8dq7fPGt4Q0zjcrC9ClMUGh687RLcfGS1zUQfTL29hu37gBt6az2AQ3tjLNHVa5dxpDteBqkS2uJMMFprvi98L3BB0c23S/aGwKojuxEz2kLfcQfQQOP57S7lG9ztJswcN5brhLK3cXKwdLQiPldPuoyrWmAtTAav4yvayX8WesR+NQHESeYyrnz7DgmiBIu0lb7ib+DKwJd1I6849IsjsIMUV4e5u23At+ae0iOpkfa2GhP0Zqmv4jfKIIJU/4KTujGH2OlmLZgO88shJ7NE9py90LN+Wv0OCT4ygHIUETezYnvDZyn7bcvfjNJs8TAWI/hnYA7I1DTLJI+M51Q05fjevEdxcCOXAUxA4jgq6TvP/rR9lyj0nlhoKzjo5RxY6X3G76JFmphok/qdx5O3ljE+Qu3o6QYoMqmYORV6lKbv+Ul/VnUjlt/crZCfpx8F52qxvTvzo1PqQPL55ly32XlGs56HU0s+yoKuDFHkd0V771iwfZcg8d6evMQK/zDu11U52HKv1/8n7r/JAt96OTOgGSYvr0oAsLpo2VqmemCC8Z0DWuUrSX6WFXYJY6wz7C/ui8U0PXqY+tO9lydynqhq0pD8X0oIh97PKKi04UayJjJkRMeAMw4WFSdw+YYjdhJNItOG/6xJey5dLKiZSeTuatg+2JoYGZu8kUpy2kHGtyiE8VvlFNgNkE47uS4L1JJlcBa4piLrEDYB8KdY54biBEDSqyiuqEgDbEJGP8PYRywPyZED+ZkkF9ki33pDoq0bqYHWJox2rNkWZ7Xavb9vey5e5VRwXhBGnuw4QTX8eiPasEuJYtd60+TmyXGYcw3QTnlHQzgxZvy5Zrq4/TTCwTD0AAgBhG1Lx5IB3ZcqnUGgkbodcZ+3LsFrORG7Ezid2ObLlM5XwbW3YL2I+yAx44d1MdsnLZZ+xirDNG+1Wx/lKEYdmNGHlZs8/IPEwTb3vtCTbHoi6bN9fKWE7rvsbDT4TO91GuS7FY/9h1oiS12TjfcSh88LhJjW71LAYLXNSVuctBnpOWe9511j5vYxOyaFXt2UEPcwOd7rzFJBSOeU21sKlQAPcqWm6z7WM2ztvdbr5MWk6FmFDMBYbiRcU90XL43A9xc8YIb5KWUyKmWCBBijnVwokvTkzl3N9L0nIvOw8GeMfmiVtpAoANkODITdJPSeXUiAlP7QvoZFDFdIHt+YhBru3vk8uqRkwo4PBVoW5Ww3aX/OYtP90288qR+6Tl8pTTTBsZu/IcO+vhvCPN76klAF0mnEMJo1TlcBJic86NLPJvrZ20XBacQ+XYgmtHs9GOWvof3GyFnKL9
r2i5f3OVEzh2EoaTUivrC2PTRbYTpw9V1KFazmIwFuTvUnxiy9sgh43LGqlBJYVcfBct9/0i/xtgubwbRCclLBdgc854kUJTF50H0XIyO6eUkACIrmG4hRE7aCLhtShEV3GGLoOdU0mYYiOvI3ZhUgymGCKNv4VATYKhU7Bzysv0sOeSA3pQNsklmtDXvWJtA87QZbBzSmlNooZi8NctiE7gQ7ijov1bkaHLQUz8OtB1ef4PwkmBOwJIRKCirnAjWm4nYhKv9cH3BCk2ZAdx7A6M/VY4douArrByzFwhubYooluP7yFKi8+CvkT116b22g3nRJlisUOH+SW2/xt7LYviaeUWM7+anVMLCxF7xJ3YeXuiDyOhGV2i/fgkWi4HawoCQ5ePt+vNvMt1sDIiZWige1G5XDiHymkDrPCMHA+HM+RZaBkkc/GIy/pYILXyC/rYdyJnu60xxLrIaJZq296h5e7yjxbkDz7bYQx3IW7w+DNc9He5ijIATY3tdigEmFA24yDbcmDnkB1mO52UfkKl/bydnXsup5uA2Ek4ZJd1lObjVJRhl+7Fd16uzs+vXgohElEYjiDpy8zlYg5WHca6Av9jdjpVSuQ1Dm8RJ6vbGTanYmx6tNEZhbA5Tki5GaQYe+fWDRBMlatcXF+XiCJ4Yd9FbkGJ2PvSfEXpRb2IhkpvyuuHaSl8VEx1ZVMY/ixbSQZy/7OxTQ+N78UTRCR98zefICCeAtexP/xxclImqW7EqppbtwKYYoMY9icNO/suD7/Ua5Vd1GRRXTIMa5uZXT6M7VqS24H45IdRfmbmIUlH/Ch9grHgdfKTKWwt2NUpfeqLou2lLLHYHEOF3FRgE4zSFR4PSDWDS3udOH9sSFMxQxybNqblh8guZcsVq78SMs0YsgML0xuZVcgN0gBCJeUYNsSJLViOOYRntzzeXCkvsuWKlzhcWmsDSyuhJO0L/PGkSsMn1UbPpTbTAjARpleQO3GEB8iqTfJKyj1WOEWI2HEWKzYdwBt/iQGdV+uUJda1TG0oymbUIX59QqQGQ1Kf6BUfbjfvxPR1V+3RR+ginnSjkR1oIjh3qw7ed3404hqiID2nkC5mAiPiLJFV2mdksXNzvoVM5y+VHxntCuOJUXuXYa2qkz0e47l4uby6urytAje3wv4SnLGPHhSDOb7Ig85gRyc/V8y9nrNlM5yZovPod/xhj1D2abvDXq+1Yjilh+iDjQTliDFgLahD+gP0OBywAfGdI2HLwmueXpwJpaJuqwhTAtjCm2RqFPFtNjbPEDjD+0mEkB6Pt5b9yZQTJ4lZj3wu7YLiVXhPhvOZFlbuMLCJnvBH+R1LxX/2/U64Vd+lt9e0HEP11qwahBqOzG+C1nvdbGe9vnCyvfRrTwUpYbRYha9VojXKapHFRIO1njpn87O6ZO5Mu9Ypnh/+ki/5kv+U/B+mkr4zUBUP8gAAAABJRU5ErkJggg==)" + ] + }, + { + "cell_type": "markdown", + "id": "461e67e0-aeb9-437d-9c79-fb9e4f31a3be", + "metadata": {}, + "source": [ + "### BE CAREFUL FOR THE PATH CHOOSEN FOR **SCRATCH ROOT**, THIS PATH IS THE DIRECTORY WHERE THE RESULT OF THE COMPUTATION WILL BE STORED. WATCH OUT TO NOT OVERWRITE SOMETHING !" + ] + }, + { + "cell_type": "markdown", + "id": "ed4dbc07-91bc-4d21-86ea-41411d4393bf", + "metadata": {}, + "source": [ + "___\n", + "### Explantion of the code below \n", + "- nbs is a list of the notebooks that has been processed, wether they failed or not.\n", + "- The code loops over the tag id present in tag list and calculates the time difference in the tagging events.\n", + "- If the fish has observation over 2 days and has not been processed yet, it starts running a parametrized notebook.\n", + "- If it succeds, the generated notebook is placed papermill_output/done, else, it goes at papermill_output/failed" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "75406c75-efd1-40fb-891a-07088cf3d3de", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "%%time\n", + "timezone = pytz.timezone(\"Europe/Paris\")\n", + "os.makedirs(f\"{local_output}/done\", exist_ok=True)\n", + "os.makedirs(f\"{local_output}/failed\", exist_ok=True)\n", + "nbs = []\n", + "nbs = [nb.replace(\".ipynb\", \"\") for nb in os.listdir(f\"{local_output}/done\")]\n", + "for fail in os.listdir(f\"{local_output}/failed/\"):\n", + " nbs.append(fail.replace(\".ipynb\", \"\"))\n", + "\n", + "for tag_name in tqdm(tag_list, desc=\"Processing tags\"):\n", + " try:\n", + " te = pd.read_csv(s3.open(f\"{cloud_root}/cleaned/{tag_name}/tagging_events.csv\"))\n", + " np_datetime1 = np.datetime64(\n", + " datetime.strptime(te[\"time\"][0], \"%Y-%m-%dT%H:%M:%SZ\")\n", + " )\n", + " np_datetime2 = np.datetime64(\n", + " datetime.strptime(te[\"time\"][1], \"%Y-%m-%dT%H:%M:%SZ\")\n", + " )\n", + " time_difference = (np_datetime2 - np_datetime1) / np.timedelta64(1, \"D\")\n", + "\n", + " if (\n", + " (tag_name not in nbs) and time_difference > 2\n", 
+ " ): # Use this statement if you already start a computation that has been interrupted but the generation is still valid\n", + " # if time_difference > 2: # Use this if you want every tag in tag list to be processed\n", + "\n", + " print(tag_name)\n", + " print(datetime.now(timezone).strftime(\"%Y-%m-%d %H:%M:%S\"))\n", + " param[\"tag_name\"] = tag_name\n", + " output_path = f\"{local_output}/{tag_name}.ipynb\"\n", + " destination_path = f\"{local_output}/done/{tag_name}.ipynb\"\n", + " pm.execute_notebook(\n", + " input_path=input_notebook, output_path=output_path, parameters=param\n", + " )\n", + " shutil.move(output_path, destination_path)\n", + " except Exception:\n", + " print(f\"Error for {tag_name}\")\n", + " destination_path = f\"{local_output}/failed/{tag_name}.ipynb\"\n", + " shutil.move(output_path, destination_path)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From b1e8d52cd65fb047fa70132c96447b8bdf038c6c Mon Sep 17 00:00:00 2001 From: aderrien7 Date: Thu, 29 Aug 2024 15:42:02 +0000 Subject: [PATCH 03/13] Fix for the template Fix for the template, wrong version as pushed. After multiple try to pass the pre-commit I don't know why the precommit acts like this --- docs/pangeo-fish_papermill.ipynb | 3525 +++++++++++++++++++++--------- 1 file changed, 2500 insertions(+), 1025 deletions(-) diff --git a/docs/pangeo-fish_papermill.ipynb b/docs/pangeo-fish_papermill.ipynb index 2516acc..c580e15 100644 --- a/docs/pangeo-fish_papermill.ipynb +++ b/docs/pangeo-fish_papermill.ipynb @@ -15,23 +15,22 @@ "\n", "\n", "**Overview:**\n", - "This Jupyter notebook demonstrates the usage of the Pangeo-Fish software, a tool designed for analyzing biologging data in reference to Earth Observation (EO) data. Specifically, it utilizes data employed in the study conducted by M. Gonze et al. titled \"Combining acoustic telemetry with archival tagging to investigate the spatial dynamics of the understudied pollack *Pollachius pollachius*,\" accepted for publication in the Journal of Fish Biology.\n", - "\n", - "We showcase the application using the biologging tag 'A19124' attached to a pollack fish, along with reference EO data from the European Union Copernicus Marine Service Information (CMEMS) product 'NORTHWESTSHELF_ANALYSIS_FORECAST_PHY_004_013'. The biologging data consist of Data Storage Tag (DST) and teledetection by acoustic signals, along with release and recapture time and location of the species in question. Both biologging data and the reference EO data are accessible with https and the access methods are incropolated in this notebook. \n", - "\n", + "This Jupyter notebook demonstrates the usage of the Pangeo-Fish software, a tool designed for analyzing biologging data in reference to Earth Observation (EO) data.\n", "\n", + "The biologging data consist of Data Storage Tag (DST), along with release and recapture time and location of the species in question. Both biologging data and the reference EO data are accessible with https and the access methods are incorporated in this notebook. 
\n", "\n", "**Purpose:**\n", - "By executing this notebook, users will learn how to set up a workflow for utilizing the Pangeo-Fish software. The workflow consists of 8 steps which are described below:\n", + "By executing this notebook, users will learn how to set up a workflow for utilizing the Pangeo-Fish software. The workflow consists of 9 steps which are described below:\n", "\n", "1. **Configure the Notebook:** Prepare the notebook environment for analysis.\n", "2. **Compare Reference Model with DST Information:** Analyze and compare data from the reference model with information from the biologging data of the species in question. \n", "3. **Regrid the Grid from Reference Model Grid to Healpix Grid:** Transform the grid from the reference model to the Healpix grid for further analysis.\n", "4. **Construct Emission Matrix:** Create an emission matrix based on the transformed grid.\n", - "5. **Combine and Normalize Emission Matrix:** Merge the emission matrix and normalize it for further processing.\n", - "6. **Estimate Model Parameters:** Determine the parameters of the model based on the normalized emission matrix.\n", - "7. **Compute State Probabilities and Tracks:** Calculate the probability distribution of the species in question and compute the tracks.\n", - "8. **Visualization:** Visualize the results of the analysis for interpretation and insight.\n", + "5. **Replace emission for flagged tags:** If the tags are flagged for warm water, then it use the detection file associated and change the flagged timestamps.\n", + "6. **Combine and Normalize Emission Matrix:** Merge the emission matrix and normalize it for further processing.\n", + "7. **Estimate Model Parameters:** Determine the parameters of the model based on the normalized emission matrix.\n", + "8. **Compute State Probabilities and Tracks:** Calculate the probability distribution of the species in question and compute the tracks.\n", + "9. 
     "\n",
     "Throughout this notebook, users will gain practical experience in setting up and executing a workflow using Pangeo-Fish, enabling them to apply similar methodologies to their own biologging data analysis tasks.\n",
     "\n"
@@ -87,7 +86,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 8,
    "id": "682ef19d-ea85-49c9-a1ee-1f22d055b580",
    "metadata": {
     "editable": true,
     "slideshow": {
      "slide_type": ""
     },
     "tags": []
    },
+   "outputs": [],
+   "source": [
+    "# Import necessary libraries and modules.\n",
+    "import xarray as xr\n",
+    "from pint_xarray import unit_registry as ureg\n",
+    "from pangeo_fish.io import open_tag\n",
+    "import intake\n",
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "cde0c569-0b64-407f-b167-bb9fe7ee4349",
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": [
+     "parameters"
+    ]
+   },
+   "outputs": [],
+   "source": [
+    "#\n",
+    "# Set up execution parameters for the analysis.\n",
+    "#\n",
+    "# Note: This cell is tagged as parameters, allowing automatic updates when configuring with papermill.\n",
+    "\n",
+    "# tag_name corresponds to the name of the biologging tag (DST identification number),\n",
+    "# which is also a path for storing all the information for the specific fish tagged with tag_name.\n",
+    "# tag_name = \"AD_A11849\"\n",
+    "# tag_name = \"SV_A11957\"\n",
+    "\n",
+    "\n",
+    "tag_list = [\n",
+    "    \"NO_A12710\",\n",
+    "    \"CB_A11036\",\n",
+    "    \"LT_A11385\",\n",
+    "    \"SQ_A10684\",\n",
+    "    \"AD_A11177\",\n",
+    "    \"PB_A12063\",\n",
+    "    \"NO_A12742\",\n",
+    "    \"DK_A10642\",\n",
+    "    \"CB_A11071\",\n",
+    "]\n",
+    "tag_name = tag_list[8]\n",
+    "tag_name = \"DK_A10531\"\n",
+    "\n",
+    "cloud_root = \"s3://gfts-ifremer/tags/bargip\"\n",
+    "\n",
+    "# tag_root specifies the root URL for the tag data used for this computation.\n",
+    "tag_root = f\"{cloud_root}/cleaned\"\n",
+    "\n",
+    "# catalog_url specifies the URL of the catalog for the reference data used.\n",
+    "catalog_url = \"s3://gfts-ifremer/copernicus_catalogs/master.yml\"\n",
+    "\n",
+    "# scratch_root specifies the root directory for storing output files.\n",
+    "scratch_root = f\"{cloud_root}/tracks\"\n",
+    "\n",
+    "\n",
+    "# storage_options specifies options for the filesystem storing output files.\n",
+    "storage_options = {\n",
+    "    \"anon\": False,\n",
+    "    # 'profile' : \"gfts\",\n",
+    "    \"client_kwargs\": {\n",
+    "        \"endpoint_url\": \"https://s3.gra.perf.cloud.ovh.net\",\n",
+    "        \"region_name\": \"gra\",\n",
+    "    },\n",
+    "}\n",
+    "\n",
+    "# if you are using a local file system, activate the following lines\n",
+    "folder_name = \"../toto\"\n",
+    "storage_options = None\n",
+    "scratch_root = f\"/home/jovyan/notebooks/papermill/{folder_name}\"\n",
+    "\n",
+    "# Default chunk value for the time dimension. This value depends on the configuration of your dask cluster.\n",
+    "chunk_time = 24\n",
+    "\n",
+    "#\n",
+    "# Parameters for step 2. **Compare Reference Model with DST Information:**\n",
+    "#\n",
+    "# bbox, bounding box, defines the latitude and longitude range for the analysis area.\n",
+    "bbox = {\"latitude\": [40, 56], \"longitude\": [-13, 5]}\n",
+    "\n",
+    "# relative_depth_threshold defines the acceptable fish depth relative to the maximum tag depth.\n",
+    "# It determines whether the fish can be considered to be in a certain location based on depth.\n",
+    "relative_depth_threshold = 0.8\n",
+    "\n",
+    "#\n",
+    "# Parameters for step 3. **Regrid the Grid from Reference Model Grid to Healpix Grid:**\n",
+    "#\n",
+    "# distance_filepath is the path to the coastal distance file.\n",
+    "distance_filepath = \"s3://gfts-ifremer/tags/distance2coast.zarr\"\n",
+    "\n",
+    "# distance_scale_factor scales the squared distance in the exponential decay function.\n",
+    "distance_scale_factor = 0.01\n",
+    "\n",
+    "# nside defines the resolution of the healpix grid used for regridding.\n",
+    "nside = 4096  # *2\n",
+    "\n",
+    "# rot defines the rotation angles for the healpix grid.\n",
+    "rot = {\"lat\": 0, \"lon\": 30}\n",
+    "\n",
+    "# min_vertices sets the minimum number of vertices for a valid transcription for regridding.\n",
+    "min_vertices = 1\n",
+    "\n",
+    "#\n",
+    "# Parameters for step 4. **Construct Emission Matrix:**\n",
+    "#\n",
+    "# differences_std sets the standard deviation for scipy.stats.norm.pdf.\n",
+    "# It expresses the estimated certainty of the field of difference.\n",
+    "differences_std = 0.75\n",
+    "\n",
+    "# recapture_std sets the covariance for the recapture event.\n",
+    "# It expresses the certainty of the final recapture area if it is known.\n",
+    "recapture_std = 1e-2\n",
+    "\n",
+    "# earth_radius defines the radius of the Earth used for distance calculations.\n",
+    "earth_radius = ureg.Quantity(6371, \"km\")\n",
+    "\n",
+    "# maximum_speed sets the maximum allowable speed for the tagged fish.\n",
+    "maximum_speed = ureg.Quantity(20, \"km / day\")\n",
+    "\n",
+    "# adjustment_factor adjusts parameters for a fuzzier search.\n",
+    "# It scales the allowed maximum displacement of the fish.\n",
+    "adjustment_factor = 5\n",
+    "\n",
+    "# truncate sets the truncating factor for the computed maximum allowed sigma for the convolution process.\n",
+    "truncate = 4\n",
+    "\n",
+    "#\n",
+    "# Parameters for step 5. **Compute Additional Emission Probability Matrix:**\n",
+    "#\n",
+    "\n",
+    "# buffer_size sets the size of the powerplant warm plume.\n",
+    "buffer_size = ureg.Quantity(1000, \"m\")\n",
+    "\n",
+    "# powerplant_flag is a boolean that states whether the fish has swum in a warm\n",
+    "# plume; it is set below from the flagged-tag list.\n",
+    "\n",
+    "#\n",
+    "# Parameters for step 7. **Estimate Model Parameters:**\n",
+    "#\n",
+    "# tolerance sets the tolerance level for the optimised parameter search computation.\n",
+    "tolerance = 1e-3\n",
+    "\n",
+    "#\n",
+    "# Parameters for step 8. **Compute State Probabilities and Tracks:**\n",
+    "#\n",
+    "# track_modes defines the modes for track calculation.\n",
+    "track_modes = [\"mean\", \"mode\"]\n",
+    "\n",
+    "# additional_track_quantities sets quantities to compute for tracks using MovingPandas.\n",
+    "additional_track_quantities = [\"speed\", \"distance\"]\n",
+    "\n",
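+    "# (For reference, an interpretation rather than an authoritative definition:\n",
+    "# the \"mean\" track takes the probability-weighted mean position of the state\n",
+    "# distribution at each time step, while \"mode\" takes the most probable cell;\n",
+    "# see the pangeo_fish documentation for the exact semantics.)\n",
+    "\n",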
+    "#\n",
+    "# Parameters for step 9. **Visualization:**\n",
+    "#\n",
+    "# time_step defines the interval (in time steps) at which the state and emission matrices are visualized.\n",
+    "time_step = 3\n",
+    "\n",
+    "\n",
+    "# Define target root directories for storing analysis results.\n",
+    "target_root = f\"{scratch_root}/{tag_name}\"\n",
+    "\n",
+    "# Define default chunk sizes for optimisation.\n",
+    "default_chunk = {\"time\": chunk_time, \"lat\": -1, \"lon\": -1}\n",
+    "default_chunk_xy = {\"time\": chunk_time, \"x\": -1, \"y\": -1}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "653051cb-1868-43a4-a8b9-7d985ca95dcb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Define target root directories for storing analysis results.\n",
+    "target_root = f\"{scratch_root}/{tag_name}\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "b855b1c2-2e68-4d58-bab7-29f581afe32d",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'/home/jovyan/notebooks/papermill/../toto/DK_A10531'"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "target_root"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "45612cff-e622-4a3b-9879-2fac50c8cfe5",
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'s3://gfts-ifremer/tags/bargip/cleaned'"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "tag_root"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "1e5ef786-4d46-435b-8b80-215a017d89eb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load the list of tags flagged for warm (powerplant) plumes\n",
+    "warm_plume = pd.read_csv(\n",
+    "    \"s3://gfts-ifremer/tags/bargip/bar_flag_warm_plume.txt\", sep=\"\\t\"\n",
+    ")\n",
+    "warm_list = list(warm_plume[warm_plume[\"warm_plume\"] == True][\"tag_name\"])\n",
+    "\n",
+    "# Flag the current tag if it appears in the warm-plume list\n",
+    "powerplant_flag = tag_name in warm_list"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "37fd021e-71d0-4837-9033-cc417943514d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Input paths used in step 5; only needed for flagged tags\n",
+    "if powerplant_flag:\n",
+    "    detection_file = f\"{tag_root}/{tag_name}/detection.csv\"\n",
+    "    powerplant_file = f\"{cloud_root}/nuclear_plant_loc.csv\""
+   ]
+  },
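+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "added-cell-flag-peek",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Optional peek at the flagged-tag inputs (a sketch, not part of the original\n",
+    "# workflow; assumes both CSVs are readable with the current credentials).\n",
+    "# A quick head() shows what step 5 will work with; skipped for unflagged tags.\n",
+    "if powerplant_flag:\n",
+    "    display(pd.read_csv(detection_file, storage_options=storage_options).head())\n",
+    "    display(pd.read_csv(powerplant_file, storage_options=storage_options).head())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "1268b5c0-b1e8-4d12-b6c9-b3b7aa54f99b",
+   "metadata": {
+    "editable": true,
+    "scrolled": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [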
\n", + "
\n", + "
\n", + "

Client

\n", + "

Client-b8edf70c-661c-11ef-81cb-9aeb6565527d

\n", + " \n", "\n", - " var force = true;\n", - " var py_version = '3.4.0'.replace('rc', '-rc.').replace('.dev', '-dev.');\n", - " var reloading = false;\n", - " var Bokeh = root.Bokeh;\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", "\n", - " if (typeof (root._bokeh_timeout) === \"undefined\" || force) {\n", - " root._bokeh_timeout = Date.now() + 5000;\n", - " root._bokeh_failed_load = false;\n", - " }\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", "\n", - " function run_callbacks() {\n", - " try {\n", - " root._bokeh_onload_callbacks.forEach(function(callback) {\n", - " if (callback != null)\n", - " callback();\n", - " });\n", - " } finally {\n", - " delete root._bokeh_onload_callbacks;\n", - " }\n", - " console.debug(\"Bokeh: all callbacks have finished\");\n", - " }\n", + "
Connection method: Cluster objectCluster type: distributed.LocalCluster
\n", + " Dashboard: http://127.0.0.1:8787/status\n", + "
\n", "\n", - " function load_libs(css_urls, js_urls, js_modules, js_exports, callback) {\n", - " if (css_urls == null) css_urls = [];\n", - " if (js_urls == null) js_urls = [];\n", - " if (js_modules == null) js_modules = [];\n", - " if (js_exports == null) js_exports = {};\n", + " \n", + " \n", + " \n", "\n", - " root._bokeh_onload_callbacks.push(callback);\n", + " \n", + "
\n", + "

Cluster Info

\n", + "
\n", + "
\n", + "
\n", + "
\n", + "

LocalCluster

\n", + "

12d4ffc1

\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", "\n", - " if (root._bokeh_is_loading > 0) {\n", - " console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n", - " return null;\n", - " }\n", - " if (js_urls.length === 0 && js_modules.length === 0 && Object.keys(js_exports).length === 0) {\n", - " run_callbacks();\n", - " return null;\n", - " }\n", - " if (!reloading) {\n", - " console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n", - " }\n", + " \n", + "
\n", + " Dashboard: http://127.0.0.1:8787/status\n", + " \n", + " Workers: 3\n", + "
\n", + " Total threads: 6\n", + " \n", + " Total memory: 24.00 GiB\n", + "
Status: runningUsing processes: True
\n", "\n", - " function on_load() {\n", - " root._bokeh_is_loading--;\n", - " if (root._bokeh_is_loading === 0) {\n", - " console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n", - " run_callbacks()\n", - " }\n", - " }\n", - " window._bokeh_on_load = on_load\n", + "
\n", + " \n", + "

Scheduler Info

\n", + "
\n", "\n", - " function on_error() {\n", - " console.error(\"failed to load \" + url);\n", - " }\n", + "
\n", + "
\n", + "
\n", + "
\n", + "

Scheduler

\n", + "

Scheduler-56c91c47-eaf3-4862-a916-47d10875bbc1

\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " Comm: tcp://127.0.0.1:37569\n", + " \n", + " Workers: 3\n", + "
\n", + " Dashboard: http://127.0.0.1:8787/status\n", + " \n", + " Total threads: 6\n", + "
\n", + " Started: Just now\n", + " \n", + " Total memory: 24.00 GiB\n", + "
\n", + "
\n", + "
\n", "\n", - " var skip = [];\n", - " if (window.requirejs) {\n", - " window.requirejs.config({'packages': {}, 'paths': {}, 'shim': {}});\n", - " root._bokeh_is_loading = css_urls.length + 0;\n", - " } else {\n", - " root._bokeh_is_loading = css_urls.length + js_urls.length + js_modules.length + Object.keys(js_exports).length;\n", - " }\n", + "
\n", + " \n", + "

Workers

\n", + "
\n", "\n", - " var existing_stylesheets = []\n", - " var links = document.getElementsByTagName('link')\n", - " for (var i = 0; i < links.length; i++) {\n", - " var link = links[i]\n", - " if (link.href != null) {\n", - "\texisting_stylesheets.push(link.href)\n", - " }\n", - " }\n", - " for (var i = 0; i < css_urls.length; i++) {\n", - " var url = css_urls[i];\n", - " if (existing_stylesheets.indexOf(url) !== -1) {\n", - "\ton_load()\n", - "\tcontinue;\n", - " }\n", - " const element = document.createElement(\"link\");\n", - " element.onload = on_load;\n", - " element.onerror = on_error;\n", - " element.rel = \"stylesheet\";\n", - " element.type = \"text/css\";\n", - " element.href = url;\n", - " console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n", - " document.body.appendChild(element);\n", - " } var existing_scripts = []\n", - " var scripts = document.getElementsByTagName('script')\n", - " for (var i = 0; i < scripts.length; i++) {\n", - " var script = scripts[i]\n", - " if (script.src != null) {\n", - "\texisting_scripts.push(script.src)\n", - " }\n", - " }\n", - " for (var i = 0; i < js_urls.length; i++) {\n", - " var url = js_urls[i];\n", - " if (skip.indexOf(url) !== -1 || existing_scripts.indexOf(url) !== -1) {\n", - "\tif (!window.requirejs) {\n", - "\t on_load();\n", - "\t}\n", - "\tcontinue;\n", - " }\n", - " var element = document.createElement('script');\n", - " element.onload = on_load;\n", - " element.onerror = on_error;\n", - " element.async = false;\n", - " element.src = url;\n", - " console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", - " document.head.appendChild(element);\n", - " }\n", - " for (var i = 0; i < js_modules.length; i++) {\n", - " var url = js_modules[i];\n", - " if (skip.indexOf(url) !== -1 || existing_scripts.indexOf(url) !== -1) {\n", - "\tif (!window.requirejs) {\n", - "\t on_load();\n", - "\t}\n", - "\tcontinue;\n", - " }\n", - " var element = document.createElement('script');\n", - " element.onload = on_load;\n", - " element.onerror = on_error;\n", - " element.async = false;\n", - " element.src = url;\n", - " element.type = \"module\";\n", - " console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", - " document.head.appendChild(element);\n", - " }\n", - " for (const name in js_exports) {\n", - " var url = js_exports[name];\n", - " if (skip.indexOf(url) >= 0 || root[name] != null) {\n", - "\tif (!window.requirejs) {\n", - "\t on_load();\n", - "\t}\n", - "\tcontinue;\n", - " }\n", - " var element = document.createElement('script');\n", - " element.onerror = on_error;\n", - " element.async = false;\n", - " element.type = \"module\";\n", - " console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", - " element.textContent = `\n", - " import ${name} from \"${url}\"\n", - " window.${name} = ${name}\n", - " window._bokeh_on_load()\n", - " `\n", - " document.head.appendChild(element);\n", - " }\n", - " if (!js_urls.length && !js_modules.length) {\n", - " on_load()\n", - " }\n", - " };\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "

Worker: 0

\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", "\n", - " function inject_raw_css(css) {\n", - " const element = document.createElement(\"style\");\n", - " element.appendChild(document.createTextNode(css));\n", - " document.body.appendChild(element);\n", - " }\n", + " \n", "\n", - " var js_urls = [\"https://cdn.bokeh.org/bokeh/release/bokeh-3.4.0.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-gl-3.4.0.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-widgets-3.4.0.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-tables-3.4.0.min.js\", \"https://cdn.holoviz.org/panel/1.4.0/dist/panel.min.js\"];\n", - " var js_modules = [];\n", - " var js_exports = {};\n", - " var css_urls = [];\n", - " var inline_js = [ function(Bokeh) {\n", - " Bokeh.set_log_level(\"info\");\n", - " },\n", - "function(Bokeh) {} // ensure no trailing comma for IE\n", - " ];\n", + " \n", "\n", - " function run_inline_js() {\n", - " if ((root.Bokeh !== undefined) || (force === true)) {\n", - " for (var i = 0; i < inline_js.length; i++) {\n", - "\ttry {\n", - " inline_js[i].call(root, root.Bokeh);\n", - "\t} catch(e) {\n", - "\t if (!reloading) {\n", - "\t throw e;\n", - "\t }\n", - "\t}\n", - " }\n", - " // Cache old bokeh versions\n", - " if (Bokeh != undefined && !reloading) {\n", - "\tvar NewBokeh = root.Bokeh;\n", - "\tif (Bokeh.versions === undefined) {\n", - "\t Bokeh.versions = new Map();\n", - "\t}\n", - "\tif (NewBokeh.version !== Bokeh.version) {\n", - "\t Bokeh.versions.set(NewBokeh.version, NewBokeh)\n", - "\t}\n", - "\troot.Bokeh = Bokeh;\n", - " }} else if (Date.now() < root._bokeh_timeout) {\n", - " setTimeout(run_inline_js, 100);\n", - " } else if (!root._bokeh_failed_load) {\n", - " console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n", - " root._bokeh_failed_load = true;\n", - " }\n", - " root._bokeh_is_initializing = false\n", - " }\n", + "
\n", + " Comm: tcp://127.0.0.1:36385\n", + " \n", + " Total threads: 2\n", + "
\n", + " Dashboard: http://127.0.0.1:37373/status\n", + " \n", + " Memory: 8.00 GiB\n", + "
\n", + " Nanny: tcp://127.0.0.1:39201\n", + "
\n", + " Local directory: /tmp/dask-scratch-space/worker-zfryeqlj\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "

Worker: 1

\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", "\n", - " function load_or_wait() {\n", - " // Implement a backoff loop that tries to ensure we do not load multiple\n", - " // versions of Bokeh and its dependencies at the same time.\n", - " // In recent versions we use the root._bokeh_is_initializing flag\n", - " // to determine whether there is an ongoing attempt to initialize\n", - " // bokeh, however for backward compatibility we also try to ensure\n", - " // that we do not start loading a newer (Panel>=1.0 and Bokeh>3) version\n", - " // before older versions are fully initialized.\n", - " if (root._bokeh_is_initializing && Date.now() > root._bokeh_timeout) {\n", - " root._bokeh_is_initializing = false;\n", - " root._bokeh_onload_callbacks = undefined;\n", - " console.log(\"Bokeh: BokehJS was loaded multiple times but one version failed to initialize.\");\n", - " load_or_wait();\n", - " } else if (root._bokeh_is_initializing || (typeof root._bokeh_is_initializing === \"undefined\" && root._bokeh_onload_callbacks !== undefined)) {\n", - " setTimeout(load_or_wait, 100);\n", - " } else {\n", - " root._bokeh_is_initializing = true\n", - " root._bokeh_onload_callbacks = []\n", - " var bokeh_loaded = Bokeh != null && (Bokeh.version === py_version || (Bokeh.versions !== undefined && Bokeh.versions.has(py_version)));\n", - " if (!reloading && !bokeh_loaded) {\n", - "\troot.Bokeh = undefined;\n", - " }\n", - " load_libs(css_urls, js_urls, js_modules, js_exports, function() {\n", - "\tconsole.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n", - "\trun_inline_js();\n", - " });\n", - " }\n", - " }\n", - " // Give older versions of the autoload script a head-start to ensure\n", - " // they initialize before we start loading newer version.\n", - " setTimeout(load_or_wait, 100)\n", - "}(window));" + " \n", + "\n", + " \n", + "\n", + "
\n", + " Comm: tcp://127.0.0.1:33789\n", + " \n", + " Total threads: 2\n", + "
\n", + " Dashboard: http://127.0.0.1:43807/status\n", + " \n", + " Memory: 8.00 GiB\n", + "
\n", + " Nanny: tcp://127.0.0.1:36055\n", + "
\n", + " Local directory: /tmp/dask-scratch-space/worker-_ig3y5un\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "

Worker: 2

\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + "\n", + " \n", + "\n", + "
\n", + " Comm: tcp://127.0.0.1:34455\n", + " \n", + " Total threads: 2\n", + "
\n", + " Dashboard: http://127.0.0.1:46481/status\n", + " \n", + " Memory: 8.00 GiB\n", + "
\n", + " Nanny: tcp://127.0.0.1:39461\n", + "
\n", + " Local directory: /tmp/dask-scratch-space/worker-j2jcan7e\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "\n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "\n", + "
\n", + "
" ], - "application/vnd.holoviews_load.v0+json": "(function(root) {\n function now() {\n return new Date();\n }\n\n var force = true;\n var py_version = '3.4.0'.replace('rc', '-rc.').replace('.dev', '-dev.');\n var reloading = false;\n var Bokeh = root.Bokeh;\n\n if (typeof (root._bokeh_timeout) === \"undefined\" || force) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks;\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, js_modules, js_exports, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n if (js_modules == null) js_modules = [];\n if (js_exports == null) js_exports = {};\n\n root._bokeh_onload_callbacks.push(callback);\n\n if (root._bokeh_is_loading > 0) {\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls.length === 0 && js_modules.length === 0 && Object.keys(js_exports).length === 0) {\n run_callbacks();\n return null;\n }\n if (!reloading) {\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n }\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n window._bokeh_on_load = on_load\n\n function on_error() {\n console.error(\"failed to load \" + url);\n }\n\n var skip = [];\n if (window.requirejs) {\n window.requirejs.config({'packages': {}, 'paths': {}, 'shim': {}});\n root._bokeh_is_loading = css_urls.length + 0;\n } else {\n root._bokeh_is_loading = css_urls.length + js_urls.length + js_modules.length + Object.keys(js_exports).length;\n }\n\n var existing_stylesheets = []\n var links = document.getElementsByTagName('link')\n for (var i = 0; i < links.length; i++) {\n var link = links[i]\n if (link.href != null) {\n\texisting_stylesheets.push(link.href)\n }\n }\n for (var i = 0; i < css_urls.length; i++) {\n var url = css_urls[i];\n if (existing_stylesheets.indexOf(url) !== -1) {\n\ton_load()\n\tcontinue;\n }\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error;\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n } var existing_scripts = []\n var scripts = document.getElementsByTagName('script')\n for (var i = 0; i < scripts.length; i++) {\n var script = scripts[i]\n if (script.src != null) {\n\texisting_scripts.push(script.src)\n }\n }\n for (var i = 0; i < js_urls.length; i++) {\n var url = js_urls[i];\n if (skip.indexOf(url) !== -1 || existing_scripts.indexOf(url) !== -1) {\n\tif (!window.requirejs) {\n\t on_load();\n\t}\n\tcontinue;\n }\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n for (var i = 0; i < js_modules.length; i++) {\n var url = js_modules[i];\n if (skip.indexOf(url) !== -1 || existing_scripts.indexOf(url) !== -1) {\n\tif (!window.requirejs) {\n\t on_load();\n\t}\n\tcontinue;\n }\n var 
element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n element.type = \"module\";\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n for (const name in js_exports) {\n var url = js_exports[name];\n if (skip.indexOf(url) >= 0 || root[name] != null) {\n\tif (!window.requirejs) {\n\t on_load();\n\t}\n\tcontinue;\n }\n var element = document.createElement('script');\n element.onerror = on_error;\n element.async = false;\n element.type = \"module\";\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n element.textContent = `\n import ${name} from \"${url}\"\n window.${name} = ${name}\n window._bokeh_on_load()\n `\n document.head.appendChild(element);\n }\n if (!js_urls.length && !js_modules.length) {\n on_load()\n }\n };\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n var js_urls = [\"https://cdn.bokeh.org/bokeh/release/bokeh-3.4.0.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-gl-3.4.0.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-widgets-3.4.0.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-tables-3.4.0.min.js\", \"https://cdn.holoviz.org/panel/1.4.0/dist/panel.min.js\"];\n var js_modules = [];\n var js_exports = {};\n var css_urls = [];\n var inline_js = [ function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\nfunction(Bokeh) {} // ensure no trailing comma for IE\n ];\n\n function run_inline_js() {\n if ((root.Bokeh !== undefined) || (force === true)) {\n for (var i = 0; i < inline_js.length; i++) {\n\ttry {\n inline_js[i].call(root, root.Bokeh);\n\t} catch(e) {\n\t if (!reloading) {\n\t throw e;\n\t }\n\t}\n }\n // Cache old bokeh versions\n if (Bokeh != undefined && !reloading) {\n\tvar NewBokeh = root.Bokeh;\n\tif (Bokeh.versions === undefined) {\n\t Bokeh.versions = new Map();\n\t}\n\tif (NewBokeh.version !== Bokeh.version) {\n\t Bokeh.versions.set(NewBokeh.version, NewBokeh)\n\t}\n\troot.Bokeh = Bokeh;\n }} else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n }\n root._bokeh_is_initializing = false\n }\n\n function load_or_wait() {\n // Implement a backoff loop that tries to ensure we do not load multiple\n // versions of Bokeh and its dependencies at the same time.\n // In recent versions we use the root._bokeh_is_initializing flag\n // to determine whether there is an ongoing attempt to initialize\n // bokeh, however for backward compatibility we also try to ensure\n // that we do not start loading a newer (Panel>=1.0 and Bokeh>3) version\n // before older versions are fully initialized.\n if (root._bokeh_is_initializing && Date.now() > root._bokeh_timeout) {\n root._bokeh_is_initializing = false;\n root._bokeh_onload_callbacks = undefined;\n console.log(\"Bokeh: BokehJS was loaded multiple times but one version failed to initialize.\");\n load_or_wait();\n } else if (root._bokeh_is_initializing || (typeof root._bokeh_is_initializing === \"undefined\" && root._bokeh_onload_callbacks !== undefined)) {\n setTimeout(load_or_wait, 100);\n } else {\n root._bokeh_is_initializing = true\n root._bokeh_onload_callbacks = []\n var bokeh_loaded = Bokeh != null && 
(Bokeh.version === py_version || (Bokeh.versions !== undefined && Bokeh.versions.has(py_version)));\n if (!reloading && !bokeh_loaded) {\n\troot.Bokeh = undefined;\n }\n load_libs(css_urls, js_urls, js_modules, js_exports, function() {\n\tconsole.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n\trun_inline_js();\n });\n }\n }\n // Give older versions of the autoload script a head-start to ensure\n // they initialize before we start loading newer version.\n setTimeout(load_or_wait, 100)\n}(window));" + "text/plain": [ + "" + ] }, + "execution_count": 15, "metadata": {}, - "output_type": "display_data" + "output_type": "execute_result" + } + ], + "source": [ + "# Set up a local cluster for distributed computing.\n", + "from distributed import LocalCluster\n", + "\n", + "cluster = LocalCluster()\n", + "client = cluster.get_client()\n", + "client" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "206aeb3c-9684-4eac-80e8-e94939529747", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" }, + "tags": [] + }, + "outputs": [ { "data": { - "application/javascript": [ + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.holoviews_exec.v0+json": "", - "text/html": [ - "
\n", - "
\n", - "
\n", - "" - ] - }, - "metadata": { - "application/vnd.holoviews_exec.v0+json": { - "id": "p1002" - } - }, - "output_type": "display_data" - } - ], - "source": [ - "# Import necessary libraries and modules.\n", - "import xarray as xr\n", - "from pint_xarray import unit_registry as ureg\n", - "from pangeo_fish.io import open_tag\n", - "import intake" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cde0c569-0b64-407f-b167-bb9fe7ee4349", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "#\n", - "# Set up execution parameters for the analysis.\n", - "#\n", - "# Note: This cell is tagged as parameters, allowing automatic updates when configuring with papermil.\n", - "\n", - "# tag_name corresponds to the name of the biologging tag name (DST identification number),\n", - "# which is also a path for storing all the information for the specific fish tagged with tag_name.\n", - "# tag_name = \"AD_A11849\"\n", - "# tag_name = \"SV_A11957\"\n", - "\n", - "\n", - "tag_list = [\n", - " \"NO_A12710\",\n", - " \"CB_A11036\",\n", - " \"LT_A11385\",\n", - " \"SQ_A10684\",\n", - " \"AD_A11177\",\n", - " \"PB_A12063\",\n", - " \"NO_A12742\",\n", - " \"DK_A10642\",\n", - " \"CB_A11071\",\n", - "]\n", - "tag_name = tag_list[8]\n", - "tag_name = \"DK_A10531\"\n", - "tag_name = \"AD_A11146\"\n", - "\n", - "cloud_root = \"s3://gfts-ifremer/tags/bargip\"\n", - "\n", - "# tag_root specifies the root URL for tag data used for this computation.\n", - "tag_root = f\"{cloud_root}/cleaned\"\n", - "\n", - "# catalog_url specifies the URL for the catalog for reference data used.\n", - "catalog_url = \"s3://gfts-ifremer/copernicus_catalogs/master.yml\"\n", - "\n", - "# scratch_root specifies the root directory for storing output files.\n", - "scratch_root = f\"{cloud_root}/tracks\"\n", - "\n", - "\n", - "# storage_options specifies options for the filesystem storing output files.\n", - "storage_options = {\n", - " \"anon\": False,\n", - " # 'profile' : \"gfts\",\n", - " \"client_kwargs\": {\n", - " \"endpoint_url\": \"https://s3.gra.perf.cloud.ovh.net\",\n", - " \"region_name\": \"gra\",\n", - " },\n", - "}\n", - "\n", - "# if you are using local file system, activate following two lines\n", - "scratch_root = \"./papermill_test\"\n", - "storage_options = None\n", - "\n", - "# Default chunk value for time dimension. This values depends on the configuration of your dask cluster.\n", - "chunk_time = 24\n", - "\n", - "#\n", - "# Parameters for step 2. **Compare Reference Model with DST Information:**\n", - "#\n", - "# bbox, bounding box, defines the latitude and longitude range for the analysis area.\n", - "bbox = {\"latitude\": [42, 56], \"longitude\": [-13, 5]}\n", - "\n", - "# relative_depth_threshold defines the acceptable fish depth relative to the maximum tag depth.\n", - "# It determines whether the fish can be considered to be in a certain location based on depth.\n", - "relative_depth_threshold = 0.8\n", - "\n", - "#\n", - "# Parameters for step 3. 
**Regrid the Grid from Reference Model Grid to Healpix Grid:**\n", - "#\n", - "# nside defines the resolution of the healpix grid used for regridding.\n", - "nside = 4096 # *2\n", - "\n", - "# rot defines the rotation angles for the healpix grid.\n", - "rot = {\"lat\": 0, \"lon\": 30}\n", - "\n", - "# min_vertices sets the minimum number of vertices for a valid transcription for regridding.\n", - "min_vertices = 1\n", - "\n", - "#\n", - "# Parameters for step 4. **Construct Emission Matrix:**\n", - "#\n", - "# differences_std sets the standard deviation for scipy.stats.norm.pdf.\n", - "# It expresses the estimated certainty of the field of difference.\n", - "differences_std = 0.75\n", - "\n", - "# recapture_std sets the covariance for recapture event.\n", - "# It shows the certainty of the final recapture area if it is known.\n", - "recapture_std = 1e-2\n", - "\n", - "# earth_radius defines the radius of the Earth used for distance calculations.\n", - "earth_radius = ureg.Quantity(6371, \"km\")\n", - "\n", - "# maximum_speed sets the maximum allowable speed for the tagged fish.\n", - "maximum_speed = ureg.Quantity(60, \"km / day\")\n", - "\n", - "# adjustment_factor adjusts parameters for a more fuzzy search.\n", - "# It will factor the allowed maximum displacement of the fish.\n", - "adjustment_factor = 5\n", - "\n", - "# truncate sets the truncating factor for computed maximum allowed sigma for convolution process.\n", - "truncate = 4\n", - "\n", - "#\n", - "# Parameters for step 5. **Compute Additional Emission Probability Matrix:**\n", - "#\n", - "# receiver_buffer sets the maximum allowed detection distance for acoustic receivers.\n", - "receiver_buffer = ureg.Quantity(1000, \"m\")\n", - "\n", - "#\n", - "# Parameters for step 7. **Estimate Model Parameters:**\n", - "#\n", - "# tolerance sets the tolerance level for optimised parameter serarch computation.\n", - "tolerance = 1e-3\n", - "\n", - "#\n", - "# Parameters for step 8. **Compute State Probabilities and Tracks:**\n", - "#\n", - "# track_modes defines the modes for track calculation.\n", - "track_modes = [\"mean\", \"mode\"]\n", - "\n", - "# additional_track_quantities sets quantities to compute for tracks using moving pandas.\n", - "additional_track_quantities = [\"speed\", \"distance\"]\n", - "\n", - "\n", - "#\n", - "# Parameters for step 9. 
**Visualization:**\n", - "#\n", - "# time_step defines for each time_step value we visualize state and emission matrix.\n", - "time_step = 3\n", - "\n", - "\n", - "# Define target root directories for storing analysis results.\n", - "target_root = f\"{scratch_root}/{tag_name}\"\n", - "\n", - "# Defines default chunk size for optimisation.\n", - "default_chunk = {\"time\": chunk_time, \"lat\": -1, \"lon\": -1}\n", - "default_chunk_xy = {\"time\": chunk_time, \"x\": -1, \"y\": -1}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "653051cb-1868-43a4-a8b9-7d985ca95dcb", - "metadata": {}, - "outputs": [], + "\n", + ".xr-section-summary-in:disabled + label:before {\n", + " color: var(--xr-disabled-color);\n", + "}\n", + "\n", + ".xr-section-summary-in:checked + label:before {\n", + " content: '▼';\n", + "}\n", + "\n", + ".xr-section-summary-in:checked + label > span {\n", + " display: none;\n", + "}\n", + "\n", + ".xr-section-summary,\n", + ".xr-section-inline-details {\n", + " padding-top: 4px;\n", + " padding-bottom: 4px;\n", + "}\n", + "\n", + ".xr-section-inline-details {\n", + " grid-column: 2 / -1;\n", + "}\n", + "\n", + ".xr-section-details {\n", + " display: none;\n", + " grid-column: 1 / -1;\n", + " margin-bottom: 5px;\n", + "}\n", + "\n", + ".xr-section-summary-in:checked ~ .xr-section-details {\n", + " display: contents;\n", + "}\n", + "\n", + ".xr-array-wrap {\n", + " grid-column: 1 / -1;\n", + " display: grid;\n", + " grid-template-columns: 20px auto;\n", + "}\n", + "\n", + ".xr-array-wrap > label {\n", + " grid-column: 1;\n", + " vertical-align: top;\n", + "}\n", + "\n", + ".xr-preview {\n", + " color: var(--xr-font-color3);\n", + "}\n", + "\n", + ".xr-array-preview,\n", + ".xr-array-data {\n", + " padding: 0 5px !important;\n", + " grid-column: 2;\n", + "}\n", + "\n", + ".xr-array-data,\n", + ".xr-array-in:checked ~ .xr-array-preview {\n", + " display: none;\n", + "}\n", + "\n", + ".xr-array-in:checked ~ .xr-array-data,\n", + ".xr-array-preview {\n", + " display: inline-block;\n", + "}\n", + "\n", + ".xr-dim-list {\n", + " display: inline-block !important;\n", + " list-style: none;\n", + " padding: 0 !important;\n", + " margin: 0;\n", + "}\n", + "\n", + ".xr-dim-list li {\n", + " display: inline-block;\n", + " padding: 0;\n", + " margin: 0;\n", + "}\n", + "\n", + ".xr-dim-list:before {\n", + " content: '(';\n", + "}\n", + "\n", + ".xr-dim-list:after {\n", + " content: ')';\n", + "}\n", + "\n", + ".xr-dim-list li:not(:last-child):after {\n", + " content: ',';\n", + " padding-right: 5px;\n", + "}\n", + "\n", + ".xr-has-index {\n", + " font-weight: bold;\n", + "}\n", + "\n", + ".xr-var-list,\n", + ".xr-var-item {\n", + " display: contents;\n", + "}\n", + "\n", + ".xr-var-item > div,\n", + ".xr-var-item label,\n", + ".xr-var-item > .xr-var-name span {\n", + " background-color: var(--xr-background-color-row-even);\n", + " margin-bottom: 0;\n", + "}\n", + "\n", + ".xr-var-item > .xr-var-name:hover span {\n", + " padding-right: 5px;\n", + "}\n", + "\n", + ".xr-var-list > li:nth-child(odd) > div,\n", + ".xr-var-list > li:nth-child(odd) > label,\n", + ".xr-var-list > li:nth-child(odd) > .xr-var-name span {\n", + " background-color: var(--xr-background-color-row-odd);\n", + "}\n", + "\n", + ".xr-var-name {\n", + " grid-column: 1;\n", + "}\n", + "\n", + ".xr-var-dims {\n", + " grid-column: 2;\n", + "}\n", + "\n", + ".xr-var-dtype {\n", + " grid-column: 3;\n", + " text-align: right;\n", + " color: var(--xr-font-color2);\n", + "}\n", + "\n", + ".xr-var-preview {\n", + " 
grid-column: 4;\n", + "}\n", + "\n", + ".xr-index-preview {\n", + " grid-column: 2 / 5;\n", + " color: var(--xr-font-color2);\n", + "}\n", + "\n", + ".xr-var-name,\n", + ".xr-var-dims,\n", + ".xr-var-dtype,\n", + ".xr-preview,\n", + ".xr-attrs dt {\n", + " white-space: nowrap;\n", + " overflow: hidden;\n", + " text-overflow: ellipsis;\n", + " padding-right: 10px;\n", + "}\n", + "\n", + ".xr-var-name:hover,\n", + ".xr-var-dims:hover,\n", + ".xr-var-dtype:hover,\n", + ".xr-attrs dt:hover {\n", + " overflow: visible;\n", + " width: auto;\n", + " z-index: 1;\n", + "}\n", + "\n", + ".xr-var-attrs,\n", + ".xr-var-data,\n", + ".xr-index-data {\n", + " display: none;\n", + " background-color: var(--xr-background-color) !important;\n", + " padding-bottom: 5px !important;\n", + "}\n", + "\n", + ".xr-var-attrs-in:checked ~ .xr-var-attrs,\n", + ".xr-var-data-in:checked ~ .xr-var-data,\n", + ".xr-index-data-in:checked ~ .xr-index-data {\n", + " display: block;\n", + "}\n", + "\n", + ".xr-var-data > table {\n", + " float: right;\n", + "}\n", + "\n", + ".xr-var-name span,\n", + ".xr-var-data,\n", + ".xr-index-name div,\n", + ".xr-index-data,\n", + ".xr-attrs {\n", + " padding-left: 25px !important;\n", + "}\n", + "\n", + ".xr-attrs,\n", + ".xr-var-attrs,\n", + ".xr-var-data,\n", + ".xr-index-data {\n", + " grid-column: 1 / -1;\n", + "}\n", + "\n", + "dl.xr-attrs {\n", + " padding: 0;\n", + " margin: 0;\n", + " display: grid;\n", + " grid-template-columns: 125px auto;\n", + "}\n", + "\n", + ".xr-attrs dt,\n", + ".xr-attrs dd {\n", + " padding: 0;\n", + " margin: 0;\n", + " float: left;\n", + " padding-right: 10px;\n", + " width: auto;\n", + "}\n", + "\n", + ".xr-attrs dt {\n", + " font-weight: normal;\n", + " grid-column: 1;\n", + "}\n", + "\n", + ".xr-attrs dt:hover span {\n", + " display: inline-block;\n", + " background: var(--xr-background-color);\n", + " padding-right: 10px;\n", + "}\n", + "\n", + ".xr-attrs dd {\n", + " grid-column: 2;\n", + " white-space: pre-wrap;\n", + " word-break: break-all;\n", + "}\n", + "\n", + ".xr-icon-database,\n", + ".xr-icon-file-text2,\n", + ".xr-no-icon {\n", + " display: inline-block;\n", + " vertical-align: middle;\n", + " width: 1em;\n", + " height: 1.5em !important;\n", + " stroke-width: 0;\n", + " stroke: currentColor;\n", + " fill: currentColor;\n", + "}\n", + "
<xarray.DatasetView> Size: 0B\n",
+       "Dimensions:  ()\n",
+       "Data variables:\n",
+       "    *empty*\n",
+       "Attributes:\n",
+       "    pit_tag_id:       DK_A10531\n",
+       "    scientific_name:  Dicentrarchus labrax\n",
+       "    common_name:      European seabass\n",
+       "    project:          BARGIP
" + ], + "text/plain": [ + "DataTree('None', parent=None)\n", + "│ Dimensions: ()\n", + "│ Data variables:\n", + "│ *empty*\n", + "│ Attributes:\n", + "│ pit_tag_id: DK_A10531\n", + "│ scientific_name: Dicentrarchus labrax\n", + "│ common_name: European seabass\n", + "│ project: BARGIP\n", + "├── DataTree('dst')\n", + "│ Dimensions: (time: 112760)\n", + "│ Coordinates:\n", + "│ * time (time) datetime64[ns] 902kB 2014-06-05T22:00:00 ... 2014-10-...\n", + "│ Data variables:\n", + "│ temperature (time) float64 902kB 20.56 20.56 20.56 ... 23.59 23.62 23.66\n", + "│ pressure (time) float64 902kB -0.38 -0.38 -0.38 ... -1.01 -1.01 -1.01\n", + "└── DataTree('tagging_events')\n", + " Dimensions: (event_name: 2)\n", + " Coordinates:\n", + " * event_name (event_name) object 16B 'release' 'fish_death'\n", + " Data variables:\n", + " time (event_name) datetime64[ns] 16B 2014-06-06T17:35:00 2014-08-1...\n", + " longitude (event_name) float64 16B 2.172 2.292\n", + " latitude (event_name) float64 16B 51.06 51.05" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Define target root directories for storing analysis results.\n", - "target_root = f\"{scratch_root}/{tag_name}\"" + "# Open and retrieve the tag data required for the analysis\n", + "tag = open_tag(tag_root, tag_name)\n", + "tag" ] }, { - "cell_type": "code", - "execution_count": null, - "id": "45612cff-e622-4a3b-9879-2fac50c8cfe5", + "cell_type": "markdown", + "id": "524fe17c-43b2-498b-a06b-91ddfba27b81", "metadata": { "editable": true, "slideshow": { @@ -874,80 +1831,619 @@ }, "tags": [] }, - "outputs": [], "source": [ - "tag_root" + "## 2. **Compare Reference Model with DST Tag Information:** Analyze and compare data from the reference model with information from the biologging data of the species in question. \n", + "\n", + "In this step, we compare the reference model data with Data Storage Tag information.\n", + "The process involves reading and cleaning the reference model, aligning time, converting depth units, subtracting tag data from the model, and saving the results." 
]
  },
  {
   "cell_type": "code",
-   "execution_count": null,
-   "id": "1268b5c0-b1e8-4d12-b6c9-b3b7aa54f99b",
+   "execution_count": 21,
+   "id": "4afd36b3-2121-45ec-9ffc-d03b6bda9d24",
   "metadata": {
    "editable": true,
-    "scrolled": true,
    "slideshow": {
     "slide_type": ""
    },
    "tags": []
   },
-   "outputs": [],
-   "source": [
-    "# Set up a local cluster for distributed computing.\n",
-    "from distributed import LocalCluster\n",
-    "\n",
-    "cluster = LocalCluster()\n",
-    "client = cluster.get_client()\n",
-    "client"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "206aeb3c-9684-4eac-80e8-e94939529747",
-   "metadata": {
-    "editable": true,
-    "slideshow": {
-     "slide_type": ""
+   "outputs": [
+    {
+     "data": {
+      "application/javascript": [
+       "/* … BokehJS / HoloViews / PyViz extension-loader JavaScript omitted: this saved output only initialises the plotting extensions and carries no analysis content … */"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.holoviews_exec.v0+json": "",
+      "text/html": [
+       "
\n", + "" + ] + }, + "metadata": { + "application/vnd.holoviews_exec.v0+json": { + "id": "p1002" + } + }, + "output_type": "display_data" + } + ], "source": [ "# Import necessary libraries\n", "from pangeo_fish.cf import bounds_to_bins\n", @@ -969,7 +2465,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "id": "da5b25fe-6028-4daf-97b8-7f0e00a1c581", "metadata": { "editable": true, @@ -995,8 +2491,9 @@ " )\n", " deptho = sub_cat[\"cmems_mod_ibi_phy_my_0.083deg-3D_static\"].to_dask().deptho\n", "\n", - " # Assign latitude from thetao to deptho\n", + " # Assign latitude and longitude from thetao to deptho to shift in positions\n", " deptho[\"latitude\"] = thetao[\"latitude\"]\n", + " deptho[\"longitude\"] = thetao[\"longitude\"]\n", "\n", " # Create mask for deptho\n", " mask = deptho.isnull()\n", @@ -1035,55 +2532,9 @@ ] }, { - "cell_type": "code", - "execution_count": null, - "id": "56dc60d0-52b7-47f9-b18d-0e7b5ada0f01", - "metadata": {}, - "outputs": [], - "source": [ - "(tag_log).hvplot(x=\"time\", y=\"temperature\", color=\"red\", size=5, width=1000, height=500)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7836186d-feae-40e9-97f2-974404d69cf8", - "metadata": {}, - "outputs": [], - "source": [ - "(tag_log).hvplot(x=\"time\", y=\"temperature\", color=\"red\", size=5, width=1000, height=500)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c4952515-7750-4fee-ad7d-a5a0032878f1", - "metadata": {}, - "outputs": [], - "source": [ - "abs(tag_log.temperature.diff(dim=\"time\")).hvplot(\n", - " x=\"time\", y=\"temperature\", color=\"red\", size=5, width=1000, height=500\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "53c69ffd-b396-4f4f-905f-a65e160476bc", - "metadata": {}, - "outputs": [], - "source": [ - "abs(tag_log.temperature.diff(dim=\"time\")).hvplot(\n", - " x=\"time\", y=\"temperature\", color=\"red\", size=5, width=1000, height=500\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "14b39398-0758-4213-b019-3927daf90050", + "cell_type": "raw", + "id": "b6763746-0735-483c-aa07-5d43557351d5", "metadata": {}, - "outputs": [], "source": [ "# Verify the data\n", "from pangeo_fish.io import save_html_hvplot\n", @@ -1104,12 +2555,12 @@ "\n", "save_html_hvplot(plot, filepath, storage_options)\n", "\n", - "plot" + "# plot" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "id": "e1f02606-3999-445e-84c0-0c287502c7e9", "metadata": { "editable": true, @@ -1125,7 +2576,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "id": "427b5862-9e01-4dd6-bd61-0a9563213dce", "metadata": { "editable": true, @@ -1134,30 +2585,25 @@ }, "tags": [] }, - "outputs": [], - "source": [ - "model = get_copernicus_zarr()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "db1959b6-6293-48b9-8f52-1a1e2664b702", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/srv/conda/envs/notebook/lib/python3.11/site-packages/intake_xarray/base.py:21: FutureWarning: The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. 
To access a mapping from dimension names to lengths, please use `Dataset.sizes`.\n", + " 'dims': dict(self._ds.dims),\n", + "/srv/conda/envs/notebook/lib/python3.11/site-packages/intake_xarray/base.py:21: FutureWarning: The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.\n", + " 'dims': dict(self._ds.dims),\n" + ] + } + ], "source": [ - "model" + "model = get_copernicus_zarr()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 26, "id": "bab1c2d9-d4cb-4392-a07c-9442b8f05f46", "metadata": { "editable": true, @@ -1181,13 +2627,12 @@ " depth=slice(None, (tag_log[\"pressure\"].max() - ds[\"XE\"].min()).compute())\n", " )\n", " )\n", - ")\n", - "reference_model" + ")" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, "id": "3ca01800-cc83-471f-9e8e-c0851f006f1f", "metadata": { "editable": true, @@ -1196,7 +2641,16 @@ }, "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 276 ms, sys: 36.5 ms, total: 312 ms\n", + "Wall time: 295 ms\n" + ] + } + ], "source": [ "%%time\n", "# Reshape the tag log, so that it bins to the time step of reference_model\n", @@ -1215,23 +2669,7 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "46d89294-10b1-4fde-861b-c69576f217dc", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "reshaped_tag" - ] - }, - { - "cell_type": "code", - "execution_count": null, + "execution_count": 28, "id": "46574d23-54f8-45e1-8619-06e05292f1c2", "metadata": { "editable": true, @@ -1263,12 +2701,12 @@ "\n", "# Persist the diff data\n", "diff = diff.chunk(default_chunk).persist()\n", - "diff" + "# diff" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "id": "74efae4e-53d2-4852-9b20-c5e3028e0c63", "metadata": { "editable": true, @@ -1278,7 +2716,16 @@ }, "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 2 µs, sys: 0 ns, total: 2 µs\n", + "Wall time: 4.53 µs\n" + ] + } + ], "source": [ "%%time\n", "# Verify the data\n", @@ -1288,17 +2735,22 @@ { "cell_type": "code", "execution_count": null, - "id": "ecb9a957-8df3-4c22-b6ca-f709c785b17f", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, + "id": "2b65b45d-7d94-4761-8e9f-d958eabfedb8", + "metadata": {}, "outputs": [], "source": [ - "target_root" + "diff" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "41e73dee-69bb-4827-af07-ec84e3a5120d", + "metadata": {}, + "outputs": [], + "source": [ + "target_lat = diff[\"lat\"]\n", + "target_lon = diff[\"lon\"]" ] }, { @@ -1352,6 +2804,7 @@ "outputs": [], "source": [ "# Import necessary libraries\n", + "import s3fs\n", "import numpy as np\n", "from xarray_healpy import HealpyGridInfo, HealpyRegridder\n", "from pangeo_fish.grid import center_longitude" @@ -1383,7 +2836,6 @@ " )\n", " .pipe(lambda ds: ds.merge(ds[[\"latitude\", \"longitude\"]].compute()))\n", " .swap_dims({\"lat\": \"yi\", \"lon\": \"xi\"})\n", - " .drop_vars([\"lat\", \"lon\"])\n", ")\n", "ds" ] @@ -1391,27 +2843,88 @@ { "cell_type": "code", "execution_count": null, - "id": "6778d294-bfae-4cdc-844c-724400ffe7b1", - "metadata": { - "editable": true, - "slideshow": { - 
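The next cells regrid this dataset onto a HEALPix grid. To get a feel for what `nside = 4096` (the value set in the parameters cell) means physically, the mean cell spacing can be estimated with plain numpy; the figures below are approximate and assume a spherical Earth:

```python
import numpy as np

nside = 4096                 # HEALPix resolution parameter from the parameters cell
level = int(np.log2(nside))  # refinement level handed to HealpyGridInfo -> 12
npix = 12 * nside**2         # number of cells covering the sphere (~2.0e8)

# Each cell covers 4*pi/npix steradians; take the square root as a mean spacing.
spacing_rad = np.sqrt(4 * np.pi / npix)
spacing_km = spacing_rad * 6371  # Earth radius in km, as in the parameters cell

print(level, npix, round(spacing_km, 2))  # 12 201326592 ~1.59 km
```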
"slide_type": "" - }, - "tags": [] - }, + "id": "96096bcb-a9c5-45e3-8bfa-8f1dea01e324", + "metadata": {}, "outputs": [], "source": [ - "%%time\n", - "# Define the target Healpix grid information\n", - "grid = HealpyGridInfo(level=int(np.log2(nside)), rot=rot)\n", - "target_grid = grid.target_grid(ds).pipe(center_longitude, 0)\n", - "target_grid" + "s3 = s3fs.S3FileSystem(\n", + " anon=False,\n", + " client_kwargs={\n", + " \"endpoint_url\": \"https://s3.gra.perf.cloud.ovh.net\",\n", + " },\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cd419691-b48d-41e7-b5f5-bcd4bf2ce9ad", + "metadata": {}, + "outputs": [], + "source": [ + "coastal_distance = xr.open_zarr(distance_filepath).sel(\n", + " lat=slice(56, 40), lon=slice(-13, 5)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5d18c5f-e323-45be-9984-0509f34a8e4e", + "metadata": {}, + "outputs": [], + "source": [ + "coastal_distance = coastal_distance.sortby(\"lat\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "37b25b39-3253-4d0d-8c5b-8399ce412b44", + "metadata": {}, + "outputs": [], + "source": [ + "coastal_distance = coastal_distance.interp(\n", + " lat=target_lat, lon=target_lon, method=\"linear\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "902c96ba-04db-4197-8b25-825dc3f81ce0", + "metadata": {}, + "outputs": [], + "source": [ + "coastal_distance[\"dist\"] = 1 + np.exp(\n", + " -(coastal_distance.dist * coastal_distance.dist) * distance_scale_factor\n", + ")" + ] + }, + { + "cell_type": "raw", + "id": "15935fe4-9c00-41b7-b44c-7bd3fd81ae2d", + "metadata": {}, + "source": [ + "(coastal_distance).dist.hvplot(geo=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fe4feb54-0d9d-424f-9215-7194d5dd1d23", + "metadata": {}, + "outputs": [], + "source": [ + "coastal_distance = coastal_distance.swap_dims({\"lat\": \"yi\", \"lon\": \"xi\"}).drop_vars(\n", + " [\"lat\", \"lon\"]\n", + ")" ] }, { "cell_type": "code", "execution_count": null, - "id": "14fd5c95-744c-4adb-b16f-49b49bb03d5c", + "id": "6778d294-bfae-4cdc-844c-724400ffe7b1", "metadata": { "editable": true, "slideshow": { @@ -1421,7 +2934,11 @@ }, "outputs": [], "source": [ - "grid" + "%%time\n", + "# Define the target Healpix grid information\n", + "grid = HealpyGridInfo(level=int(np.log2(nside)), rot=rot)\n", + "target_grid = grid.target_grid(ds).pipe(center_longitude, 0)\n", + "target_grid" ] }, { @@ -1467,6 +2984,16 @@ "regridded" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "6857fe5f-2e64-4b42-81df-c8724376301e", + "metadata": {}, + "outputs": [], + "source": [ + "regridded_coastal = regridder.regrid_ds(coastal_distance)" + ] + }, { "cell_type": "code", "execution_count": null, @@ -1487,6 +3014,16 @@ "reshaped" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "bc4ebc2f-db47-4106-b0f1-02a8ddfa9dc0", + "metadata": {}, + "outputs": [], + "source": [ + "reshaped_coastal = grid.to_2d(regridded_coastal).pipe(center_longitude, 0)" + ] + }, { "cell_type": "code", "execution_count": null, @@ -1504,6 +3041,26 @@ "# reshaped[\"diff\"].count([\"x\", \"y\"]).plot()" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "9640d4ec-3863-4803-b907-a876d01235bb", + "metadata": {}, + "outputs": [], + "source": [ + "coastal_chunk = {\"x\": default_chunk_xy[\"x\"], \"y\": default_chunk_xy[\"y\"]}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"da754a3c-9c7c-4bef-b3e5-306ec512f3ca", + "metadata": {}, + "outputs": [], + "source": [ + "reshaped[\"diff\"] = reshaped[\"diff\"] / reshaped_coastal[\"dist\"]" + ] + }, { "cell_type": "code", "execution_count": null, @@ -1526,8 +3083,16 @@ " compute=True,\n", " storage_options=storage_options,\n", ")\n", + "\n", + "reshaped_coastal.chunk(coastal_chunk).to_zarr(\n", + " f\"{target_root}/coastal.zarr\",\n", + " mode=\"w\",\n", + " consolidated=True,\n", + " compute=True,\n", + " storage_options=storage_options,\n", + ")\n", "# Cleanup unnecessary variables to free up memory\n", - "del ds, grid, target_grid, regridder, regridded, reshaped" + "del ds, grid, target_grid, regridder, regridded, reshaped, reshaped_coastal" ] }, { @@ -1702,14 +3267,21 @@ " storage_options=storage_options,\n", ")\n", "\n", - "\n", "del differences, grid, initial_probability, final_probability, emission_pdf" ] }, + { + "cell_type": "markdown", + "id": "2096e0f0-eebd-434d-9dd6-5e1a40b4da63", + "metadata": {}, + "source": [ + "## 5. **Replace emission for the tags with warm spikes detected**" + ] + }, { "cell_type": "code", "execution_count": null, - "id": "806935e6-efcb-47db-83db-1c91afb55c01", + "id": "d23d1259-79b4-45c6-a976-7f70295821cd", "metadata": { "editable": true, "scrolled": true, @@ -1720,9 +3292,10 @@ }, "outputs": [], "source": [ - "%%time \n", + "import pandas as pd\n", + "from pangeo_fish.heat import heat_regulation, powerpalnt_emission_map\n", "# Import necessary libraries and open data and perform initial setup\n", - "from pangeo_fish import acoustic\n", + "\n", "\n", "emission = xr.open_dataset(\n", " f\"{target_root}/emission.zarr\",\n", @@ -1736,96 +3309,25 @@ { "cell_type": "code", "execution_count": null, - "id": "ec01a44c-7878-45e7-9fe8-dbdfcf10cfea", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "# Construct the emission probabilities based on acoustic detections\n", - "\n", - "acoustic_pdf = acoustic.emission_probability(\n", - " tag,\n", - " emission[[\"time\", \"cell_ids\", \"mask\"]].compute(),\n", - " receiver_buffer,\n", - " nondetections=\"mask\",\n", - ")\n", - "acoustic_pdf = acoustic_pdf.persist()\n", - "acoustic_pdf" - ] - }, - { - "cell_type": "raw", - "id": "7e23b1f9-1a19-4070-a629-fcd5a2c4d52c", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "source": [ - "# Verify the data and visualize the acoustic detections\n", - "tag['acoustic'][\"deployment_id\"].hvplot.scatter(\n", - " c='red',marker='x')*(\n", - " acoustic_pdf['acoustic'] != 0).sum(dim=('y', 'x')).hvplot()" - ] - }, - { - "cell_type": "raw", - "id": "50798155-1d16-48cb-b920-1e1da8e5f0b5", + "id": "6c2045e2-31c8-4fa5-a64e-e95de2bd61a4", "metadata": {}, - "source": [ - "acoustic_pdf['acoustic'].count(dim=('y', 'x')).hvplot()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c3784adc-06e4-4864-bcec-b97133e03854", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "# Merge and save the combined emission probability matrix with acoustic probabilities\n", - "\n", - "combined = emission.merge(acoustic_pdf)\n", - "combined" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7792e720-ca0c-4d36-9b2a-5e1df31e07a5", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, "outputs": [], "source": [ - "# This cell saves the emission data to Zarr 
format, then cleans up unnecessary variables to free up memory.\n", + "if powerplant_flag:\n", + " # Loading detections, formatting and reducing observation window\n", + " detections = pd.read_csv(detection_file).set_index(\"time\").to_xarray()\n", + " detections[\"time\"] = detections[\"time\"].astype(\"datetime64\")\n", + " detections = detections.sel(\n", + " time=emission[\"time\"]\n", + " ) # Narrowing the data to the observed days only\n", "\n", - "combined.to_zarr(\n", - " f\"{target_root}/emission_acoustic.zarr\",\n", - " mode=\"w\",\n", - " consolidated=True,\n", - " storage_options=storage_options,\n", - ")\n", - "# cleanup\n", + " pp_map = (\n", + " pd.read_csv(powerplant_file, sep=\";\").drop(\"Country\", axis=1).to_xarray()\n", + " ) # Loading powerplant locations data\n", "\n", - "del emission, acoustic_pdf, combined" + " # Combining and replacing the emission map at the given timestamps for the days where warm plume are detected\n", + " combined_masks = powerpalnt_emission_map(pp_map, emission, buffer_size, rot)\n", + " emission = heat_regulation(emission, detections, combined_masks)" ] }, { @@ -1878,14 +3380,7 @@ "# Open and combine the emission probability matrix\n", "\n", "combined = (\n", - " xr.open_dataset(\n", - " f\"{target_root}/emission.zarr\",\n", - " engine=\"zarr\",\n", - " chunks=default_chunk_xy,\n", - " inline_array=True,\n", - " storage_options=storage_options,\n", - " )\n", - " .pipe(combine_emission_pdf)\n", + " emission.pipe(combine_emission_pdf)\n", " .chunk(default_chunk_xy)\n", " .persist() # convert to comment if the emission matrix does *not* fit in memory\n", ")\n", @@ -2165,26 +3660,6 @@ ")" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "0d7ad49d-b52d-47ee-a2fe-bb5855a496ab", - "metadata": {}, - "outputs": [], - "source": [ - "emission" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5e3dbcb0-6b74-4cbe-90f3-75836c86ad18", - "metadata": {}, - "outputs": [], - "source": [ - "emission[\"pdf\"][0]" - ] - }, { "cell_type": "code", "execution_count": null, From afd3a6124c70ec8d60863985e4c34e75186f65ec Mon Sep 17 00:00:00 2001 From: aderrien7 Date: Thu, 29 Aug 2024 16:26:31 +0000 Subject: [PATCH 04/13] Removeing_template file --- docs/pangeo-fish_papermill.ipynb | 3731 ------------------------------ 1 file changed, 3731 deletions(-) delete mode 100644 docs/pangeo-fish_papermill.ipynb diff --git a/docs/pangeo-fish_papermill.ipynb b/docs/pangeo-fish_papermill.ipynb deleted file mode 100644 index c580e15..0000000 --- a/docs/pangeo-fish_papermill.ipynb +++ /dev/null @@ -1,3731 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "86e6f639-2455-4f5d-a557-b78b1d821ecf", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "source": [ - "# **Example Usage of Pangeo-Fish Software**\n", - "\n", - "\n", - "**Overview:**\n", - "This Jupyter notebook demonstrates the usage of the Pangeo-Fish software, a tool designed for analyzing biologging data in reference to Earth Observation (EO) data.\n", - "\n", - "The biologging data consist of Data Storage Tag (DST), along with release and recapture time and location of the species in question. Both biologging data and the reference EO data are accessible with https and the access methods are incorporated in this notebook. \n", - "\n", - "**Purpose:**\n", - "By executing this notebook, users will learn how to set up a workflow for utilizing the Pangeo-Fish software. 
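For context on the `powerplant_flag` branch added in the hunk above: conceptually, at every timestamp where a warm spike was detected, the temperature-based emission field is overridden by a mask of power-plant plume areas. The actual logic lives in `pangeo_fish.heat` (`heat_regulation` and the `powerpalnt_emission_map` helper imported above); the sketch below only illustrates the idea with made-up data:

```python
import numpy as np
import pandas as pd
import xarray as xr

times = pd.date_range("2014-06-06", periods=4, freq="D")
emission = xr.DataArray(
    np.random.rand(4, 5, 5), dims=("time", "y", "x"), coords={"time": times}
)

# One boolean per time step: was a warm (power-plant plume) spike detected?
detected = xr.DataArray(
    [False, True, False, True], dims="time", coords={"time": times}
)

# Combined mask of plume buffers around known power plants (random stand-in).
plume_mask = xr.DataArray((np.random.rand(5, 5) > 0.7).astype(float), dims=("y", "x"))

# On detection days the plume mask replaces the temperature-based emission.
regulated = xr.where(detected, plume_mask, emission)
print(regulated.dims)  # ('time', 'y', 'x')
```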
The workflow consists of 9 steps, which are described below:\n",
-    "\n",
-    "1. **Configure the Notebook:** Prepare the notebook environment for analysis.\n",
-    "2. **Compare Reference Model with DST Information:** Analyze and compare data from the reference model with information from the biologging data of the species in question.\n",
-    "3. **Regrid the Grid from Reference Model Grid to Healpix Grid:** Transform the grid from the reference model to the Healpix grid for further analysis.\n",
-    "4. **Construct Emission Matrix:** Create an emission matrix based on the transformed grid.\n",
-    "5. **Replace emission for flagged tags:** If a tag is flagged for warm water, the associated detection file is used to replace the emission at the flagged timestamps.\n",
-    "6. **Combine and Normalize Emission Matrix:** Merge the emission matrix and normalize it for further processing.\n",
-    "7. **Estimate Model Parameters:** Determine the parameters of the model based on the normalized emission matrix.\n",
-    "8. **Compute State Probabilities and Tracks:** Calculate the probability distribution of the species in question and compute the tracks.\n",
-    "9. **Visualization:** Visualize the results of the analysis for interpretation and insight.\n",
-    "\n",
-    "Throughout this notebook, users will gain practical experience in setting up and executing a workflow using Pangeo-Fish, enabling them to apply similar methodologies to their own biologging data analysis tasks.\n",
-    "\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "c535925e-793d-41be-a989-4fae4cdaaa67",
-   "metadata": {
-    "editable": true,
-    "slideshow": {
-     "slide_type": ""
-    },
-    "tags": []
-   },
-   "source": [
-    "## 1. **Configure the Notebook:** Prepare the notebook environment for analysis.\n",
-    "\n",
-    "In this step, we set up the notebook environment for analysis. It includes installing necessary packages, importing required libraries, setting up parameters, and configuring the cluster for distributed computing. It also retrieves the tag data needed for analysis.\n",
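Since the configuration of this template lives in a cell tagged `parameters`, the whole notebook can be executed once per tag with papermill. A sketch of such an invocation (papermill must be installed; output paths are illustrative):

```python
import papermill as pm

# Execute the template once per tag, overriding the parameters cell.
for tag_name in ["NO_A12710", "CB_A11036"]:
    pm.execute_notebook(
        "pangeo-fish_papermill.ipynb",            # this template
        f"output/pangeo-fish_{tag_name}.ipynb",   # executed copy for the tag
        parameters={"tag_name": tag_name},
    )
```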
-  {
-   "cell_type": "raw",
-   "id": "195dbd56-b0d7-4659-849b-2e5db4591d2f",
-   "metadata": {
-    "editable": true,
-    "raw_mimetype": "",
-    "slideshow": {
-     "slide_type": ""
-    },
-    "tags": []
-   },
-   "source": [
-    "!pip install ../../git/pangeo-fish/"
-   ]
-  },
-  {
-   "cell_type": "raw",
-   "id": "19981551-2f17-4ac9-872c-5631edf9c0d5",
-   "metadata": {
-    "editable": true,
-    "raw_mimetype": "",
-    "slideshow": {
-     "slide_type": ""
-    },
-    "tags": []
-   },
-   "source": [
-    "!pip install copernicusmarine"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "id": "682ef19d-ea85-49c9-a1ee-1f22d055b580",
-   "metadata": {
-    "editable": true,
-    "slideshow": {
-     "slide_type": ""
-    },
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "# Import necessary libraries and modules.\n",
-    "import xarray as xr\n",
-    "from pint_xarray import unit_registry as ureg\n",
-    "from pangeo_fish.io import open_tag\n",
-    "import intake\n",
-    "import pandas as pd"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "id": "cde0c569-0b64-407f-b167-bb9fe7ee4349",
-   "metadata": {
-    "editable": true,
-    "slideshow": {
-     "slide_type": ""
-    },
-    "tags": [
-     "parameters"
-    ]
-   },
-   "outputs": [],
-   "source": [
-    "#\n",
-    "# Set up execution parameters for the analysis.\n",
-    "#\n",
-    "# Note: This cell is tagged as parameters, allowing automatic updates when configuring with papermill.\n",
-    "\n",
-    "# tag_name corresponds to the name of the biologging tag (DST identification number),\n",
-    "# which is also a path for storing all the information for the specific fish tagged with tag_name.\n",
-    "# tag_name = \"AD_A11849\"\n",
-    "# tag_name = \"SV_A11957\"\n",
-    "\n",
-    "tag_list = [\n",
-    "    \"NO_A12710\",\n",
-    "    \"CB_A11036\",\n",
-    "    \"LT_A11385\",\n",
-    "    \"SQ_A10684\",\n",
-    "    \"AD_A11177\",\n",
-    "    \"PB_A12063\",\n",
-    "    \"NO_A12742\",\n",
-    "    \"DK_A10642\",\n",
-    "    \"CB_A11071\",\n",
-    "]\n",
-    "tag_name = tag_list[8]\n",
-    "tag_name = \"DK_A10531\"\n",
-    "\n",
-    "cloud_root = \"s3://gfts-ifremer/tags/bargip\"\n",
-    "\n",
-    "# tag_root specifies the root URL for the tag data used for this computation.\n",
-    "tag_root = f\"{cloud_root}/cleaned\"\n",
-    "\n",
-    "# catalog_url specifies the URL of the catalog of the reference data used.\n",
-    "catalog_url = \"s3://gfts-ifremer/copernicus_catalogs/master.yml\"\n",
-    "\n",
-    "# scratch_root specifies the root directory for storing output files.\n",
-    "scratch_root = f\"{cloud_root}/tracks\"\n",
-    "\n",
-    "# storage_options specifies options for the filesystem storing the output files.\n",
-    "storage_options = {\n",
-    "    \"anon\": False,\n",
-    "    # 'profile' : \"gfts\",\n",
-    "    \"client_kwargs\": {\n",
-    "        \"endpoint_url\": \"https://s3.gra.perf.cloud.ovh.net\",\n",
-    "        \"region_name\": \"gra\",\n",
-    "    },\n",
-    "}\n",
-    "\n",
-    "# If you are using a local file system, activate the following lines.\n",
-    "folder_name = \"../toto\"\n",
-    "storage_options = None\n",
-    "scratch_root = f\"/home/jovyan/notebooks/papermill/{folder_name}\"\n",
-    "\n",
-    "# Default chunk value for the time dimension. This value depends on the configuration of your dask cluster.\n",
-    "chunk_time = 24\n",
-    "\n",
-    "#\n",
-    "# Parameters for step 2. **Compare Reference Model with DST Information:**\n",
-    "#\n",
-    "# bbox, the bounding box, defines the latitude and longitude range for the analysis area.\n",
-    "bbox = {\"latitude\": [40, 56], \"longitude\": [-13, 5]}\n",
-    "\n",
-    "# relative_depth_threshold defines the acceptable fish depth relative to the maximum tag depth.\n",
-    "# It determines whether the fish can be considered to be in a certain location based on depth.\n",
-    "relative_depth_threshold = 0.8\n",
-    "\n",
-    "#\n",
-    "# Parameters for step 3. **Regrid the Grid from Reference Model Grid to Healpix Grid:**\n",
-    "#\n",
-    "# distance_filepath is the path to the coastal distance file.\n",
-    "distance_filepath = \"s3://gfts-ifremer/tags/distance2coast.zarr\"\n",
-    "\n",
-    "# distance_scale_factor scales the squared distance in the exponential decay function.\n",
-    "distance_scale_factor = 0.01\n",
-    "\n",
-    "# nside defines the resolution of the healpix grid used for regridding.\n",
-    "nside = 4096  # *2\n",
-    "\n",
-    "# rot defines the rotation angles for the healpix grid.\n",
-    "rot = {\"lat\": 0, \"lon\": 30}\n",
-    "\n",
-    "# min_vertices sets the minimum number of vertices for a valid transcription for regridding.\n",
-    "min_vertices = 1\n",
-    "\n",
-    "#\n",
-    "# Parameters for step 4. **Construct Emission Matrix:**\n",
-    "#\n",
-    "# differences_std sets the standard deviation for scipy.stats.norm.pdf.\n",
-    "# It expresses the estimated certainty of the field of difference.\n",
-    "differences_std = 0.75\n",
-    "\n",
-    "# recapture_std sets the covariance for the recapture event.\n",
-    "# It expresses the certainty of the final recapture area if it is known.\n",
-    "recapture_std = 1e-2\n",
-    "\n",
-    "# earth_radius defines the radius of the Earth used for distance calculations.\n",
-    "earth_radius = ureg.Quantity(6371, \"km\")\n",
-    "\n",
-    "# maximum_speed sets the maximum allowable speed for the tagged fish.\n",
-    "maximum_speed = ureg.Quantity(20, \"km / day\")\n",
-    "\n",
-    "# adjustment_factor adjusts parameters for a fuzzier search.\n",
-    "# It scales the allowed maximum displacement of the fish.\n",
-    "adjustment_factor = 5\n",
-    "\n",
-    "# truncate sets the truncation factor for the computed maximum allowed sigma in the convolution process.\n",
-    "truncate = 4\n",
-    "\n",
-    "#\n",
-    "# Parameters for step 5. **Compute Additional Emission Probability Matrix:**\n",
-    "#\n",
-    "# buffer_size sets the size of the powerplant warm plume.\n",
-    "buffer_size = ureg.Quantity(1000, \"m\")\n",
-    "# powerplant_flag is a boolean that states whether the fish has swum in a warm plume.\n",
-    "\n",
-    "#\n",
-    "# Parameters for step 7. **Estimate Model Parameters:**\n",
-    "#\n",
-    "# tolerance sets the tolerance level for the optimised parameter search computation.\n",
-    "tolerance = 1e-3\n",
-    "\n",
-    "#\n",
-    "# Parameters for step 8. **Compute State Probabilities and Tracks:**\n",
-    "#\n",
-    "# track_modes defines the modes for track calculation.\n",
-    "track_modes = [\"mean\", \"mode\"]\n",
-    "\n",
-    "# additional_track_quantities sets the quantities to compute for tracks using MovingPandas.\n",
-    "additional_track_quantities = [\"speed\", \"distance\"]\n",
-    "\n",
-    "#\n",
-    "# Parameters for step 9. **Visualization:**\n",
-    "#\n",
-    "# time_step defines the interval (in time indices) at which the state and emission matrices are visualized.\n",
-    "time_step = 3\n",
-    "\n",
-    "# Define the target root directories for storing analysis results.\n",
-    "target_root = f\"{scratch_root}/{tag_name}\"\n",
-    "\n",
-    "# Define the default chunk sizes for optimisation.\n",
-    "default_chunk = {\"time\": chunk_time, \"lat\": -1, \"lon\": -1}\n",
-    "default_chunk_xy = {\"time\": chunk_time, \"x\": -1, \"y\": -1}"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "id": "653051cb-1868-43a4-a8b9-7d985ca95dcb",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Define the target root directories for storing analysis results.\n",
-    "target_root = f\"{scratch_root}/{tag_name}\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "id": "b855b1c2-2e68-4d58-bab7-29f581afe32d",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'/home/jovyan/notebooks/papermill/../toto/DK_A10531'"
-      ]
-     },
-     "execution_count": 11,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "target_root"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "id": "45612cff-e622-4a3b-9879-2fac50c8cfe5",
-   "metadata": {
-    "editable": true,
-    "slideshow": {
-     "slide_type": ""
-    },
-    "tags": []
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'s3://gfts-ifremer/tags/bargip/cleaned'"
-      ]
-     },
-     "execution_count": 12,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tag_root"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "id": "1e5ef786-4d46-435b-8b80-215a017d89eb",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "warm_plume = pd.read_csv(\n",
-    "    \"s3://gfts-ifremer/tags/bargip/bar_flag_warm_plume.txt\", sep=\"\\t\"\n",
-    ")\n",
-    "warm_list = list(warm_plume[warm_plume[\"warm_plume\"] == True][\"tag_name\"])\n",
-    "\n",
-    "# True when the current tag is flagged for warm-plume behaviour.\n",
-    "powerplant_flag = tag_name in warm_list"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "id": "37fd021e-71d0-4837-9033-cc417943514d",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "if powerplant_flag:\n",
-    "    detection_file = f\"{tag_root}/{tag_name}/detection.csv\"\n",
-    "    powerplant_file = f\"{cloud_root}/nuclear_plant_loc.csv\""
-   ]
-  },
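The parameters cell above is tagged `parameters`, which is exactly what papermill rewrites at execution time. A minimal sketch of driving this notebook once per tag (the notebook and output paths are illustrative assumptions):

```python
# Sketch only: batch-execute this notebook with papermill, one run per tag.
import papermill as pm

for tag_name in ["NO_A12710", "CB_A11036", "DK_A10531"]:
    pm.execute_notebook(
        "pangeo-fish_papermill.ipynb",         # input notebook (path assumed)
        f"runs/pangeo-fish_{tag_name}.ipynb",  # executed copy per tag (path assumed)
        parameters={"tag_name": tag_name},     # overrides the "parameters" cell
    )
```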
\n", - "
\n", - "
\n", - "

Client

\n", - "

Client-b8edf70c-661c-11ef-81cb-9aeb6565527d

\n", - " \n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n", - "
Connection method: Cluster objectCluster type: distributed.LocalCluster
\n", - " Dashboard: http://127.0.0.1:8787/status\n", - "
\n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "
\n", - "

Cluster Info

\n", - "
\n", - "
\n", - "
\n", - "
\n", - "

LocalCluster

\n", - "

12d4ffc1

\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n", - "\n", - " \n", - "
\n", - " Dashboard: http://127.0.0.1:8787/status\n", - " \n", - " Workers: 3\n", - "
\n", - " Total threads: 6\n", - " \n", - " Total memory: 24.00 GiB\n", - "
Status: runningUsing processes: True
\n", - "\n", - "
\n", - " \n", - "

Scheduler Info

\n", - "
\n", - "\n", - "
\n", - "
\n", - "
\n", - "
\n", - "

Scheduler

\n", - "

Scheduler-56c91c47-eaf3-4862-a916-47d10875bbc1

\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
\n", - " Comm: tcp://127.0.0.1:37569\n", - " \n", - " Workers: 3\n", - "
\n", - " Dashboard: http://127.0.0.1:8787/status\n", - " \n", - " Total threads: 6\n", - "
\n", - " Started: Just now\n", - " \n", - " Total memory: 24.00 GiB\n", - "
\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "

Workers

\n", - "
\n", - "\n", - " \n", - "
\n", - "
\n", - "
\n", - "
\n", - " \n", - "

Worker: 0

\n", - "
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - "
\n", - " Comm: tcp://127.0.0.1:36385\n", - " \n", - " Total threads: 2\n", - "
\n", - " Dashboard: http://127.0.0.1:37373/status\n", - " \n", - " Memory: 8.00 GiB\n", - "
\n", - " Nanny: tcp://127.0.0.1:39201\n", - "
\n", - " Local directory: /tmp/dask-scratch-space/worker-zfryeqlj\n", - "
\n", - "
\n", - "
\n", - "
\n", - " \n", - "
\n", - "
\n", - "
\n", - "
\n", - " \n", - "

Worker: 1

\n", - "
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - "
\n", - " Comm: tcp://127.0.0.1:33789\n", - " \n", - " Total threads: 2\n", - "
\n", - " Dashboard: http://127.0.0.1:43807/status\n", - " \n", - " Memory: 8.00 GiB\n", - "
\n", - " Nanny: tcp://127.0.0.1:36055\n", - "
\n", - " Local directory: /tmp/dask-scratch-space/worker-_ig3y5un\n", - "
\n", - "
\n", - "
\n", - "
\n", - " \n", - "
\n", - "
\n", - "
\n", - "
\n", - " \n", - "

Worker: 2

\n", - "
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - "\n", - " \n", - "\n", - "
\n", - " Comm: tcp://127.0.0.1:34455\n", - " \n", - " Total threads: 2\n", - "
\n", - " Dashboard: http://127.0.0.1:46481/status\n", - " \n", - " Memory: 8.00 GiB\n", - "
\n", - " Nanny: tcp://127.0.0.1:39461\n", - "
\n", - " Local directory: /tmp/dask-scratch-space/worker-j2jcan7e\n", - "
\n", - "
\n", - "
\n", - "
\n", - " \n", - "\n", - "
\n", - "
\n", - "\n", - "
\n", - "
\n", - "
\n", - "
\n", - " \n", - "\n", - "
\n", - "
" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Set up a local cluster for distributed computing.\n", - "from distributed import LocalCluster\n", - "\n", - "cluster = LocalCluster()\n", - "client = cluster.get_client()\n", - "client" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "206aeb3c-9684-4eac-80e8-e94939529747", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.DatasetView> Size: 0B\n",
-       "Dimensions:  ()\n",
-       "Data variables:\n",
-       "    *empty*\n",
-       "Attributes:\n",
-       "    pit_tag_id:       DK_A10531\n",
-       "    scientific_name:  Dicentrarchus labrax\n",
-       "    common_name:      European seabass\n",
-       "    project:          BARGIP
" - ], - "text/plain": [ - "DataTree('None', parent=None)\n", - "│ Dimensions: ()\n", - "│ Data variables:\n", - "│ *empty*\n", - "│ Attributes:\n", - "│ pit_tag_id: DK_A10531\n", - "│ scientific_name: Dicentrarchus labrax\n", - "│ common_name: European seabass\n", - "│ project: BARGIP\n", - "├── DataTree('dst')\n", - "│ Dimensions: (time: 112760)\n", - "│ Coordinates:\n", - "│ * time (time) datetime64[ns] 902kB 2014-06-05T22:00:00 ... 2014-10-...\n", - "│ Data variables:\n", - "│ temperature (time) float64 902kB 20.56 20.56 20.56 ... 23.59 23.62 23.66\n", - "│ pressure (time) float64 902kB -0.38 -0.38 -0.38 ... -1.01 -1.01 -1.01\n", - "└── DataTree('tagging_events')\n", - " Dimensions: (event_name: 2)\n", - " Coordinates:\n", - " * event_name (event_name) object 16B 'release' 'fish_death'\n", - " Data variables:\n", - " time (event_name) datetime64[ns] 16B 2014-06-06T17:35:00 2014-08-1...\n", - " longitude (event_name) float64 16B 2.172 2.292\n", - " latitude (event_name) float64 16B 51.06 51.05" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Open and retrieve the tag data required for the analysis\n", - "tag = open_tag(tag_root, tag_name)\n", - "tag" - ] - }, - { - "cell_type": "markdown", - "id": "524fe17c-43b2-498b-a06b-91ddfba27b81", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "source": [ - "## 2. **Compare Reference Model with DST Tag Information:** Analyze and compare data from the reference model with information from the biologging data of the species in question. \n", - "\n", - "In this step, we compare the reference model data with Data Storage Tag information.\n", - "The process involves reading and cleaning the reference model, aligning time, converting depth units, subtracting tag data from the model, and saving the results." 
-  {
-   "cell_type": "code",
-   "execution_count": 21,
-   "id": "4afd36b3-2121-45ec-9ffc-d03b6bda9d24",
-   "metadata": {
-    "editable": true,
-    "slideshow": {
-     "slide_type": ""
-    },
-    "tags": []
-   },
-   "outputs": [
-    {
-     "data": {
-      "application/javascript": [
-       "[Bokeh/HoloViews extension-loading JavaScript removed]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.holoviews_exec.v0+json": "",
-      "text/html": [
-       "[HoloViews comm-manager boilerplate removed]"
-      ]
-     },
-     "metadata": {
-      "application/vnd.holoviews_exec.v0+json": {
-       "id": "p1002"
-      }
-     },
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "# Import necessary libraries.\n",
-    "from pangeo_fish.cf import bounds_to_bins\n",
-    "from pangeo_fish.diff import diff_z\n",
-    "from pangeo_fish.tags import adapt_model_time, reshape_by_bins, to_time_slice\n",
-    "\n",
-    "# Drop data outside the reference interval.\n",
-    "time_slice = to_time_slice(tag[\"tagging_events/time\"])\n",
-    "time = tag[\"dst\"].ds.time\n",
-    "cond = (time <= time_slice.stop) & (time >= time_slice.start)\n",
-    "\n",
-    "tag_log = tag[\"dst\"].ds.where(cond, drop=True)\n",
-    "\n",
-    "min_ = tag_log.time[0]\n",
-    "max_ = tag_log.time[-1]\n",
-    "\n",
-    "time_slice = slice(min_.data, max_.data)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 22,
-   "id": "da5b25fe-6028-4daf-97b8-7f0e00a1c581",
-   "metadata": {
-    "editable": true,
-    "slideshow": {
-     "slide_type": ""
-    },
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "def get_copernicus_zarr(product_id=\"IBI_MULTIYEAR_PHY_005_002\"):\n",
-    "    master_cat = intake.open_catalog(catalog_url)\n",
-    "    if product_id == \"IBI_MULTIYEAR_PHY_005_002\":\n",
-    "        # Open the necessary datasets.\n",
-    "        sub_cat = master_cat[product_id]\n",
-    "        thetao = sub_cat[\"cmems_mod_ibi_phy_my_0.083deg-3D_P1D-m\"](\n",
-    "            chunk=\"time\"\n",
-    "        ).to_dask()[[\"thetao\"]]\n",
-    "        zos = (\n",
-    "            sub_cat[\"cmems_mod_ibi_phy_my_0.083deg-3D_P1D-m\"](chunk=\"time\")\n",
-    "            .to_dask()\n",
-    "            .zos\n",
-    "        )\n",
-    "        deptho = sub_cat[\"cmems_mod_ibi_phy_my_0.083deg-3D_static\"].to_dask().deptho\n",
-    "\n",
-    "        # Assign latitude and longitude from thetao to deptho to fix shifted positions.\n",
-    "        deptho[\"latitude\"] = thetao[\"latitude\"]\n",
-    "        deptho[\"longitude\"] = thetao[\"longitude\"]\n",
-    "\n",
-    "        # Create a mask for deptho.\n",
-    "        mask = deptho.isnull()\n",
-    "\n",
-    "        # Merge the datasets and assign the relevant variables.\n",
-    "        ds = (\n",
-    "            thetao.rename({\"thetao\": \"TEMP\"}).assign(\n",
-    "                {\n",
-    "                    \"XE\": zos,\n",
-    "                    \"H0\": deptho,\n",
-    "                    \"mask\": mask,\n",
-    "                }\n",
-    "            )\n",
-    "        ).rename({\"latitude\": \"lat\", \"longitude\": \"lon\", \"elevation\": \"depth\"})\n",
-    "\n",
-    "        # Ensure depth is positive.\n",
-    "        ds[\"depth\"] = abs(ds[\"depth\"])\n",
-    "\n",
-    "        # Rearrange the depth coordinates and assign the dynamic depth and bathymetry.\n",
-    "        ds = (\n",
-    "            ds.isel(depth=slice(None, None, -1))\n",
-    "            .assign(\n",
-    "                {\n",
-    "                    \"dynamic_depth\": lambda ds: (ds[\"depth\"] + ds[\"XE\"]).assign_attrs(\n",
-    "                        {\"units\": \"m\", \"positive\": \"down\"}\n",
-    "                    ),\n",
-    "                    \"dynamic_bathymetry\": lambda ds: (ds[\"H0\"] + ds[\"XE\"]).assign_attrs(\n",
-    "                        {\"units\": \"m\", \"positive\": \"down\"}\n",
-    "                    ),\n",
-    "                }\n",
-    "            )\n",
-    "            .pipe(broadcast_variables, {\"lat\": \"latitude\", \"lon\": \"longitude\"})\n",
-    "        )\n",
-    "        return ds"
-   ]
-  },
-  {
-   "cell_type": "raw",
-   "id": "b6763746-0735-483c-aa07-5d43557351d5",
-   "metadata": {},
-   "source": [
-    "# Verify the data.\n",
-    "from pangeo_fish.io import save_html_hvplot\n",
-    "\n",
-    "plot = (\n",
-    "    (-tag[\"dst\"].pressure).hvplot(width=1000, height=500, color=\"blue\")\n",
-    "    * (-tag_log).hvplot.scatter(\n",
-    "        x=\"time\", y=\"pressure\", color=\"red\", size=5, width=1000, height=500\n",
-    "    )\n",
-    "    * (\n",
-    "        (tag[\"dst\"].temperature).hvplot(width=1000, height=500, color=\"blue\")\n",
-    "        * (tag_log).hvplot.scatter(\n",
-    "            x=\"time\", y=\"temperature\", color=\"red\", size=5, width=1000, height=500\n",
-    "        )\n",
-    "    )\n",
-    ")\n",
-    "filepath = f\"{target_root}/tags.html\"\n",
-    "\n",
-    "save_html_hvplot(plot, filepath, storage_options)\n",
-    "\n",
-    "# plot"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 24,
-   "id": "e1f02606-3999-445e-84c0-0c287502c7e9",
-   "metadata": {
-    "editable": true,
-    "slideshow": {
-     "slide_type": ""
-    },
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "from pangeo_fish.io import broadcast_variables"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 25,
-   "id": "427b5862-9e01-4dd6-bd61-0a9563213dce",
-   "metadata": {
-    "editable": true,
-    "slideshow": {
-     "slide_type": ""
-    },
-    "tags": []
-   },
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/srv/conda/envs/notebook/lib/python3.11/site-packages/intake_xarray/base.py:21: FutureWarning: The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.\n",
-      "  'dims': dict(self._ds.dims),\n"
-     ]
-    }
-   ],
-   "source": [
-    "model = get_copernicus_zarr()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 26,
-   "id": "bab1c2d9-d4cb-4392-a07c-9442b8f05f46",
-   "metadata": {
-    "editable": true,
-    "slideshow": {
-     "slide_type": ""
-    },
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "# Subset the reference model by:\n",
-    "# - aligning the model time with the time of tag_log,\n",
-    "# - dropping depth layers that are unlikely given the observed pressure from tag_log,\n",
-    "# - restricting latitude and longitude to the defined bbox.\n",
-    "reference_model = (\n",
-    "    model.sel(time=adapt_model_time(time_slice))\n",
-    "    .sel(lat=slice(*bbox[\"latitude\"]), lon=slice(*bbox[\"longitude\"]))\n",
-    "    .pipe(\n",
-    "        lambda ds: ds.sel(\n",
-    "            depth=slice(None, (tag_log[\"pressure\"].max() - ds[\"XE\"].min()).compute())\n",
-    "        )\n",
-    "    )\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 27,
-   "id": "3ca01800-cc83-471f-9e8e-c0851f006f1f",
-   "metadata": {
-    "editable": true,
-    "slideshow": {
-     "slide_type": ""
-    },
-    "tags": []
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "CPU times: user 276 ms, sys: 36.5 ms, total: 312 ms\n",
-      "Wall time: 295 ms\n"
-     ]
-    }
-   ],
-   "source": [
-    "%%time\n",
-    "# Reshape the tag log so that it bins to the time step of reference_model.\n",
-    "reshaped_tag = reshape_by_bins(\n",
-    "    tag_log,\n",
-    "    dim=\"time\",\n",
-    "    bins=(\n",
-    "        reference_model.cf.add_bounds([\"time\"], output_dim=\"bounds\")\n",
-    "        .pipe(bounds_to_bins, bounds_dim=\"bounds\")\n",
-    "        .get(\"time_bins\")\n",
-    "    ),\n",
-    "    bin_dim=\"bincount\",\n",
-    "    other_dim=\"obs\",\n",
-    ").chunk({\"time\": chunk_time})"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 28,
-   "id": "46574d23-54f8-45e1-8619-06e05292f1c2",
-   "metadata": {
-    "editable": true,
-    "slideshow": {
-     "slide_type": ""
-    },
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "# Subtract the time-binned tag_log from the reference_model.\n",
-    "# Here, for each time bin, each observed value is compared with\n",
-    "# the corresponding depth of reference_model using the diff_z function.\n",
-    "\n",
-    "diff = (\n",
-    "    diff_z(\n",
-    "        reference_model.chunk(dict(depth=-1)),\n",
-    "        reshaped_tag,\n",
-    "        depth_threshold=relative_depth_threshold,\n",
-    "    )\n",
-    "    .assign_attrs({\"tag_id\": tag_name})\n",
-    "    .assign(\n",
-    "        {\n",
-    "            \"H0\": reference_model[\"H0\"],\n",
-    "            \"ocean_mask\": reference_model[\"H0\"].notnull(),\n",
-    "        }\n",
-    "    )\n",
-    ")\n",
-    "\n",
-    "# Persist the diff data.\n",
-    "diff = diff.chunk(default_chunk).persist()\n",
-    "# diff"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 29,
-   "id": "74efae4e-53d2-4852-9b20-c5e3028e0c63",
-   "metadata": {
-    "editable": true,
-    "scrolled": true,
-    "slideshow": {
-     "slide_type": ""
-    },
-    "tags": []
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "CPU times: user 2 µs, sys: 0 ns, total: 2 µs\n",
-      "Wall time: 4.53 µs\n"
-     ]
-    }
-   ],
-   "source": [
-    "%%time\n",
-    "# Verify the data.\n",
-    "# diff[\"diff\"].count([\"lat\", \"lon\"]).plot()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "2b65b45d-7d94-4761-8e9f-d958eabfedb8",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "diff"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "41e73dee-69bb-4827-af07-ec84e3a5120d",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "target_lat = diff[\"lat\"]\n",
-    "target_lon = diff[\"lon\"]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "d4c0325b-523c-4d58-8319-151183bb1376",
-   "metadata": {
-    "editable": true,
-    "slideshow": {
-     "slide_type": ""
-    },
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "%%time\n",
-    "# Save a snapshot to disk.\n",
-    "diff.to_zarr(f\"{target_root}/diff.zarr\", mode=\"w\", storage_options=storage_options)\n",
-    "\n",
-    "# Cleanup\n",
-    "del tag_log, model, reference_model, reshaped_tag, diff"
-   ]
-  },
This process involves defining the Healpix grid, creating the target grid, computing interpolation weights, performing the regridding, and saving the regridded data.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5d7dcccb-dbf3-435c-9094-4bd73497ef22", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "# Import necessary libraries\n", - "import s3fs\n", - "import numpy as np\n", - "from xarray_healpy import HealpyGridInfo, HealpyRegridder\n", - "from pangeo_fish.grid import center_longitude" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f08ff2ef-fc82-449d-90b7-c3bae7dc5d1b", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "%%time\n", - "\n", - "# Open the diff data and performs cleaning operations to prepare it for regridding.\n", - "\n", - "ds = (\n", - " xr.open_dataset(\n", - " f\"{target_root}/diff.zarr\",\n", - " engine=\"zarr\",\n", - " chunks={},\n", - " storage_options=storage_options,\n", - " )\n", - " .pipe(lambda ds: ds.merge(ds[[\"latitude\", \"longitude\"]].compute()))\n", - " .swap_dims({\"lat\": \"yi\", \"lon\": \"xi\"})\n", - ")\n", - "ds" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "96096bcb-a9c5-45e3-8bfa-8f1dea01e324", - "metadata": {}, - "outputs": [], - "source": [ - "s3 = s3fs.S3FileSystem(\n", - " anon=False,\n", - " client_kwargs={\n", - " \"endpoint_url\": \"https://s3.gra.perf.cloud.ovh.net\",\n", - " },\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cd419691-b48d-41e7-b5f5-bcd4bf2ce9ad", - "metadata": {}, - "outputs": [], - "source": [ - "coastal_distance = xr.open_zarr(distance_filepath).sel(\n", - " lat=slice(56, 40), lon=slice(-13, 5)\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d5d18c5f-e323-45be-9984-0509f34a8e4e", - "metadata": {}, - "outputs": [], - "source": [ - "coastal_distance = coastal_distance.sortby(\"lat\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "37b25b39-3253-4d0d-8c5b-8399ce412b44", - "metadata": {}, - "outputs": [], - "source": [ - "coastal_distance = coastal_distance.interp(\n", - " lat=target_lat, lon=target_lon, method=\"linear\"\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "902c96ba-04db-4197-8b25-825dc3f81ce0", - "metadata": {}, - "outputs": [], - "source": [ - "coastal_distance[\"dist\"] = 1 + np.exp(\n", - " -(coastal_distance.dist * coastal_distance.dist) * distance_scale_factor\n", - ")" - ] - }, - { - "cell_type": "raw", - "id": "15935fe4-9c00-41b7-b44c-7bd3fd81ae2d", - "metadata": {}, - "source": [ - "(coastal_distance).dist.hvplot(geo=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fe4feb54-0d9d-424f-9215-7194d5dd1d23", - "metadata": {}, - "outputs": [], - "source": [ - "coastal_distance = coastal_distance.swap_dims({\"lat\": \"yi\", \"lon\": \"xi\"}).drop_vars(\n", - " [\"lat\", \"lon\"]\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6778d294-bfae-4cdc-844c-724400ffe7b1", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "%%time\n", - "# Define the target Healpix grid information\n", - "grid = HealpyGridInfo(level=int(np.log2(nside)), rot=rot)\n", - "target_grid = grid.target_grid(ds).pipe(center_longitude, 0)\n", - 
"target_grid" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7078ae4b-fd5d-4021-9aad-096d4a392199", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "%%time\n", - "# Compute the interpolation weights for regridding the diff data\n", - "regridder = HealpyRegridder(\n", - " ds[[\"longitude\", \"latitude\", \"ocean_mask\"]],\n", - " target_grid,\n", - " method=\"bilinear\",\n", - " interpolation_kwargs={\"mask\": \"ocean_mask\", \"min_vertices\": min_vertices},\n", - ")\n", - "regridder" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1bc8c737-7aa4-4177-8aa4-929e3e917de9", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "%%time\n", - "# Perform the regridding operation using the computed interpolation weights.\n", - "regridded = regridder.regrid_ds(ds)\n", - "regridded" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6857fe5f-2e64-4b42-81df-c8724376301e", - "metadata": {}, - "outputs": [], - "source": [ - "regridded_coastal = regridder.regrid_ds(coastal_distance)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bd14c62b-7a41-4dc9-a5b8-3d19742147c8", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "%%time\n", - "# Reshape the regridded data to 2D\n", - "reshaped = grid.to_2d(regridded).pipe(center_longitude, 0)\n", - "reshaped = reshaped.persist()\n", - "reshaped" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bc4ebc2f-db47-4106-b0f1-02a8ddfa9dc0", - "metadata": {}, - "outputs": [], - "source": [ - "reshaped_coastal = grid.to_2d(regridded_coastal).pipe(center_longitude, 0)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "dcce5655-488a-4531-b0d8-8454250da579", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "# This cell verifies the regridded data by plotting the count of non-NaN values.\n", - "# reshaped[\"diff\"].count([\"x\", \"y\"]).plot()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9640d4ec-3863-4803-b907-a876d01235bb", - "metadata": {}, - "outputs": [], - "source": [ - "coastal_chunk = {\"x\": default_chunk_xy[\"x\"], \"y\": default_chunk_xy[\"y\"]}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "da754a3c-9c7c-4bef-b3e5-306ec512f3ca", - "metadata": {}, - "outputs": [], - "source": [ - "reshaped[\"diff\"] = reshaped[\"diff\"] / reshaped_coastal[\"dist\"]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "dbf59a24-980a-4a60-8dab-4fd4f8da13b8", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "%%time\n", - "# This cell saves the regridded data to Zarr format, then cleans up unnecessary variables to free up memory after the regridding process.\n", - "reshaped.chunk(default_chunk_xy).to_zarr(\n", - " f\"{target_root}/diff-regridded.zarr\",\n", - " mode=\"w\",\n", - " consolidated=True,\n", - " compute=True,\n", - " storage_options=storage_options,\n", - ")\n", - "\n", - "reshaped_coastal.chunk(coastal_chunk).to_zarr(\n", - " f\"{target_root}/coastal.zarr\",\n", - " mode=\"w\",\n", - " consolidated=True,\n", - " compute=True,\n", - " storage_options=storage_options,\n", - ")\n", - "# 
Cleanup unnecessary variables to free up memory\n", - "del ds, grid, target_grid, regridder, regridded, reshaped, reshaped_coastal" - ] - }, - { - "cell_type": "markdown", - "id": "5826c0a3-33ea-469d-a675-c681f2eaf05f", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "source": [ - "## 4. **Construct Emission Matrix:** Create an emission matrix based on the transformed grid.\n", - "\n", - "In this step, we construct the emission probability matrix based on the differences between the observed tag temperature and the reference sea temperature computed in Workflow 2 and regridded in Workflow 3. The emission probability matrix represents the likelihood of observing a specific temperature difference given the model parameters and configurations.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ac5febe6-f6cd-4071-a437-ced8a3727220", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "# Import necessary libraries\n", - "from toolz.dicttoolz import valfilter\n", - "from pangeo_fish.distributions import create_covariances, normal_at\n", - "from pangeo_fish.pdf import normal" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "77e66335-933a-4a4b-869b-d261aefec110", - "metadata": { - "editable": true, - "scrolled": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "%%time\n", - "# Open the regridded diff data\n", - "differences = xr.open_dataset(\n", - " f\"{target_root}/diff-regridded.zarr\",\n", - " engine=\"zarr\",\n", - " chunks={},\n", - " storage_options=storage_options,\n", - ").pipe(lambda ds: ds.merge(ds[[\"latitude\", \"longitude\"]].compute()))\n", - "differences" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a74aa0be-55c2-48c9-ac3b-b9b631c91d91", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "%%time\n", - "# Compute initial and final position\n", - "grid = differences[[\"latitude\", \"longitude\"]].compute()\n", - "\n", - "initial_position = tag[\"tagging_events\"].ds.sel(event_name=\"release\")\n", - "cov = create_covariances(1e-6, coord_names=[\"latitude\", \"longitude\"])\n", - "initial_probability = normal_at(\n", - " grid, pos=initial_position, cov=cov, normalize=True, axes=[\"latitude\", \"longitude\"]\n", - ")\n", - "\n", - "final_position = tag[\"tagging_events\"].ds.sel(event_name=\"fish_death\")\n", - "if final_position[[\"longitude\", \"latitude\"]].to_dataarray().isnull().all():\n", - " final_probability = None\n", - "else:\n", - " cov = create_covariances(recapture_std**2, coord_names=[\"latitude\", \"longitude\"])\n", - " final_probability = normal_at(\n", - " grid,\n", - " pos=final_position,\n", - " cov=cov,\n", - " normalize=True,\n", - " axes=[\"latitude\", \"longitude\"],\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "780a9ae3-6dab-4e8f-9148-de93f4ab9dce", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "%%time\n", - "# compute emission probability matrix\n", - "\n", - "emission_pdf = (\n", - " normal(differences[\"diff\"], mean=0, std=differences_std, dims=[\"y\", \"x\"])\n", - " .to_dataset(name=\"pdf\")\n", - " .assign(\n", - " valfilter(\n", - " lambda x: x is not None,\n", - " {\n", - " \"initial\": initial_probability,\n", - " 
\"final\": final_probability,\n", - " \"mask\": differences[\"ocean_mask\"],\n", - " },\n", - " )\n", - " )\n", - " .assign_attrs(differences.attrs) # | {\"max_sigma\": max_sigma})\n", - ")\n", - "\n", - "emission_pdf = emission_pdf.chunk(default_chunk_xy).persist()\n", - "emission_pdf" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c84436de-30ff-496c-9b16-fe4287517637", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "# Verify the data\n", - "# emission_pdf[\"pdf\"].count([\"x\", \"y\"]).plot()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cd184239-bccf-4e9b-8d13-54222dd57621", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "# This cell saves the emission data to Zarr format, then cleans up unnecessary variables to free up memory.\n", - "\n", - "emission_pdf.to_zarr(\n", - " f\"{target_root}/emission.zarr\",\n", - " mode=\"w\",\n", - " consolidated=True,\n", - " storage_options=storage_options,\n", - ")\n", - "\n", - "del differences, grid, initial_probability, final_probability, emission_pdf" - ] - }, - { - "cell_type": "markdown", - "id": "2096e0f0-eebd-434d-9dd6-5e1a40b4da63", - "metadata": {}, - "source": [ - "## 5. **Replace emission for the tags with warm spikes detected**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d23d1259-79b4-45c6-a976-7f70295821cd", - "metadata": { - "editable": true, - "scrolled": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "import pandas as pd\n", - "from pangeo_fish.heat import heat_regulation, powerpalnt_emission_map\n", - "# Import necessary libraries and open data and perform initial setup\n", - "\n", - "\n", - "emission = xr.open_dataset(\n", - " f\"{target_root}/emission.zarr\",\n", - " engine=\"zarr\",\n", - " chunks={}, # \"x\": -1, \"y\": -1},\n", - " storage_options=storage_options,\n", - ")\n", - "emission" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6c2045e2-31c8-4fa5-a64e-e95de2bd61a4", - "metadata": {}, - "outputs": [], - "source": [ - "if powerplant_flag:\n", - " # Loading detections, formatting and reducing observation window\n", - " detections = pd.read_csv(detection_file).set_index(\"time\").to_xarray()\n", - " detections[\"time\"] = detections[\"time\"].astype(\"datetime64\")\n", - " detections = detections.sel(\n", - " time=emission[\"time\"]\n", - " ) # Narrowing the data to the observed days only\n", - "\n", - " pp_map = (\n", - " pd.read_csv(powerplant_file, sep=\";\").drop(\"Country\", axis=1).to_xarray()\n", - " ) # Loading powerplant locations data\n", - "\n", - " # Combining and replacing the emission map at the given timestamps for the days where warm plume are detected\n", - " combined_masks = powerpalnt_emission_map(pp_map, emission, buffer_size, rot)\n", - " emission = heat_regulation(emission, detections, combined_masks)" - ] - }, - { - "cell_type": "markdown", - "id": "3a60f229-7dc5-4ccd-b5d0-dd6910f81247", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "source": [ - "## 6. **Combine and Normalize Emission Matrix:** Merge the emission matrix and normalize it for further processing.\n", - "\n", - "In this step, we combine the emission probability matrix constructed in Workflow 4 and 5 then normalize it to ensure that the probabilities sum up to one. 
This step prepares the combined emission matrix for further analysis and interpretation.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2bbf45e3-5129-4993-a8f4-b57387256a12", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "# Import necessary libraries\n", - "from pangeo_fish.pdf import combine_emission_pdf" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ef911b87-2596-4ac1-9d86-1a61abeca0b3", - "metadata": { - "editable": true, - "scrolled": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "# Open and combine the emission probability matrix\n", - "\n", - "combined = (\n", - " emission.pipe(combine_emission_pdf)\n", - " .chunk(default_chunk_xy)\n", - " .persist() # convert to comment if the emission matrix does *not* fit in memory\n", - ")\n", - "combined" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1cf452e4-72eb-4da6-a8e3-f49a8be69078", - "metadata": { - "editable": true, - "scrolled": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "# Verify the data and visualize the sum of probabilities\n", - "# combined[\"pdf\"].sum([\"x\", \"y\"]).hvplot(width=400)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d6d55f6a-f7bb-469b-8df9-6aba977d83a9", - "metadata": { - "editable": true, - "scrolled": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "# Save the combined and normalized emission matrix\n", - "combined.to_zarr(\n", - " f\"{target_root}/combined.zarr\",\n", - " mode=\"w\",\n", - " consolidated=True,\n", - " storage_options=storage_options,\n", - ")\n", - "del combined" - ] - }, - { - "cell_type": "markdown", - "id": "7f695094-640e-44b4-9654-2b8eb2b00efa", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "source": [ - "## 7. **Estimate Model Parameters:** Determine the parameters of the model based on the normalized emission matrix.\n", - "\n", - "This step first estimates maxixmum allowed value of model parameter 'sigma' max_sigma. Then we\n", - "create an optimizer with an expected parameter range, fitting the model to the normalized emission matrix. \n", - "The resulting optimized parameters is saved to a json file. 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "25abf847-e1e2-46d7-b54a-f25e57946e0c", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "# Import necessary libraries and modules for data analysis.\n", - "import xarray as xr\n", - "import pandas as pd\n", - "from pangeo_fish.hmm.estimator import EagerScoreEstimator\n", - "from pangeo_fish.hmm.optimize import EagerBoundsSearch\n", - "from pangeo_fish.utils import temporal_resolution\n", - "\n", - "# Open the data\n", - "emission = xr.open_dataset(\n", - " f\"{target_root}/combined.zarr\",\n", - " engine=\"zarr\",\n", - " chunks={},\n", - " inline_array=True,\n", - " storage_options=storage_options,\n", - ")\n", - "emission" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "64b7afa6-196c-4210-816e-4bbf9495be44", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "# Compute maximum displacement for each reference model time step\n", - "# and estimate maximum sigma value for limiting the optimisation step\n", - "\n", - "earth_radius_ = xr.DataArray(earth_radius, dims=None)\n", - "\n", - "timedelta = temporal_resolution(emission[\"time\"]).pint.quantify().pint.to(\"h\")\n", - "grid_resolution = earth_radius_ * emission[\"resolution\"].pint.quantify()\n", - "\n", - "maximum_speed_ = xr.DataArray(maximum_speed, dims=None).pint.to(\"km / h\")\n", - "max_grid_displacement = maximum_speed_ * timedelta * adjustment_factor / grid_resolution\n", - "max_sigma = max_grid_displacement.pint.to(\"dimensionless\").pint.magnitude / truncate\n", - "emission.attrs[\"max_sigma\"] = max_sigma\n", - "max_sigma" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "72d640c9-cc1c-4d27-8cb5-0d5f624a9a71", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "# Create and configure estimator and optimizer\n", - "emission = (\n", - " emission.compute()\n", - ") # Convert to comment if the emission matrix does *not* fit in memory\n", - "estimator = EagerScoreEstimator()\n", - "optimizer = EagerBoundsSearch(\n", - " estimator,\n", - " (1e-4, emission.attrs[\"max_sigma\"]),\n", - " optimizer_kwargs={\"disp\": 3, \"xtol\": tolerance},\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "92af7862-f1d4-43c5-b03d-e7d719035b3b", - "metadata": { - "editable": true, - "scrolled": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "%%time\n", - "# Fit the model parameter to the data\n", - "optimized = optimizer.fit(emission)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1d7d77ba-4728-48d5-91a4-1d2b46144950", - "metadata": {}, - "outputs": [], - "source": [ - "# Save the optimized parameters\n", - "params = optimized.to_dict()\n", - "pd.DataFrame.from_dict(params, orient=\"index\").to_json(\n", - " f\"{target_root}/parameters.json\", storage_options=storage_options\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "87887f9d-127a-449f-bbb5-4ee1bbcaff56", - "metadata": {}, - "outputs": [], - "source": [ - "# Cleanup\n", - "del optimized, emission" - ] - }, - { - "cell_type": "markdown", - "id": "4f1f9b9d-a8a8-4bfa-b9bf-d69a37e23d74", - "metadata": {}, - "source": [ - "## 8. 
**Compute State Probabilities and Tracks:** Calculate the probability distribution of the species in question and compute the tracks.\n", - "\n", - "This step involves predicting state probabilities using the optimised parameter sigma computed in the last step together with normalized emission matrix. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "eedfdf60-f712-4ac3-83f8-6888143cbbd3", - "metadata": {}, - "outputs": [], - "source": [ - "# Import necessary libraries and modules for data analysis.\n", - "import xarray as xr\n", - "import pandas as pd\n", - "from pangeo_fish.hmm.estimator import EagerScoreEstimator\n", - "from pangeo_fish.io import save_trajectories\n", - "\n", - "# Recreate the Estimator\n", - "params = pd.read_json(\n", - " f\"{target_root}/parameters.json\", storage_options=storage_options\n", - ").to_dict()[0]\n", - "optimized = EagerScoreEstimator(**params)\n", - "optimized" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "361999bd-7c8e-4835-9129-252fa0b20209", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "%%time\n", - "# Load the Data\n", - "emission = xr.open_dataset(\n", - " f\"{target_root}/combined.zarr\",\n", - " engine=\"zarr\",\n", - " chunks=default_chunk_xy,\n", - " inline_array=True,\n", - " storage_options=storage_options,\n", - ").compute()\n", - "\n", - "# Predict the State Probabilities\n", - "\n", - "states = optimized.predict_proba(emission)\n", - "states = states.to_dataset().chunk(default_chunk_xy).persist()\n", - "states" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3204e35f-4708-442b-a28a-f1748f101781", - "metadata": {}, - "outputs": [], - "source": [ - "# Verify the data and visualize the sum of probabilities\n", - "# states.sum([\"x\", \"y\"]).hvplot() +states.count([\"x\", \"y\"]).hvplot()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d3b03df5-8c41-49b4-9cc8-a2d709be8553", - "metadata": {}, - "outputs": [], - "source": [ - "%%time\n", - "# Save probability distirbution, state matrix.\n", - "states.chunk(default_chunk_xy).to_zarr(\n", - " f\"{target_root}/states.zarr\",\n", - " mode=\"w\",\n", - " consolidated=True,\n", - " storage_options=storage_options,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "83da1a8f-ba47-4aa8-80fc-2f0a02c01f5a", - "metadata": {}, - "outputs": [], - "source": [ - "%%time \n", - "# decode tracks\n", - "\n", - "trajectories = optimized.decode(\n", - " emission,\n", - " states.fillna(0),\n", - " mode=track_modes,\n", - " progress=False,\n", - " additional_quantities=additional_track_quantities,\n", - ")\n", - "trajectories" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3fabb13b-0f14-414a-bcc7-0622e15db294", - "metadata": {}, - "outputs": [], - "source": [ - "# Save trajectories.\n", - "# Here we can chose format parquet for loading files from 'R'\n", - "# or chose to format 'geoparquet' for further analysis of tracks using\n", - "# geopands.\n", - "\n", - "save_trajectories(trajectories, target_root, storage_options, format=\"parquet\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "92ca87cf-b1c3-4eb1-8edb-9dad954243c8", - "metadata": {}, - "outputs": [], - "source": [ - "# Cleanup\n", - "del optimized, emission, states, trajectories" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - 
"codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.8" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} From 46a903639ce6e616b0f9f30cd48005ce971aca80 Mon Sep 17 00:00:00 2001 From: aderrien7 Date: Thu, 29 Aug 2024 17:22:26 +0000 Subject: [PATCH 05/13] Removing old function from the notebook --- docs/data_formating.ipynb | 116 -------------------------------------- 1 file changed, 116 deletions(-) diff --git a/docs/data_formating.ipynb b/docs/data_formating.ipynb index 0620546..305cd65 100644 --- a/docs/data_formating.ipynb +++ b/docs/data_formating.ipynb @@ -363,122 +363,6 @@ "csv_path" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "cfff5e2f-0ae1-4da0-945b-786db22c973d", - "metadata": {}, - "outputs": [], - "source": [ - "def extract_DST(file_path, time_zone, remote=False):\n", - " \"\"\"\n", - " Extracts time, pressure, and temperature data from a CSV file containing time series data.\n", - "\n", - " Args:\n", - " file_path (str): The path to the CSV file. For remote files, provide the S3 URI.\n", - " time_zone (str): The time zone for date conversion.\n", - " remote (bool): If True, fetch the file from S3. If False, read the file locally.\n", - "\n", - " Returns:\n", - " pandas.DataFrame: A DataFrame containing the extracted data.\n", - " \"\"\"\n", - " # List to store all the data\n", - " all_data = []\n", - " expected_length = 0\n", - "\n", - " # Extracting tag ID from the file path\n", - " tag_id = extract_name(file_path)\n", - "\n", - " if remote:\n", - " # Use s3fs to connect to the S3-compatible storage\n", - " s3 = s3fs.S3FileSystem(\n", - " anon=False,\n", - " client_kwargs={\n", - " \"endpoint_url\": \"https://s3.gra.perf.cloud.ovh.net\", # S3 endpoint for OVH\n", - " },\n", - " )\n", - " # Open the file from S3\n", - " csvfile = s3.open(file_path, mode=\"r\", encoding=\"latin-1\")\n", - " else:\n", - " # Open the file locally\n", - " csvfile = open(file_path, newline=\"\", encoding=\"latin-1\")\n", - "\n", - " try:\n", - " # Create a CSV reader\n", - " csv_reader = csv.reader(csvfile, delimiter=\",\")\n", - "\n", - " # Variables to store data for the current block\n", - " data = []\n", - " reached_target_line = False\n", - "\n", - " # Read each line of the CSV file\n", - " for line in csv_reader:\n", - " # If the line is not empty and contains information about the expected length of data\n", - " if line and \"Data points available =\" in line[0]:\n", - " expected_length += int(line[0].split(sep=\"=\")[1])\n", - "\n", - " # Check if the current line is the target line\n", - " if not reached_target_line:\n", - " if line == [\"Date/Time Stamp\", \"Pressure\", \"Temp\"]:\n", - " reached_target_line = True\n", - " else:\n", - " # If the line is empty, add the data of the current block to the total and reset the data of the block\n", - " if not line:\n", - " if data:\n", - " all_data.extend(data)\n", - " data = []\n", - " reached_target_line = False\n", - " else:\n", - " # Otherwise, add the line of data to the current block\n", - " line[0] = format_date(line[0]) # Format date to ISO8601\n", - " line[1] = np.float64(\n", - " line[1]\n", - " ) # Convert data type from str to float64\n", - " line[2] = np.float64(\n", - " line[2]\n", - " ) # Convert data type from str to float64\n", - "\n", - " data.append(line)\n", - "\n", - " finally:\n", - " # Close the file after reading\n", - " 
csvfile.close()\n", - "\n", - " # Convert all the data into a pandas DataFrame\n", - " df = pd.DataFrame(all_data, columns=[\"time\", \"pressure\", \"temperature\"])[\n", - " [\"time\", \"temperature\", \"pressure\"]\n", - " ]\n", - "\n", - " # Getting all the timestamps\n", - " time_stamps = pd.to_datetime(df[\"time\"])\n", - "\n", - " # Calculting time deltas\n", - " time_deltas = time_stamps - time_stamps.iloc[0]\n", - "\n", - " # Getting first timestamp and converting it to utc.\n", - " initial_time = time_stamps.iloc[0].strftime(\"%Y-%m-%dT%H:%M:%SZ\")\n", - " time_utc = pd.to_datetime(\n", - " convert_to_utc_with_formatting(initial_time, \"Europe/Paris\")\n", - " )\n", - "\n", - " # Calculating the new timestamps series and formatting it to ISO8601\n", - " corrected_timestamps = time_deltas + time_utc\n", - " formatted_corrected_timestamps = corrected_timestamps.dt.strftime(\n", - " \"%Y-%m-%dT%H:%M:%SZ\"\n", - " )\n", - "\n", - " # Replacing the in the dataframe\n", - " df[\"time\"] = formatted_corrected_timestamps\n", - "\n", - " # Check if the expected length matches the actual length of data extracted\n", - " if expected_length == df.shape[0]:\n", - " print(\"Extraction for tag {} complete, no missing data\".format(tag_id))\n", - " else:\n", - " print(\"Extraction for tag {} might be incomplete, be careful\".format(tag_id))\n", - "\n", - " return df" - ] - }, { "cell_type": "code", "execution_count": null, From 187d9d070dfad66b863489ab688a87ad1b6dd690 Mon Sep 17 00:00:00 2001 From: aderrien7 Date: Fri, 30 Aug 2024 10:59:41 +0000 Subject: [PATCH 06/13] Documenting papermill notebook --- docs/papermill_launcher.ipynb | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/docs/papermill_launcher.ipynb b/docs/papermill_launcher.ipynb index baf145f..ca1f28a 100644 --- a/docs/papermill_launcher.ipynb +++ b/docs/papermill_launcher.ipynb @@ -6,21 +6,36 @@ "metadata": {}, "source": [ "# Executing multiple parameters notebooks with papermill\n", - "___\n" + "___\n", + "**[Papermill](https://papermill.readthedocs.io/en/latest/)** is a Python library that allows you to parameterize, execute, and analyze Jupyter Notebooks, making it useful for automating and scaling data analysis workflows.\n", + "\n", + "In this notebook, we will set up a routine that can generate trajectories for multiple fishes with adjusted parameters.\n", + "First we define important parameters that will be used in the loop that executes the notebooks.\n", + "The second part will generate ipynb files, based on a template noteboook, with the modified parameters, defined in the first cells of the notebook" + ] + }, + { + "cell_type": "markdown", + "id": "a4f4f33a-4e6e-4871-af36-c5e1fedce33f", + "metadata": {}, + "source": [ + "### Parameters set up" ] }, { "cell_type": "raw", - "id": "d8f4af88-0bb3-4ac0-bede-eaf6245f8435", + "id": "b0b764fe-c861-4eb5-83b5-c5587fda1be6", "metadata": {}, "source": [ - "!pip install papermill" + "### Installations\n", + "!pip install ~/git/pangeo-fish/\n", + "!pip install papermill copernicusmarine " ] }, { "cell_type": "code", "execution_count": null, - "id": "b1283ae9-705a-4c0c-b68e-7f4986348f4f", + "id": "dc4bec0e-75c7-422d-8dd5-0d0d9c2c87ec", "metadata": {}, "outputs": [], "source": [ @@ -73,12 +88,13 @@ "# local_output is the path where the parametrized notebooks will be stored\n", "local_output = \"papermill_output\"\n", "\n", + "#Change notebook path to the notebook in pangeo-fish\n", "input_notebook = \"pangeo-fish_papermill.ipynb\"\n", "\n", 
"# cloud_root is the path to acces the files on remote\n", "cloud_root = \"s3://gfts-ifremer/tags/bargip\"\n", "\n", - "# folder name is the name of the folder where the result will be stored on the bucket\n", + "# folder name is the name of the folder where the result will be stored \n", "folder_name = \"tracks_test\"" ] }, @@ -242,7 +258,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.8" + "version": "3.12.5" } }, "nbformat": 4, From 3a8fbaba3a54a5853f914e626fd84760144c2a37 Mon Sep 17 00:00:00 2001 From: aderrien7 Date: Fri, 30 Aug 2024 11:02:09 +0000 Subject: [PATCH 07/13] Precommit fail fix --- docs/papermill_launcher.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/papermill_launcher.ipynb b/docs/papermill_launcher.ipynb index ca1f28a..3d4c6ef 100644 --- a/docs/papermill_launcher.ipynb +++ b/docs/papermill_launcher.ipynb @@ -88,13 +88,13 @@ "# local_output is the path where the parametrized notebooks will be stored\n", "local_output = \"papermill_output\"\n", "\n", - "#Change notebook path to the notebook in pangeo-fish\n", + "# Change notebook path to the notebook in pangeo-fish\n", "input_notebook = \"pangeo-fish_papermill.ipynb\"\n", "\n", "# cloud_root is the path to acces the files on remote\n", "cloud_root = \"s3://gfts-ifremer/tags/bargip\"\n", "\n", - "# folder name is the name of the folder where the result will be stored \n", + "# folder name is the name of the folder where the result will be stored\n", "folder_name = \"tracks_test\"" ] }, From 43b7a0a7a2918858352958197ba64002eb02c077 Mon Sep 17 00:00:00 2001 From: aderrien7 Date: Fri, 30 Aug 2024 11:03:37 +0000 Subject: [PATCH 08/13] pre-commit fix --- docs/data_formating.ipynb | 4 ---- 1 file changed, 4 deletions(-) diff --git a/docs/data_formating.ipynb b/docs/data_formating.ipynb index 305cd65..c0f0e62 100644 --- a/docs/data_formating.ipynb +++ b/docs/data_formating.ipynb @@ -25,15 +25,11 @@ "outputs": [], "source": [ "import pandas as pd\n", - "import numpy as np\n", "from data_conversion import extract_tagging_events\n", "from data_conversion import create_metadata_file\n", "from data_conversion import extract_name\n", - "from data_conversion import format_date\n", "from data_conversion import extract_DST\n", - "from data_conversion import convert_to_utc_with_formatting\n", "\n", - "import csv\n", "import os\n", "from tqdm import tqdm" ] From 28c978b77f65f59d27d5b938d4e83d1f5650ec43 Mon Sep 17 00:00:00 2001 From: aderrien7 Date: Fri, 30 Aug 2024 13:16:47 +0000 Subject: [PATCH 09/13] Adding comment on papermill_launcher.ipynb --- docs/papermill_launcher.ipynb | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/docs/papermill_launcher.ipynb b/docs/papermill_launcher.ipynb index 3d4c6ef..7b3a7b9 100644 --- a/docs/papermill_launcher.ipynb +++ b/docs/papermill_launcher.ipynb @@ -186,10 +186,24 @@ "source": [ "___\n", "### Explantion of the code below \n", - "- nbs is a list of the notebooks that has been processed, wether they failed or not.\n", - "- The code loops over the tag id present in tag list and calculates the time difference in the tagging events.\n", "- If the fish has observation over 2 days and has not been processed yet, it starts running a parametrized notebook.\n", - "- If it succeds, the generated notebook is placed papermill_output/done, else, it goes at papermill_output/failed" + "- nbs is a list of the notebooks that has been processed, wether they failed or not. 
This list is used to keep a track of the tags that already has been generated.\n", + "These two conditions are used in the following way.\n", + "```\n", + "observation_length = (recapture_date - release_date) / np.timedelta64(1, \"D\")\n", + "\n", + "if (\n", + " (tag_name not in nbs) and observation_length > 2\n", + "): # Use this statement if you already start a computation that has been interrupted but the generation is still valid\n", + " # if time_difference > 2: # Use this if you want every tag in tag list to be processed\n", + "```\n", + "First, it means that if the fish, based on the tagging events, has a observation period of less than two days, his trajectory will not be computed.\n", + "Second, It means that you can either choose to regenerate for all the tags that you generated once if you noticed that there was an issue in the results.\n", + "If the generation was interrupted during the process but the results are valid, you can start back you computation where it has stopped.\n", + "\n", + "- The code loops over the tag id present in tag list and calculates the time difference in the tagging events.\n", + "- If it succeds, the generated notebook is placed papermill_output/done, else, it goes at papermill_output/failed\n", + " " ] }, { @@ -213,16 +227,16 @@ "for tag_name in tqdm(tag_list, desc=\"Processing tags\"):\n", " try:\n", " te = pd.read_csv(s3.open(f\"{cloud_root}/cleaned/{tag_name}/tagging_events.csv\"))\n", - " np_datetime1 = np.datetime64(\n", + " release_date = np.datetime64(\n", " datetime.strptime(te[\"time\"][0], \"%Y-%m-%dT%H:%M:%SZ\")\n", " )\n", - " np_datetime2 = np.datetime64(\n", + " recapture_date = np.datetime64(\n", " datetime.strptime(te[\"time\"][1], \"%Y-%m-%dT%H:%M:%SZ\")\n", " )\n", - " time_difference = (np_datetime2 - np_datetime1) / np.timedelta64(1, \"D\")\n", + " observation_length = (recapture_date - release_date) / np.timedelta64(1, \"D\")\n", "\n", " if (\n", - " (tag_name not in nbs) and time_difference > 2\n", + " (tag_name not in nbs) and observation_length > 2\n", " ): # Use this statement if you already start a computation that has been interrupted but the generation is still valid\n", " # if time_difference > 2: # Use this if you want every tag in tag list to be processed\n", "\n", From 6a05ed585d34c4b823ffbebfdecc67c8521473e5 Mon Sep 17 00:00:00 2001 From: aderrien7 Date: Fri, 30 Aug 2024 13:42:12 +0000 Subject: [PATCH 10/13] Removing comments in the code --- docs/papermill_launcher.ipynb | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/docs/papermill_launcher.ipynb b/docs/papermill_launcher.ipynb index 7b3a7b9..7075fa9 100644 --- a/docs/papermill_launcher.ipynb +++ b/docs/papermill_launcher.ipynb @@ -192,15 +192,17 @@ "```\n", "observation_length = (recapture_date - release_date) / np.timedelta64(1, \"D\")\n", "\n", - "if (\n", - " (tag_name not in nbs) and observation_length > 2\n", - "): # Use this statement if you already start a computation that has been interrupted but the generation is still valid\n", - " # if time_difference > 2: # Use this if you want every tag in tag list to be processed\n", + "if ((tag_name not in nbs) and observation_length > 2): \n", "```\n", "First, it means that if the fish, based on the tagging events, has a observation period of less than two days, his trajectory will not be computed.\n", "Second, It means that you can either choose to regenerate for all the tags that you generated once if you noticed that there was an issue in the results.\n", "If the generation 
was interrupted during the process but the results are valid, you can start back you computation where it has stopped.\n", "\n", + "You might need to update this line of to switch from one behaviour to another, by removing \n", + "```\n", + "(tag_name not in nbs) and\n", + "```\n", + "from this statement.\n", "- The code loops over the tag id present in tag list and calculates the time difference in the tagging events.\n", "- If it succeds, the generated notebook is placed papermill_output/done, else, it goes at papermill_output/failed\n", " " @@ -235,11 +237,8 @@ " )\n", " observation_length = (recapture_date - release_date) / np.timedelta64(1, \"D\")\n", "\n", - " if (\n", - " (tag_name not in nbs) and observation_length > 2\n", - " ): # Use this statement if you already start a computation that has been interrupted but the generation is still valid\n", - " # if time_difference > 2: # Use this if you want every tag in tag list to be processed\n", - "\n", + " if ((tag_name not in nbs) and observation_length > 2): \n", + " \n", " print(tag_name)\n", " print(datetime.now(timezone).strftime(\"%Y-%m-%d %H:%M:%S\"))\n", " param[\"tag_name\"] = tag_name\n", From 0583b6218bcdc54c3995aadcc36381ba358fd778 Mon Sep 17 00:00:00 2001 From: aderrien7 Date: Fri, 30 Aug 2024 13:43:47 +0000 Subject: [PATCH 11/13] precommit fix --- docs/papermill_launcher.ipynb | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/papermill_launcher.ipynb b/docs/papermill_launcher.ipynb index 7075fa9..6ec3fd1 100644 --- a/docs/papermill_launcher.ipynb +++ b/docs/papermill_launcher.ipynb @@ -237,8 +237,7 @@ " )\n", " observation_length = (recapture_date - release_date) / np.timedelta64(1, \"D\")\n", "\n", - " if ((tag_name not in nbs) and observation_length > 2): \n", - " \n", + " if (tag_name not in nbs) and observation_length > 2:\n", " print(tag_name)\n", " print(datetime.now(timezone).strftime(\"%Y-%m-%d %H:%M:%S\"))\n", " param[\"tag_name\"] = tag_name\n", From fbe0730e0bce5662150b67088b26ce9992dcb050 Mon Sep 17 00:00:00 2001 From: aderrien7 Date: Fri, 30 Aug 2024 14:32:13 +0000 Subject: [PATCH 12/13] Modifying the panel and adding a markdown table summing up the different generation --- docs/pannel_plot_s3.ipynb | 269 +++++++++++++++++++++++--------------- 1 file changed, 164 insertions(+), 105 deletions(-) diff --git a/docs/pannel_plot_s3.ipynb b/docs/pannel_plot_s3.ipynb index 453c456..ddb4607 100644 --- a/docs/pannel_plot_s3.ipynb +++ b/docs/pannel_plot_s3.ipynb @@ -1,11 +1,20 @@ { "cells": [ + { + "cell_type": "markdown", + "id": "27372111-4c7d-4cb9-8960-01cff85aa843", + "metadata": {}, + "source": [ + "# Fish track visualistion" + ] + }, { "cell_type": "markdown", "id": "14af151e-2ee9-4233-89fd-4499c981b860", "metadata": {}, "source": [ - "# **This notebooks is used to plot a panel dashboard to visualize data for the GFTS project** \n", + "___\n", + "### This notebooks is used to plot a panel dashboard to visualize data for the GFTS project\n", "____\n", "The [panel dashboard](https://panel.holoviz.org/) displays the following informations :\n", "- The temperature measured by the fish.\n", @@ -16,7 +25,7 @@ "\n", "To acces this notebook you have to clone the following [github repository](https://github.com/destination-earth/DestinE_ESA_GFTS). \n", "First you will fork the repository, see this [link](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/fork-a-repo). 
\n", - "Then, you will clone **your forked version**, you will use the git jupyter extension, the section is in the sidebar. Here is a [tutorial](https://blog.reviewnb.com/jupyterlab-git-extension/#git-workflow-clone). \n", + "Then, you will clone **your forked version**, use the git section of this notebook with the side bar. Here is a [tutorial](https://blog.reviewnb.com/jupyterlab-git-extension/#git-workflow-clone). \n", "If you want to submit code reviews, please see this [rule of participation](https://github.com/destination-earth/DestinE_ESA_GFTS/blob/main/docs/rule_of_participation.md)\n", "\n", "To be able to run this notebook you need to clone [pangeo-fish](https://github.com/IAOCEA/pangeo-fish) repository, you can reuse the previous steps you used before, no need to fork the repository this time. \n", @@ -27,16 +36,39 @@ "Normally, all the other library to use this notebook should already installed once this has been done. To start the preview with panel, click the blue pannel logo ![Panel logo](data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABwAAAAcCAMAAABF0y+mAAAAM1BMVEUAcrUBd7jk6OuoxdsAcrUAbLP08vAAbbUAdLUAcrVQnMm6z99+rM86jcJ5sNElhL10qc1QLvfMAAAACnRSTlPP////////KCjO4FwxLQAAAJNJREFUKJGNktEShCAIRRXCCrX2/792aaYU3YXpTDM+nMQLGtawGIQtmE6s48S+kkRkSBEHMim6rMyYAA7u1EdSTj9kenYWRPGQsVNaWTnjxFzifGhvJbbcTp+V4yCj4gOA19rSImgkmvxAf2Ua5Vw267Ij5xTIbcUdgjc+f/DelenLXu5vTGs/EwNfuo963S23b1+vug28mwd6wAAAAABJRU5ErkJggg==) in the notebook tool bar. \n", "This will open a new tab in your notebook that displays panel informations.\n", "\n", - "Please note that all the tracks have been generated but note checked manually, some can display incoherent data. It will be checked and corrected later on." + "Please note that all the tracks have been generated but note checked manually, some can display incoherent data. It will be checked and corrected later on.\n", + "\n", + "The trajectories available from S3 has been generated using this the papermill_launcher.ipynb notebook, see this notebook to understand more about it.\n", + "Using this panel, user can examine the results of a computation. This helps to understand how the algorithm behave for different situations." + ] + }, + { + "cell_type": "markdown", + "id": "6d3f8db5-1a4e-4ee6-9137-2fd43e4a2fd1", + "metadata": {}, + "source": [ + "Here is a table that sums up all the generations\n", + "You can use the value of generation name and set the variable generation_name to acces the corresponding generation.\n", + "\n", + "| generation_name | dataset | bbox | correction methods | comment |\n", + "|-----------------|---------------------------|----------------------------------|-----------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|\n", + "| tracks | IBI_MULTIYEAR_PHY_005_002 | latitude : 42,53- longitude -8,4 | None | 415 computed, 12 wrong tracks |\n", + "| tracks_2 | IBI_MULTIYEAR_PHY_005_002 | latitude : 40,56- longitude -13,5 |Coastal variability, alpha=0.01 and MarkovAutoregression |431 computed, 32 wrong tracks. New bounding box that extends to all the fishes recapture positions. |\n", + "| tracks_3 | IBI_MULTIYEAR_PHY_005_002 | latitude : 40,56- longitude -13,5 |Coastal_variability, alpha=0.01 and MarkovAutoregression |395 computed, 39 wrong tracks. 
Implementation of a new method to correct issues due to anomalies in biologging data and test of a modelisation of the coastal variability This generation was not complete because there was an issue in the data. |\n", + "| tracks_4 | IBI_MULTIYEAR_PHY_005_002 | latitude : 40,56- longitude -13,5 | Coastal variability, alpha=0.01 |421 computed, 60 wrong tracks. Same parameters as the previous one but with corrected input data. This is not implementing the anomalies in biologging fix. |\n", + "| DK_1 | IBI_MULTIYEAR_PHY_005_002 | latitude : 40,56- longitude -13,5 | Markov autoregression | This generation is focusing only on fixing the issues observed from tags in region of dunkerque, which are subject to heat spikes anomalies. To correct this issue, a spike detection algorithm was implemented using a markovautoregression algorithm. |\n", + "| DK_2 | IBI_MULTIYEAR_PHY_005_002 | latitude : 40,56- longitude -13,5 | Diff | This generation is using another techinque since markovautoregression was not detecting all the spikes. |\n", + "| DK_3 | IBI_MULTIYEAR_PHY_005_002 | latitude : 40,56- longitude -13,5 | Diff | This generation is using a lower value of maximum speed, this contains only the results from the ones that has already failed before. |\n", + "| DK_final | IBI_MULTIYEAR_PHY_005_002 | latitude : 40,56- longitude -13,5 |Diff | In this folder, there is the data for all the DK tags. The corrected ones that from DK_3 and the ones that were already correct from the beginning. |" ] }, { "cell_type": "raw", - "id": "4a11fcb6-6a6a-4ea1-b28c-fb751b576665", + "id": "7112a4cf-be81-4c65-9c83-12f23c0d5751", "metadata": {}, "source": [ "# Install pangeo fish if necessary\n", - "!pip install pangeo-fish/ " + "!pip install ~/git/pangeo-fish/ " ] }, { @@ -49,15 +81,22 @@ "# Import necessary libraries and modules.\n", "import os\n", "import xarray as xr\n", + "from pint_xarray import unit_registry as ureg\n", "from pangeo_fish.io import open_tag\n", + "import hvplot.xarray\n", "import pandas as pd\n", + "import geopandas as gpd\n", "import movingpandas as mpd\n", - "from pangeo_fish.tags import to_time_slice\n", + "import hvplot.pandas\n", + "from pangeo_fish.tags import adapt_model_time, reshape_by_bins, to_time_slice\n", "import holoviews as hv\n", + "import cmocean\n", "import panel as pn\n", + "import numpy as np\n", "from pangeo_fish.io import read_trajectories\n", "from pangeo_fish import visualization\n", - "import s3fs" + "import s3fs\n", + "import json" ] }, { @@ -76,38 +115,34 @@ " \"endpoint_url\": \"https://s3.gra.perf.cloud.ovh.net\",\n", " },\n", ")\n", - "# Tag list is the list of available tags\n", - "tag_list_ = s3.ls(\"gfts-ifremer/tags/bargip/tracks\")\n", - "tag_list = [\n", - " tag.replace(\"gfts-ifremer/tags/bargip/tracks/\", \"\")\n", - " for tag in tag_list_\n", - " if tag.replace(\"gfts-ifremer/tags/bargip/tracks/\", \"\")\n", - " in [\n", - " fail.replace(\".ipynb\", \"\")\n", - " for fail in os.listdir(\"../notebooks/papermill/papermill_output/done/\")\n", - " ]\n", - "]\n", "\n", + "### Update this with the name of the folder where the results are stored\n", + "generation_name = \"tracks_3\"\n", + "\n", + "# Tag list is the list of available tags\n", + "remote_path = \"gfts-ifremer/tags/bargip\"\n", + "tag_list_ = s3.ls(f\"{remote_path}/{generation_name}\")\n", + "tag_list = [tag.replace(f\"{remote_path}/{generation_name}/\",\"\") for tag in tag_list_ if tag.replace(f\"{remote_path}/{generation_name}/\",\"\")]\n", "cloud_root = \"s3://gfts-ifremer/tags/bargip\"\n", "\n", "# 
tag_root specifies the root URL for tag data used for this computation.\n", "tag_root = f\"{cloud_root}/cleaned\"\n", "\n", "# scratch_root specifies the root directory where are GFTS computation data stored.\n", - "scratch_root = f\"{cloud_root}/tracks\"\n", + "scratch_root = f\"{cloud_root}/{generation_name}\"\n", "\n", "# storage_options specifies options for the filesystem storing and/or opening output files.\n", "storage_options = {\n", - " \"anon\": False,\n", + " 'anon': False, \n", " # 'profile' : \"gfts\",\n", - " \"client_kwargs\": {\n", + " 'client_kwargs': {\n", " \"endpoint_url\": \"https://s3.gra.perf.cloud.ovh.net\",\n", " \"region_name\": \"gra\",\n", - " },\n", + " }\n", "}\n", "\n", "# bbox, bounding box, defines the latitude and longitude range for the analysis area.\n", - "bbox = {\"latitude\": [42, 53], \"longitude\": [-8, 4]}\n", + "bbox = {\"latitude\": [40, 56], \"longitude\": [-13, 5]} \n", "# tramodes are the two types of track that have been computed for GFTS.\n", "track_modes = [\"mean\", \"mode\"]" ] @@ -120,95 +155,113 @@ "outputs": [], "source": [ "# pn.cache stores all the plot outputs to avoid doing the computation every time. Might need to disable if manipulating a wide amount of files.\n", - "@pn.cache\n", + "# @pn.cache\n", "\n", "# Functions to plot the different visualization for a given tag id\n", - "def plot_time_series(plot_type=\"time series\", tag_id=\"CB_A11071\"):\n", - " # load trajectories\n", - " trajectories = read_trajectories(\n", - " track_modes, f\"{scratch_root}/{tag_id}\", storage_options, format=\"parquet\"\n", - " )\n", + "def plot_time_series(plot_type=\"time series\",tag_id=\"CB_A11071\"):\n", + " # load trajectories \n", + " trajectories = read_trajectories(track_modes,f\"{scratch_root}/{tag_id}\",storage_options, format=\"parquet\")\n", "\n", " # Converting the trajectories to pandas DataFrames to access data easily\n", " mean_df = trajectories.trajectories[0].df\n", " mode_df = trajectories.trajectories[1].df\n", "\n", " tag = open_tag(tag_root, tag_id)\n", - " time_slice = to_time_slice(tag[\"tagging_events/time\"])\n", - " tag_log = tag[\"dst\"].ds.sel(time=time_slice)\n", + " # time_slice = to_time_slice(tag[\"tagging_events/time\"])\n", "\n", + " time_slice = to_time_slice(tag[\"tagging_events/time\"])\n", + " \n", + " time = tag[\"dst\"].ds.time\n", + " cond = (time <= time_slice.stop) & (time >= time_slice.start)\n", + " \n", + " tag_log = tag[\"dst\"].ds.where(cond,drop=True)\n", + " \n", + " min_ = tag_log.time[0]\n", + " max_ = tag_log.time[-1]\n", + " \n", + " time_slice = slice(min_.data, max_.data)\n", + "\n", + " \n", + " tag_log = tag[\"dst\"].ds.sel(time=time_slice) \n", + " \n", " # Creating pandas series for xarrray dataset\n", - " mean_lon_ = pd.Series(mean_df.geometry.x, name=\"longitude\")\n", - " mean_lat_ = pd.Series(mean_df.geometry.y, name=\"latitude\")\n", - " mode_lon_ = pd.Series(mode_df.geometry.x, name=\"longitude\")\n", - " mode_lat_ = pd.Series(mode_df.geometry.y, name=\"latitude\")\n", - "\n", + " mean_lon_ = pd.Series(mean_df.geometry.x,name=\"longitude\")\n", + " mean_lat_ = pd.Series(mean_df.geometry.y,name=\"latitude\")\n", + " mode_lon_ = pd.Series(mode_df.geometry.x,name=\"longitude\")\n", + " mode_lat_ = pd.Series(mode_df.geometry.y,name=\"latitude\")\n", + " \n", " # Creating xarray datasets\n", " mean_coords = xr.Dataset(pd.concat([mean_lon_, mean_lat_], axis=1))\n", " mode_coords = xr.Dataset(pd.concat([mode_lon_, mode_lat_], axis=1))\n", - "\n", + " \n", " # Assigning dataarrays to 
variables\n", " mean_lon = mean_coords[\"longitude\"]\n", " mean_lat = mean_coords[\"latitude\"]\n", " mode_lon = mode_coords[\"longitude\"]\n", " mode_lat = mode_coords[\"latitude\"]\n", "\n", - " temp_plot = tag_log[\"temperature\"].hvplot(\n", - " color=\"Red\", title=\"Temperature (°C)\", grid=True, height=200, width=600\n", - " )\n", - " depth_plot = tag_log[\"pressure\"].hvplot(\n", - " color=\"Blue\", title=\"Pressure (m)\", grid=True, height=200, width=600\n", - " )\n", - " lon_plot = (\n", - " mean_lat.hvplot(label=\"mean\", clim=[mean_lat_.min(), mean_lat_.max()])\n", - " * mode_lat.hvplot(label=\"mode\", clim=[mode_lat_.min(), mean_lat_.max()])\n", - " ).opts(height=200, width=600, show_grid=True, title=\"Fish latitude over time\")\n", - " lat_plot = (\n", - " mean_lon.hvplot(label=\"mean\", clim=[mean_lon_.min(), mean_lat_.max()])\n", - " * mode_lon.hvplot(label=\"mode\", clim=[mode_lon_.min(), mean_lat_.max()])\n", - " ).opts(height=200, width=600, show_grid=True, title=\"Fish longitude over time\")\n", + " tag_log[\"depth\"] = tag_log[\"pressure\"]\n", + " temp_plot = tag_log[\"temperature\"].hvplot(color=\"Red\",title=\"Temperature (°C)\",grid=True,height=200,width=600)\n", + " depth_plot = (-tag_log[\"depth\"]).hvplot(color =\"Blue\",title=\"Depth (m)\",grid=True,height=200,width=600)\n", + " lon_plot = (mean_lat.hvplot(label=\"mean\",clim=[mean_lat_.min(),mean_lat_.max()]) * mode_lat.hvplot(label = \"mode\",clim=[mode_lat_.min(),mean_lat_.max()])).opts(height=200,width=600,show_grid=True,title = \"Fish latitude over time\")\n", + " lat_plot = (mean_lon.hvplot(label=\"mean\",clim=[mean_lon_.min(),mean_lat_.max()]) * mode_lon.hvplot(label = \"mode\",clim=[mode_lon_.min(),mean_lat_.max()])).opts(height=200,width=600,show_grid=True,title = \"Fish longitude over time\")\n", "\n", " return (temp_plot + depth_plot + lon_plot + lat_plot).cols(1)\n", "\n", "\n", - "def plot_track(tag_id=\"CB_A11071\"):\n", - " trajectories = read_trajectories(\n", - " track_modes, f\"{scratch_root}/{tag_id}\", storage_options, format=\"parquet\"\n", + "def plot_anomaly(tag_id=\"CB_A11071\"):\n", + " tag = open_tag(tag_root, tag_id)\n", + " time_slice = to_time_slice(tag[\"tagging_events/time\"])\n", + " tag_log = tag[\"dst\"].ds.sel(time=time_slice) \n", + " ds = tag_log\n", + " # Calculate the differences\n", + " ds['temp_diff'] = ds.temperature.diff('time')\n", + " \n", + " # Define a threshold for significant temperature rise\n", + " threshold = 0.2\n", + " \n", + " # Identify significant rises\n", + " ds['event'] = ds['temp_diff'] > threshold\n", + " \n", + " # Extract significant points\n", + " significant_points = ds.where(ds['event'], drop=True)\n", + " \n", + " # Plot the time series\n", + " time_series_plot = ds.temperature.hvplot(line_color='blue', label='Temperature',width=1000,height=500)\n", + " \n", + " # Plot the significant rise points\n", + " significant_points_plot = significant_points.temperature.hvplot.scatter(\n", + " color='red', marker='x', size=100, label='Significant Rise',width=1000,height=500\n", " )\n", + " \n", + " # Display the combined plot\n", + " return hv.Overlay([time_series_plot, significant_points_plot]).opts(width=1000,height=500)\n", + "\n", + "def plot_track(tag_id=\"CB_A11071\"):\n", + " sigma = pd.read_json(f\"{scratch_root}/{tag_id}/parameters.json\").to_dict()[0][\"sigma\"]\n", + " trajectories = read_trajectories(track_modes,f\"{scratch_root}/{tag_id}\",storage_options, format=\"parquet\")\n", "\n", " # Converting the trajectories to pandas DataFrames to 
+    "    sigma = pd.read_json(f\"{scratch_root}/{tag_id}/parameters.json\").to_dict()[0][\"sigma\"]\n",
+    "    trajectories = read_trajectories(track_modes,f\"{scratch_root}/{tag_id}\",storage_options, format=\"parquet\")\n",
     "\n",
     "    # Converting the trajectories to pandas DataFrames to access data easily\n",
     "    mean_df = trajectories.trajectories[0].df\n",
     "    mode_df = trajectories.trajectories[1].df\n",
-    "\n",
+    "    \n",
     "    # Adding month data\n",
     "    mean_df[\"month\"] = mean_df.index.month\n",
     "    mode_df[\"month\"] = mode_df.index.month\n",
-    "\n",
+    "    \n",
     "    # Converting back to trajectories\n",
-    "    mean_traj = mpd.Trajectory(\n",
-    "        mean_df, traj_id=mean_df.traj_id.drop_duplicates().values[0]\n",
-    "    )\n",
-    "    mode_traj = mpd.Trajectory(\n",
-    "        mode_df, traj_id=mode_df.traj_id.drop_duplicates().values[0]\n",
-    "    )\n",
-    "    trajectories = mpd.TrajectoryCollection([mean_traj, mode_traj])\n",
-    "\n",
+    "    mean_traj = mpd.Trajectory(mean_df,traj_id=mean_df.traj_id.drop_duplicates().values[0])\n",
+    "    mode_traj = mpd.Trajectory(mode_df,traj_id=mode_df.traj_id.drop_duplicates().values[0])\n",
+    "    trajectories = mpd.TrajectoryCollection([mean_traj,mode_traj])\n",
+    "    \n",
     "    traj_plots = [\n",
-    "        traj.hvplot(\n",
-    "            c=\"month\",\n",
-    "            tiles=\"CartoLight\",\n",
-    "            cmap=\"rainbow\",\n",
-    "            title=traj.id,\n",
-    "            width=500,\n",
-    "            height=500,\n",
-    "        )\n",
+    "        traj.hvplot(c=\"month\",tiles=\"CartoLight\",cmap=\"rainbow\", title=f\"{tag_id} , {traj.id}, {sigma}\",width=375,height=375)\n",
     "        for traj in trajectories.trajectories\n",
     "    ]\n",
-    "\n",
+    "    \n",
     "    return hv.Layout(traj_plots).cols(1)\n",
-    "\n",
-    "\n",
-    "def plot_emission(tag_id=\"DK_A10531\"):\n",
+    "    \n",
+    "def plot_emission(tag_id=\"CB_A11071\"):\n",
     "    ## Might not work if dask or a slider is involved, I have to test\n",
     "    emission = (\n",
     "        xr.open_dataset(\n",
@@ -219,26 +272,23 @@
     "            storage_options=storage_options,\n",
     "        )\n",
     "        .rename_vars({\"pdf\": \"emission\"})\n",
-    "        .drop_vars([\"final\", \"initial\"])\n",
-    "    )\n",
-    "\n",
-    "    states = xr.open_dataset(\n",
-    "        f\"{scratch_root}/{tag_id}/states.zarr\",\n",
-    "        engine=\"zarr\",\n",
-    "        chunks={},\n",
-    "        inline_array=True,\n",
-    "        storage_options=storage_options,\n",
-    "    ).where(emission[\"mask\"])\n",
-    "\n",
-    "    data = xr.merge([states, emission.drop_vars([\"mask\"])])\n",
-    "    plot1 = visualization.plot_map(data[\"states\"], bbox, cmap=\"cool\").opts(\n",
-    "        height=300, width=600\n",
     "    )\n",
-    "    plot2 = visualization.plot_map(data[\"emission\"], bbox, cmap=\"cool\").opts(\n",
-    "        height=300, width=600\n",
+    "    \n",
+    "    states = (\n",
+    "        xr.open_dataset(\n",
+    "            f\"{scratch_root}/{tag_id}/states.zarr\", \n",
+    "            engine=\"zarr\", \n",
+    "            chunks={}, \n",
+    "            inline_array=True,\n",
+    "            storage_options=storage_options,\n",
+    "        ).where(emission[\"mask\"])\n",
     "    )\n",
-    "    plot = hv.Layout([plot1, plot2]).cols(1)\n",
-    "\n",
+    "    \n",
+    "    data = xr.merge([states, emission.drop_vars([\"mask\"])])\n",
+    "    plot1 = visualization.plot_map(data[\"states\"].sel(time=slice(\"2015-09-04\",\"2015-09-10\")),bbox,cmap=\"cool\").opts(height=350,width=600)\n",
+    "    plot2 = visualization.plot_map(data[\"emission\"].sel(time=slice(\"2015-09-04\",\"2015-09-10\")),bbox,cmap=\"cool\").opts(height=350,width=600)\n",
+    "    plot=hv.Layout([plot1, plot2]).cols(1) \n",
+    "    \n",
     "    return plot"
    ]
  },
@@ -250,19 +300,20 @@
    "outputs": [],
    "source": [
     "# Panel parameters\n",
+    "value=tag_list[0]\n",
+    "#Initalizing the widget for tag selection \n",
+    "tag_widget = pn.widgets.Select(name=\"tag_id\", value=value, options=tag_list)\n",
     "\n",
-    "# Initalizing the widget for tag selection\n",
-    "tag_widget = pn.widgets.Select(name=\"tag_id\", value=\"NO_A12710\", options=tag_list)\n",
+    "#Binding widget with the plots\n",
     "\n",
-    "# Binding widget with the plots\n",
-    "\n",
-    "time_plot = pn.bind(plot_time_series, tag_id=tag_widget)\n",
-    "track_plot = pn.bind(plot_track, tag_id=tag_widget)\n",
+    "time_plot = pn.bind(plot_time_series,tag_id=tag_widget)\n",
+    "track_plot = pn.bind(plot_track,tag_id=tag_widget)\n",
+    "# Commenting emission because it's too long to load panel \n",
     "# emission_plot = pn.bind(plot_emission,tag_id=tag_widget)\n",
-    "track_emission = pn.Row(time_plot, track_plot)\n",
+    "track_emission = pn.Row(time_plot,track_plot)\n",
     "\n",
-    "# Combining plots with the widget\n",
-    "plots = pn.Row(tag_widget, track_emission)\n",
+    "#Combining plots with the widget\n",
+    "plots = pn.Row(tag_widget,track_emission)\n",
     "\n",
     "pn.template.FastListTemplate(\n",
     "    site=\"Tag data display\",\n",
@@ -274,12 +325,20 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "id": "11d9abdd-4ada-40ee-a89d-f21bfb538ca0",
+   "id": "6ff000e3-47dd-41fd-9cbb-cd22a5193ca7",
   "metadata": {},
   "outputs": [],
   "source": [
    "plots"
   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9fcd1a79-6125-4f6e-882c-2be556d68128",
+   "metadata": {},
+   "outputs": [],
+   "source": []
  }
 ],
 "metadata": {
@@ -298,7 +357,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.11.8"
+   "version": "3.12.5"
  }
 },
 "nbformat": 4,

From 348cc4aed1c70bfaecb0ce13a9a71fa156346ce3 Mon Sep 17 00:00:00 2001
From: aderrien7
Date: Wed, 4 Sep 2024 12:24:08 +0000
Subject: [PATCH 13/13] Skip the pre-commit failure and add an explanation of
 how the panel notebook works

---
 docs/pannel_plot_s3.ipynb | 296 +++++++++++++++++++++++---------------
 1 file changed, 182 insertions(+), 114 deletions(-)

diff --git a/docs/pannel_plot_s3.ipynb b/docs/pannel_plot_s3.ipynb
index ddb4607..a7b4926 100644
--- a/docs/pannel_plot_s3.ipynb
+++ b/docs/pannel_plot_s3.ipynb
@@ -20,6 +20,7 @@
    "- The temperature measured by the fish.\n",
    "- The depth measured by the fish.\n",
    "- The evolution of the latitude and the longitude over time.\n",
+    "- The trajectory generated by the algorithm.\n",
    "___\n",
    "First of all, you need to access the following [jupyterhub server](https://gfts.minrk.net); you will need to log in with your github account and select a configuration for starting a notebook. \n",
    "\n",
@@ -38,7 +39,7 @@
    "\n",
    "Please note that all the tracks have been generated but not checked manually, so some can display incoherent data. They will be checked and corrected later on.\n",
    "\n",
-    "The trajectories available from S3 has been generated using this the papermill_launcher.ipynb notebook, see this notebook to understand more about it.\n",
+    "The trajectories available from S3 have been generated using the papermill_launcher.ipynb notebook; see that notebook to understand more about how they were produced. \n",
    "Using this panel, users can examine the results of a computation. This helps to understand how the algorithm behaves in different situations."
   ]
  },
@@ -62,9 +63,17 @@
    "| DK_final | IBI_MULTIYEAR_PHY_005_002 | latitude: [40, 56], longitude: [-13, 5] |Diff | In this folder, there is the data for all the DK tags: the corrected ones from DK_3 and the ones that were already correct from the beginning. |"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "id": "9b00adfd-f28d-406a-8b9a-d977570137e1",
+   "metadata": {},
+   "source": [
+    "In the table above, a wrong track is a track that presents inconsistencies or for which the algorithm that estimates the fish speed has not converged. In a nutshell, these are the tracks we know for sure are wrong, but there might be others that are wrong too."
+   ]
+  },
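+  {
+   "cell_type": "markdown",
+   "id": "2f6c1a7e-8d3b-4f5a-9c0e-1b2d3e4f5a6b",
+   "metadata": {},
+   "source": [
+    "Before choosing a folder, you can check what is actually stored on the bucket. The next cell is a minimal sketch, assuming the same S3 credentials as the rest of this notebook; it recreates the `s3fs` filesystem that the parameters cell below configures, and the folder name `DK_final` is just one of the generations listed in the table above."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7c8d9e0f-1a2b-4c3d-8e4f-5a6b7c8d9e0f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sketch: list the tag folders available for one generation of tracks.\n",
+    "# \"DK_final\" is only an example taken from the table above.\n",
+    "import s3fs\n",
+    "\n",
+    "s3 = s3fs.S3FileSystem(\n",
+    "    anon=False,\n",
+    "    client_kwargs={\"endpoint_url\": \"https://s3.gra.perf.cloud.ovh.net\"},\n",
+    ")\n",
+    "print(s3.ls(\"gfts-ifremer/tags/bargip/DK_final\"))"
+   ]
+  },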
  {
   "cell_type": "raw",
-   "id": "7112a4cf-be81-4c65-9c83-12f23c0d5751",
+   "id": "839ec218-4c42-4f27-b23a-99547552440b",
   "metadata": {},
   "source": [
    "# Install pangeo fish if necessary\n",
@@ -79,24 +88,59 @@
   "outputs": [],
   "source": [
    "# Import necessary libraries and modules.\n",
-    "import os\n",
+    "import hvplot.xarray  # noqa\n",
    "import xarray as xr\n",
-    "from pint_xarray import unit_registry as ureg\n",
    "from pangeo_fish.io import open_tag\n",
-    "import hvplot.xarray\n",
    "import pandas as pd\n",
-    "import geopandas as gpd\n",
    "import movingpandas as mpd\n",
-    "import hvplot.pandas\n",
-    "from pangeo_fish.tags import adapt_model_time, reshape_by_bins, to_time_slice\n",
+    "from pangeo_fish.tags import to_time_slice\n",
    "import holoviews as hv\n",
-    "import cmocean\n",
    "import panel as pn\n",
-    "import numpy as np\n",
    "from pangeo_fish.io import read_trajectories\n",
    "from pangeo_fish import visualization\n",
-    "import s3fs\n",
-    "import json"
+    "import s3fs"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "id": "9d9a35eb-12a1-4db6-a23d-fbe68ff2f590",
+   "metadata": {},
+   "source": [
+    "### Parameters Explanation\n",
+    "\n",
+    "This section of the code is responsible for setting up the necessary parameters and configurations to access and process the data for analysis. Below is an explanation of each parameter:\n",
+    "\n",
+    "1. **S3 Filesystem Setup (`s3`)**:\n",
+    "   - This configures access to an S3-like storage system (OVH cloud in this case). It sets up authentication and defines the endpoint URL to access the data.\n",
+    "\n",
+    "2. **`generation_name`**:\n",
+    "   - This variable defines the folder name where the results are stored. You can update this to change the dataset being accessed.\n",
+    "\n",
+    "3. **`remote_path`**:\n",
+    "   - The base path to the folder where the tags are stored in the S3 bucket. It points to the \"bargip\" subdirectory under the \"gfts-ifremer/tags\" folder.\n",
+    "\n",
+    "4. **`tag_list_` and `tag_list`**:\n",
+    "   - These variables list all available tags within the specified folder (determined by `generation_name`). The tags are cleaned to only contain the relevant part of the path.\n",
+    "\n",
+    "5. **`cloud_root`**:\n",
+    "   - Specifies the root URL for tag data stored in the cloud (S3). This is the base location where all files for the analysis are stored.\n",
+    "\n",
+    "6. **`tag_root`**:\n",
+    "   - Defines the root URL where the cleaned tag data, used for computation, is located. This is derived from `cloud_root` and the \"cleaned\" folder.\n",
+    "\n",
+    "7. **`scratch_root`**:\n",
+    "   - Specifies the directory where the GFTS computation data is stored. It combines the `cloud_root` with the folder for the current generation of tracks.\n",
+    "\n",
+    "8. **`storage_options`**:\n",
+    "   - Contains the options used to configure the storage system.\n",
+    "\n",
+    "9. **`bbox` (Bounding Box)**:\n",
+    "   - Defines the geographical region (latitude and longitude range) on which the analysis focuses. The values here cover a region in the Atlantic Ocean.\n",
+    "\n",
+    "10. **`track_modes`**:\n",
+    "    - Specifies the two types of tracks that have been computed for GFTS: \"mean\" and \"mode\". These are two different ways of summarizing the estimated positions into a single track.\n",
+    "\n",
+    "Each of these parameters sets up an essential part of the data access and storage for running the analysis on the fish tracking data.\n"
+   ]
+  },
  {
   "cell_type": "code",
@@ -122,7 +166,11 @@
   "source": [
    "# Tag list is the list of available tags\n",
    "remote_path = \"gfts-ifremer/tags/bargip\"\n",
    "tag_list_ = s3.ls(f\"{remote_path}/{generation_name}\")\n",
-    "tag_list = [tag.replace(f\"{remote_path}/{generation_name}/\",\"\") for tag in tag_list_ if tag.replace(f\"{remote_path}/{generation_name}/\",\"\")]\n",
+    "tag_list = [\n",
+    "    tag.replace(f\"{remote_path}/{generation_name}/\", \"\")\n",
+    "    for tag in tag_list_\n",
+    "    if tag.replace(f\"{remote_path}/{generation_name}/\", \"\")\n",
+    "]\n",
    "cloud_root = \"s3://gfts-ifremer/tags/bargip\"\n",
    "\n",
    "# tag_root specifies the root URL for tag data used for this computation.\n",
@@ -133,24 +181,46 @@
    "tag_root = f\"{cloud_root}/cleaned\"\n",
    "\n",
    "# scratch_root specifies the root directory for GFTS temporary data.\n",
    "scratch_root = f\"{cloud_root}/{generation_name}\"\n",
    "\n",
    "# storage_options specifies options for the filesystem storing and/or opening output files.\n",
    "storage_options = {\n",
-    "    'anon': False, \n",
+    "    \"anon\": False,\n",
    "    # 'profile' : \"gfts\",\n",
-    "    'client_kwargs': {\n",
+    "    \"client_kwargs\": {\n",
    "        \"endpoint_url\": \"https://s3.gra.perf.cloud.ovh.net\",\n",
    "        \"region_name\": \"gra\",\n",
-    "    }\n",
+    "    },\n",
    "}\n",
    "\n",
    "# bbox, bounding box, defines the latitude and longitude range for the analysis area.\n",
-    "bbox = {\"latitude\": [40, 56], \"longitude\": [-13, 5]} \n",
+    "bbox = {\"latitude\": [40, 56], \"longitude\": [-13, 5]}\n",
    "# track_modes are the two types of track that have been computed for GFTS.\n",
    "track_modes = [\"mean\", \"mode\"]"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "id": "b4ec395d-9573-493b-99a5-5025b8075cee",
+   "metadata": {},
+   "source": [
+    "Here are short descriptions of what each function plots:\n",
+    "\n",
+    "1. **`plot_time_series`**:\n",
+    "   - Plots a time series of temperature, depth, and the fish's latitude and longitude over time. It visualizes how these parameters change throughout the tracking period for a given tag ID.\n",
+    "\n",
+    "2. **`plot_track`**:\n",
+    "   - Plots the movement track of a fish (mean and mode trajectories) on a map, color-coded by month. It shows the fish's path over time for a given tag ID.\n",
+    "\n",
+    "3. **`plot_emission`**:\n",
+    "   - Plots the emission probability and states on a map for a specific time range. It compares these two datasets to visualize the fish's possible states and their corresponding emissions."
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
-   "id": "87a37132-cb90-42b7-8306-1cc9aa850bd3",
+   "id": "6d58b890-9178-42e0-9990-ef95e1e109f3",
   "metadata": {},
   "outputs": [],
   "source": [
@@ -158,9 +228,11 @@
    "# @pn.cache\n",
    "\n",
    "# Functions to plot the different visualizations for a given tag id\n",
-    "def plot_time_series(plot_type=\"time series\",tag_id=\"CB_A11071\"):\n",
-    "    # load trajectories \n",
-    "    trajectories = read_trajectories(track_modes,f\"{scratch_root}/{tag_id}\",storage_options, format=\"parquet\")\n",
+    "def plot_time_series(plot_type=\"time series\", tag_id=\"CB_A11071\"):\n",
+    "    # load trajectories\n",
+    "    trajectories = read_trajectories(\n",
+    "        track_modes, f\"{scratch_root}/{tag_id}\", storage_options, format=\"parquet\"\n",
+    "    )\n",
    "\n",
    "    # Converting the trajectories to pandas DataFrames to access data easily\n",
    "    mean_df = trajectories.trajectories[0].df\n",
    "    mode_df = trajectories.trajectories[1].df\n",
@@ -170,30 +242,29 @@
    "    # time_slice = to_time_slice(tag[\"tagging_events/time\"])\n",
    "\n",
    "    time_slice = to_time_slice(tag[\"tagging_events/time\"])\n",
-    "    \n",
+    "\n",
    "    time = tag[\"dst\"].ds.time\n",
    "    cond = (time <= time_slice.stop) & (time >= time_slice.start)\n",
-    "    \n",
-    "    tag_log = tag[\"dst\"].ds.where(cond,drop=True)\n",
-    "    \n",
+    "\n",
+    "    tag_log = tag[\"dst\"].ds.where(cond, drop=True)\n",
+    "\n",
    "    min_ = tag_log.time[0]\n",
    "    max_ = tag_log.time[-1]\n",
-    "    \n",
+    "\n",
    "    time_slice = slice(min_.data, max_.data)\n",
    "\n",
-    "    \n",
-    "    tag_log = tag[\"dst\"].ds.sel(time=time_slice) \n",
-    "    \n",
+    "    tag_log = tag[\"dst\"].ds.sel(time=time_slice)\n",
+    "\n",
    "    # Creating pandas series for the xarray dataset\n",
-    "    mean_lon_ = pd.Series(mean_df.geometry.x,name=\"longitude\")\n",
-    "    mean_lat_ = pd.Series(mean_df.geometry.y,name=\"latitude\")\n",
-    "    mode_lon_ = pd.Series(mode_df.geometry.x,name=\"longitude\")\n",
-    "    mode_lat_ = pd.Series(mode_df.geometry.y,name=\"latitude\")\n",
-    "    \n",
+    "    mean_lon_ = pd.Series(mean_df.geometry.x, name=\"longitude\")\n",
+    "    mean_lat_ = pd.Series(mean_df.geometry.y, name=\"latitude\")\n",
+    "    mode_lon_ = pd.Series(mode_df.geometry.x, name=\"longitude\")\n",
+    "    mode_lat_ = pd.Series(mode_df.geometry.y, name=\"latitude\")\n",
+    "\n",
    "    # Creating xarray datasets\n",
    "    mean_coords = xr.Dataset(pd.concat([mean_lon_, mean_lat_], axis=1))\n",
    "    mode_coords = xr.Dataset(pd.concat([mode_lon_, mode_lat_], axis=1))\n",
-    "    \n",
+    "\n",
    "    # Assigning dataarrays to variables\n",
    "    mean_lon = mean_coords[\"longitude\"]\n",
    "    mean_lat = mean_coords[\"latitude\"]\n",
@@ -201,94 +272,91 @@
    "    mode_lat = mode_coords[\"latitude\"]\n",
    "\n",
    "    tag_log[\"depth\"] = tag_log[\"pressure\"]\n",
-    "    temp_plot = tag_log[\"temperature\"].hvplot(color=\"Red\",title=\"Temperature (°C)\",grid=True,height=200,width=600)\n",
-    "    depth_plot = (-tag_log[\"depth\"]).hvplot(color =\"Blue\",title=\"Depth (m)\",grid=True,height=200,width=600)\n",
-    "    lon_plot = (mean_lat.hvplot(label=\"mean\",clim=[mean_lat_.min(),mean_lat_.max()]) * mode_lat.hvplot(label = \"mode\",clim=[mode_lat_.min(),mean_lat_.max()])).opts(height=200,width=600,show_grid=True,title = \"Fish latitude over time\")\n",
-    "    lat_plot = (mean_lon.hvplot(label=\"mean\",clim=[mean_lon_.min(),mean_lat_.max()]) * mode_lon.hvplot(label = \"mode\",clim=[mode_lon_.min(),mean_lat_.max()])).opts(height=200,width=600,show_grid=True,title = \"Fish longitude over time\")\n",
+    "    temp_plot = tag_log[\"temperature\"].hvplot(\n",
+    "        color=\"Red\", title=\"Temperature (°C)\", grid=True, height=200, width=600\n",
+    "    )\n",
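+    "    # Depth is plotted negated so that deeper values point downwards\n",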
+    "    depth_plot = (-tag_log[\"depth\"]).hvplot(\n",
+    "        color=\"Blue\", title=\"Depth (m)\", grid=True, height=200, width=600\n",
+    "    )\n",
+    "    lat_plot = (\n",
+    "        mean_lat.hvplot(label=\"mean\", clim=[mean_lat_.min(), mean_lat_.max()])\n",
+    "        * mode_lat.hvplot(label=\"mode\", clim=[mode_lat_.min(), mode_lat_.max()])\n",
+    "    ).opts(height=200, width=600, show_grid=True, title=\"Fish latitude over time\")\n",
+    "    lon_plot = (\n",
+    "        mean_lon.hvplot(label=\"mean\", clim=[mean_lon_.min(), mean_lon_.max()])\n",
+    "        * mode_lon.hvplot(label=\"mode\", clim=[mode_lon_.min(), mode_lon_.max()])\n",
+    "    ).opts(height=200, width=600, show_grid=True, title=\"Fish longitude over time\")\n",
    "\n",
    "    return (temp_plot + depth_plot + lon_plot + lat_plot).cols(1)\n",
    "\n",
    "\n",
-    "def plot_anomaly(tag_id=\"CB_A11071\"):\n",
-    "    tag = open_tag(tag_root, tag_id)\n",
-    "    time_slice = to_time_slice(tag[\"tagging_events/time\"])\n",
-    "    tag_log = tag[\"dst\"].ds.sel(time=time_slice) \n",
-    "    ds = tag_log\n",
-    "    # Calculate the differences\n",
-    "    ds['temp_diff'] = ds.temperature.diff('time')\n",
-    "    \n",
-    "    # Define a threshold for significant temperature rise\n",
-    "    threshold = 0.2\n",
-    "    \n",
-    "    # Identify significant rises\n",
-    "    ds['event'] = ds['temp_diff'] > threshold\n",
-    "    \n",
-    "    # Extract significant points\n",
-    "    significant_points = ds.where(ds['event'], drop=True)\n",
-    "    \n",
-    "    # Plot the time series\n",
-    "    time_series_plot = ds.temperature.hvplot(line_color='blue', label='Temperature',width=1000,height=500)\n",
-    "    \n",
-    "    # Plot the significant rise points\n",
-    "    significant_points_plot = significant_points.temperature.hvplot.scatter(\n",
-    "        color='red', marker='x', size=100, label='Significant Rise',width=1000,height=500\n",
-    "    )\n",
-    "    \n",
-    "    # Display the combined plot\n",
-    "    return hv.Overlay([time_series_plot, significant_points_plot]).opts(width=1000,height=500)\n",
-    "\n",
    "def plot_track(tag_id=\"CB_A11071\"):\n",
    "    # Read the sigma parameter of this run from the tag's parameters.json;\n",
    "    # it is only used in the plot titles so each track can be matched to its run.\n",
-    "    sigma = pd.read_json(f\"{scratch_root}/{tag_id}/parameters.json\").to_dict()[0][\"sigma\"]\n",
-    "    trajectories = read_trajectories(track_modes,f\"{scratch_root}/{tag_id}\",storage_options, format=\"parquet\")\n",
+    "    sigma = pd.read_json(f\"{scratch_root}/{tag_id}/parameters.json\").to_dict()[0][\n",
+    "        \"sigma\"\n",
+    "    ]\n",
+    "    trajectories = read_trajectories(\n",
+    "        track_modes, f\"{scratch_root}/{tag_id}\", storage_options, format=\"parquet\"\n",
+    "    )\n",
    "\n",
    "    # Converting the trajectories to pandas DataFrames to access data easily\n",
    "    mean_df = trajectories.trajectories[0].df\n",
    "    mode_df = trajectories.trajectories[1].df\n",
-    "    \n",
+    "\n",
    "    # Adding month data\n",
    "    mean_df[\"month\"] = mean_df.index.month\n",
    "    mode_df[\"month\"] = mode_df.index.month\n",
-    "    \n",
+    "\n",
    "    # Converting back to trajectories\n",
-    "    mean_traj = mpd.Trajectory(mean_df,traj_id=mean_df.traj_id.drop_duplicates().values[0])\n",
-    "    mode_traj = mpd.Trajectory(mode_df,traj_id=mode_df.traj_id.drop_duplicates().values[0])\n",
-    "    trajectories = mpd.TrajectoryCollection([mean_traj,mode_traj])\n",
-    "    \n",
+    "    mean_traj = mpd.Trajectory(\n",
+    "        mean_df, traj_id=mean_df.traj_id.drop_duplicates().values[0]\n",
+    "    )\n",
+    "    mode_traj = mpd.Trajectory(\n",
+    "        mode_df, traj_id=mode_df.traj_id.drop_duplicates().values[0]\n",
+    "    )\n",
+    "    trajectories = mpd.TrajectoryCollection([mean_traj, mode_traj])\n",
+    "\n",
    "    traj_plots = [\n",
-    "        traj.hvplot(c=\"month\",tiles=\"CartoLight\",cmap=\"rainbow\", title=f\"{tag_id} , {traj.id}, {sigma}\",width=375,height=375)\n",
+    "        traj.hvplot(\n",
+    "            c=\"month\",\n",
+    "            tiles=\"CartoLight\",\n",
+    "            cmap=\"rainbow\",\n",
+    "            title=f\"{tag_id} , {traj.id}, {sigma}\",\n",
+    "            width=375,\n",
+    "            height=375,\n",
+    "        )\n",
    "        for traj in trajectories.trajectories\n",
    "    ]\n",
-    "    \n",
+    "\n",
    "    return hv.Layout(traj_plots).cols(1)\n",
-    "    \n",
+    "\n",
+    "\n",
    "def plot_emission(tag_id=\"CB_A11071\"):\n",
    "    ## Might not work if dask or a slider is involved, I have to test\n",
-    "    emission = (\n",
-    "        xr.open_dataset(\n",
-    "            f\"{scratch_root}/{tag_id}/combined.zarr\",\n",
-    "            engine=\"zarr\",\n",
-    "            chunks={},\n",
-    "            inline_array=True,\n",
-    "            storage_options=storage_options,\n",
-    "        )\n",
-    "        .rename_vars({\"pdf\": \"emission\"})\n",
-    "    )\n",
-    "    \n",
-    "    states = (\n",
-    "        xr.open_dataset(\n",
-    "            f\"{scratch_root}/{tag_id}/states.zarr\", \n",
-    "            engine=\"zarr\", \n",
-    "            chunks={}, \n",
-    "            inline_array=True,\n",
-    "            storage_options=storage_options,\n",
-    "        ).where(emission[\"mask\"])\n",
-    "    )\n",
-    "    \n",
+    "    emission = xr.open_dataset(\n",
+    "        f\"{scratch_root}/{tag_id}/combined.zarr\",\n",
+    "        engine=\"zarr\",\n",
+    "        chunks={},\n",
+    "        inline_array=True,\n",
+    "        storage_options=storage_options,\n",
+    "    ).rename_vars({\"pdf\": \"emission\"})\n",
+    "\n",
+    "    states = xr.open_dataset(\n",
+    "        f\"{scratch_root}/{tag_id}/states.zarr\",\n",
+    "        engine=\"zarr\",\n",
+    "        chunks={},\n",
+    "        inline_array=True,\n",
+    "        storage_options=storage_options,\n",
+    "    ).where(emission[\"mask\"])\n",
+    "\n",
    "    data = xr.merge([states, emission.drop_vars([\"mask\"])])\n",
-    "    plot1 = visualization.plot_map(data[\"states\"].sel(time=slice(\"2015-09-04\",\"2015-09-10\")),bbox,cmap=\"cool\").opts(height=350,width=600)\n",
-    "    plot2 = visualization.plot_map(data[\"emission\"].sel(time=slice(\"2015-09-04\",\"2015-09-10\")),bbox,cmap=\"cool\").opts(height=350,width=600)\n",
-    "    plot=hv.Layout([plot1, plot2]).cols(1) \n",
-    "    \n",
+    "    plot1 = visualization.plot_map(\n",
+    "        data[\"states\"].sel(time=slice(\"2015-09-04\", \"2015-09-10\")), bbox, cmap=\"cool\"\n",
+    "    ).opts(height=350, width=600)\n",
+    "    plot2 = visualization.plot_map(\n",
+    "        data[\"emission\"].sel(time=slice(\"2015-09-04\", \"2015-09-10\")), bbox, cmap=\"cool\"\n",
+    "    ).opts(height=350, width=600)\n",
+    "    plot = hv.Layout([plot1, plot2]).cols(1)\n",
+    "\n",
    "    return plot"
   ]
  },
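+  {
+   "cell_type": "markdown",
+   "id": "5e4f3a2b-6c7d-4e8f-9a0b-1c2d3e4f5a6c",
+   "metadata": {},
+   "source": [
+    "The helpers above can also be used outside of the dashboard. The next cell is a minimal sketch, assuming `tag_list` is not empty; the output file names are just examples."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8b9c0d1e-2f3a-4b5c-8d6e-7f8a9b0c1d2e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sketch: render the plots for a single tag and save them as standalone\n",
+    "# HTML files, without going through the Panel widget.\n",
+    "tag_id = tag_list[0]\n",
+    "hv.save(plot_time_series(tag_id=tag_id), f\"{tag_id}_time_series.html\")\n",
+    "hv.save(plot_track(tag_id=tag_id), f\"{tag_id}_tracks.html\")"
+   ]
+  },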
@@ -300,20 +368,20 @@
   "outputs": [],
   "source": [
    "# Panel parameters\n",
-    "value=tag_list[0]\n",
-    "#Initalizing the widget for tag selection \n",
+    "value = tag_list[0]\n",
+    "# Initializing the widget for tag selection\n",
    "tag_widget = pn.widgets.Select(name=\"tag_id\", value=value, options=tag_list)\n",
    "\n",
-    "#Binding widget with the plots\n",
+    "# Binding widget with the plots\n",
    "\n",
-    "time_plot = pn.bind(plot_time_series,tag_id=tag_widget)\n",
-    "track_plot = pn.bind(plot_track,tag_id=tag_widget)\n",
-    "# Commenting emission because it's too long to load panel \n",
+    "time_plot = pn.bind(plot_time_series, tag_id=tag_widget)\n",
+    "track_plot = pn.bind(plot_track, tag_id=tag_widget)\n",
+    "# Commenting out emission because it takes too long to load in the panel\n",
    "# emission_plot = pn.bind(plot_emission,tag_id=tag_widget)\n",
-    "track_emission = pn.Row(time_plot,track_plot)\n",
+    "track_emission = pn.Row(time_plot, track_plot)\n",
    "\n",
-    "#Combining plots with the widget\n",
-    "plots = pn.Row(tag_widget,track_emission)\n",
+    "# Combining plots with the widget\n",
+    "plots = pn.Row(tag_widget, track_emission)\n",
    "\n",
    "pn.template.FastListTemplate(\n",
    "    site=\"Tag data display\",\n",
@@ -325,7 +393,7 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "id": "6ff000e3-47dd-41fd-9cbb-cd22a5193ca7",
+   "id": "9fcd1a79-6125-4f6e-882c-2be556d68128",
   "metadata": {},
   "outputs": [],
   "source": [
@@ -335,7 +403,7 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "id": "9fcd1a79-6125-4f6e-882c-2be556d68128",
+   "id": "46038ad1-c298-4455-84f0-bc7f19a5f509",
   "metadata": {},
   "outputs": [],
   "source": []