diff --git a/notebooks/excarta_temp_proc_analysis.ipynb b/notebooks/excarta_temp_proc_analysis.ipynb
new file mode 100644
index 0000000..6072560
--- /dev/null
+++ b/notebooks/excarta_temp_proc_analysis.ipynb
@@ -0,0 +1,2271 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "ce9a1598",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# import libs\n",
+ "import xarray as xr\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import datetime\n",
+ "import os\n",
+ "import pathlib as Path\n",
+ "from datetime import datetime\n",
+ "import matplotlib.pyplot as plt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d5c494a4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# sort the data into the correct format\n",
+ "def data_loader(folder_path):\n",
+ "    \"\"\"Read each forecast CSV found in folder_path into its own DataFrame.\n",
+ "\n",
+ "    Only names ending in '.csv' that do not start with '._' are read. The\n",
+ "    file name without its extension is parsed as the forecast init time\n",
+ "    (format %Y%m%d%H) and stored in 'init_time'; 'step' is the offset of\n",
+ "    each row's 'DateTimeUTC' from that init time, expressed in hours.\n",
+ "\n",
+ "    Returns the DataFrames as a list, in os.listdir order.\n",
+ "    \"\"\"\n",
+ "    column_names = ['DateTimeUTC', 'LocationId', 'Latitude', 'Longitude', 'dni', 'dhi', 'ghi']\n",
+ "    frames = []\n",
+ "\n",
+ "    for filename in os.listdir(folder_path):\n",
+ "        # Guard clause: ignore non-CSV names and names starting with '._'.\n",
+ "        if not filename.endswith('.csv') or filename.startswith('._'):\n",
+ "            continue\n",
+ "\n",
+ "        file_path = os.path.join(folder_path, filename)\n",
+ "        print(file_path)\n",
+ "        print(filename)\n",
+ "        frame = pd.read_csv(file_path, header=None, names=column_names, parse_dates=['DateTimeUTC'])\n",
+ "\n",
+ "        # The file name minus the 4-character '.csv' suffix is the init timestamp.\n",
+ "        init_time = datetime.strptime(filename[:-4], '%Y%m%d%H')\n",
+ "\n",
+ "        lead_time = frame['DateTimeUTC'] - init_time\n",
+ "        frame['step'] = lead_time.dt.total_seconds() / 3600  # seconds -> hours\n",
+ "        frame['init_time'] = init_time\n",
+ "        frames.append(frame)\n",
+ "\n",
+ "    return frames\n",
+ " "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "231ff600",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def load_data_from_all_years(parent_folder_path, years=range(2018, 2019)):\n",
+ "    \"\"\"Load the DataFrames of every requested year folder.\n",
+ "\n",
+ "    Args:\n",
+ "        parent_folder_path: Directory holding one sub-folder per year, each\n",
+ "            named str(year).\n",
+ "        years: Iterable of years to load. Defaults to 2018 only, which is the\n",
+ "            range that used to be hard-coded here.\n",
+ "\n",
+ "    Returns:\n",
+ "        A flat list with everything data_loader() returned for each year,\n",
+ "        in the order of ``years``.\n",
+ "    \"\"\"\n",
+ "    all_dataframes = []\n",
+ "\n",
+ "    # Each year's folder is read by data_loader and its frames are appended.\n",
+ "    for year in years:\n",
+ "        folder_path = os.path.join(parent_folder_path, str(year))\n",
+ "        all_dataframes.extend(data_loader(folder_path))\n",
+ "\n",
+ "    return all_dataframes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e283f859",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def pdtocdf(dfs):\n",
+ "    \"\"\"Combine per-file DataFrames into one gridded xarray Dataset.\n",
+ "\n",
+ "    The frames are concatenated, indexed by (init_time, step, Latitude,\n",
+ "    Longitude) and unstacked into a grid. 'LocationId' and 'DateTimeUTC' are\n",
+ "    dropped, and the remaining data variables are stacked along a new\n",
+ "    'variable' dimension under the single data variable 'value'.\n",
+ "\n",
+ "    Args:\n",
+ "        dfs: Iterable of DataFrames that carry at least the columns\n",
+ "            'init_time', 'step', 'Latitude', 'Longitude', 'LocationId' and\n",
+ "            'DateTimeUTC'.\n",
+ "\n",
+ "    Returns:\n",
+ "        xr.Dataset with one data variable 'value', sorted by 'step' and\n",
+ "        'init_time', with the spatial coordinates renamed to 'latitude' and\n",
+ "        'longitude'.\n",
+ "    \"\"\"\n",
+ "    merged_df = pd.concat(dfs, ignore_index=True)\n",
+ "\n",
+ "    ds = xr.Dataset.from_dataframe(merged_df)\n",
+ "    ds = ds.set_index(index=['init_time', 'step', 'Latitude', 'Longitude']).unstack('index')\n",
+ "    ds = ds.drop_vars(['LocationId', 'DateTimeUTC'])\n",
+ "\n",
+ "    # Take the names as a plain list: the data_vars mapping view should not be\n",
+ "    # handed to assign_coords as if it were an array of labels.\n",
+ "    var_names = list(ds.data_vars)\n",
+ "    d2 = xr.concat([ds[v] for v in var_names], dim='variable')\n",
+ "\n",
+ "    # Label the new dimension with the original variable names.\n",
+ "    d2 = d2.assign_coords(variable=('variable', var_names))\n",
+ "\n",
+ "    # Turn the xr.DataArray into a xr.Dataset.\n",
+ "    ds = xr.Dataset(dict(value=d2))\n",
+ "\n",
+ "    # When datasets are merged the steps can be out of order, so sort them.\n",
+ "    ds = ds.sortby('step')\n",
+ "    ds = ds.sortby('init_time')\n",
+ "\n",
+ "    ds = ds.rename({'Latitude': 'latitude', 'Longitude': 'longitude'})\n",
+ "\n",
+ "    return ds"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2ae411d6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def main():\n",
+ "    \"\"\"Load the hard-coded Excarta solar_data directory and return it as a Dataset.\"\"\"\n",
+ "    source_dir = '/mnt/storage_b/data/ocf/solar_pv_nowcasting/experimental/Excarta/sr_UK_Malta_full/solar_data'\n",
+ "    frames = load_data_from_all_years(source_dir)\n",
+ "    # The result is only returned; nothing is written to disk here.\n",
+ "    return pdtocdf(frames)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8c8de7bb",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "ds = main()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f0e118d3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ds"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "9afbd533",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dfs = data_loader(\"/mnt/storage_b/data/ocf/solar_pv_nowcasting/experimental/Excarta/sr_UK_Malta_full/solar_data/2021\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c04a005f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dfs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c6cea281",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# root_dir = Path(\"/mnt/storage_b/data/ocf/solar_pv_nowcasting/experimental/Excarta/solar_radiation_UK_Malta_17_23_1calcperday/solar_data\")\n",
+ "PATH = \"/mnt/storage_b/data/ocf/solar_pv_nowcasting/experimental/Excarta/sr_UK_Malta_full/solar_data\"\n",
+ "\n",
+ "YEARS = [2018,2019,2020,2021,2022]\n",
+ "column_names = ['DateTimeUTC', 'LocationId', 'Latitude', 'Longitude', 'dni', 'dhi', 'ghi']\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "14d832c3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "NWP_PATH = \"/mnt/storage_b/data/ocf/solar_pv_nowcasting/clients/island/nwp_v8.zarr\"\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "534abcb7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Load the data as an xarray dataset\n",
+ "dataset = xr.open_dataset(NWP_PATH)\n",
+ "dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f06d18d5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#Low memory script\n",
+ "import os\n",
+ "from datetime import datetime\n",
+ "import pandas as pd\n",
+ "import xarray as xr\n",
+ "import argparse\n",
+ "import pathlib\n",
+ "\n",
+ "\n",
+ "# def _parse_args():\n",
+ "# parser = argparse.ArgumentParser()\n",
+ "# parser.add_argument(\"output\", type=pathlib.Path, help=\"Output zarr file\")\n",
+ "# return parser.parse_args()\n",
+ "\n",
+ "\n",
+ "def data_loader(folder_path):\n",
+ "    \"\"\"Turn each forecast CSV in folder_path into its own xarray Dataset.\n",
+ "\n",
+ "    Only names ending in '.csv' that do not start with '._' are read. The\n",
+ "    file name without its extension is parsed as the init time (format\n",
+ "    %Y%m%d%H) and stored in 'init_time'; 'step' is the offset of each row's\n",
+ "    'DateTimeUTC' from it, in hours. 'LocationId' and 'DateTimeUTC' are\n",
+ "    dropped from every resulting Dataset.\n",
+ "\n",
+ "    Returns the Datasets as a list, in os.listdir order.\n",
+ "    \"\"\"\n",
+ "    column_names = ['DateTimeUTC', 'LocationId', 'Latitude', 'Longitude', 'dni', 'dhi', 'ghi']\n",
+ "    per_file = []\n",
+ "\n",
+ "    for filename in os.listdir(folder_path):\n",
+ "        # Guard clause: ignore non-CSV names and names starting with '._'.\n",
+ "        if not filename.endswith('.csv') or filename.startswith('._'):\n",
+ "            continue\n",
+ "\n",
+ "        frame = pd.read_csv(\n",
+ "            os.path.join(folder_path, filename),\n",
+ "            header=None,\n",
+ "            names=column_names,\n",
+ "            parse_dates=['DateTimeUTC'],\n",
+ "        )\n",
+ "        init_time = datetime.strptime(filename[:-4], '%Y%m%d%H')\n",
+ "        lead_time = frame['DateTimeUTC'] - init_time\n",
+ "        frame['step'] = lead_time.dt.total_seconds() / 3600  # seconds -> hours\n",
+ "        frame['init_time'] = init_time\n",
+ "\n",
+ "        # One Dataset per file, without the two bookkeeping columns.\n",
+ "        file_ds = xr.Dataset.from_dataframe(frame).drop_vars(['LocationId', 'DateTimeUTC'])\n",
+ "        per_file.append(file_ds)\n",
+ "\n",
+ "    return per_file\n",
+ "\n",
+ "\n",
+ "def load_data_from_all_years(parent_folder_path, years=range(2026, 2027)):\n",
+ "    \"\"\"Load the Datasets of every requested year folder.\n",
+ "\n",
+ "    Args:\n",
+ "        parent_folder_path: Directory holding one sub-folder per year, each\n",
+ "            named str(year).\n",
+ "        years: Iterable of years to load. The default keeps the single year\n",
+ "            that used to be hard-coded here.\n",
+ "\n",
+ "    Returns:\n",
+ "        A flat list with everything data_loader() returned for each year,\n",
+ "        in the order of ``years``.\n",
+ "    \"\"\"\n",
+ "    # NOTE(review): the default year (2026) is kept for backward compatibility;\n",
+ "    # confirm it is the intended year or pass ``years`` explicitly.\n",
+ "    all_datasets = []\n",
+ "\n",
+ "    for year in years:\n",
+ "        folder_path = os.path.join(parent_folder_path, str(year))\n",
+ "        all_datasets.extend(data_loader(folder_path))\n",
+ "\n",
+ "    return all_datasets\n",
+ "\n",
+ "\n",
+ "def pdtocdf(datasets):\n",
+ "    \"\"\"Concatenate per-file Datasets and stack their variables into 'value'.\n",
+ "\n",
+ "    Every Dataset gets a multi-index named 'index' built from 'init_time',\n",
+ "    'step', 'Latitude' and 'Longitude'; the Datasets are concatenated along\n",
+ "    it and all data variables are stacked along a new 'variable' dimension\n",
+ "    under the single data variable 'value'.\n",
+ "\n",
+ "    The result is still stacked along 'index'; this function does not\n",
+ "    unstack it into a grid.\n",
+ "\n",
+ "    Args:\n",
+ "        datasets: List of xr.Dataset objects carrying 'init_time', 'step',\n",
+ "            'Latitude' and 'Longitude'.\n",
+ "\n",
+ "    Returns:\n",
+ "        xr.Dataset sorted by step, init_time, latitude and longitude, with\n",
+ "        'Latitude'/'Longitude' renamed to 'y'/'x'.\n",
+ "    \"\"\"\n",
+ "    print(datasets)\n",
+ "\n",
+ "    # Step 1: give every Dataset the same multi-index.\n",
+ "    indexed = [ds.set_index(index=['init_time', 'step', 'Latitude', 'Longitude']) for ds in datasets]\n",
+ "\n",
+ "    # Step 2: concatenate all Datasets along that multi-index.\n",
+ "    ds = xr.concat(indexed, dim='index')\n",
+ "\n",
+ "    # Take the names as a plain list: the data_vars mapping view should not be\n",
+ "    # handed to assign_coords as if it were an array of labels.\n",
+ "    var_names = list(ds.data_vars)\n",
+ "    d2 = xr.concat([ds[v] for v in var_names], dim='variable')\n",
+ "    d2 = d2.assign_coords(variable=('variable', var_names))\n",
+ "    ds = xr.Dataset(dict(value=d2))\n",
+ "\n",
+ "    for key in ('step', 'init_time', 'Latitude', 'Longitude'):\n",
+ "        ds = ds.sortby(key)\n",
+ "    ds = ds.rename({'Latitude': 'y', 'Longitude': 'x'})\n",
+ "\n",
+ "    return ds\n",
+ "\n",
+ "\n",
+ "def main():\n",
+ "    \"\"\"Load the hard-coded Excarta solar_data directory and return the combined Dataset.\n",
+ "\n",
+ "    Nothing is written to disk; the caller decides what to do with the result.\n",
+ "    \"\"\"\n",
+ "    source_dir = '/mnt/storage_b/data/ocf/solar_pv_nowcasting/experimental/Excarta/sr_UK_Malta_full/solar_data'\n",
+ "    datasets = load_data_from_all_years(source_dir)\n",
+ "    return pdtocdf(datasets)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "bb63507e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ds = main()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d9801e9e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ds"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "552c437b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import matplotlib.pyplot as plt\n",
+ "\n",
+ "ds= unstacked_ds\n",
+ "\n",
+ "# Select the data for a given init_time, x, and y\n",
+ "selected_data = ds['value'].sel(variable='dhi', init_time='2019-09-27T00', x=13.250000, y=35.250000)#,method='nearest')\n",
+ "\n",
+ "# Create a plot\n",
+ "plt.figure(figsize=(10, 6))\n",
+ "selected_data.plot.line('o-')\n",
+ "plt.title('dhi vs step')\n",
+ "plt.xlabel('step')\n",
+ "plt.ylabel('dhi')\n",
+ "plt.grid(True)\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "cb3bd109",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(selected_data)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "fb5d8a46",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "unstacked_ds = ds.unstack('index')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0e054268",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "unstacked_ds"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d80b5056",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import matplotlib.pyplot as plt\n",
+ "\n",
+ "ds= combined_ds\n",
+ "\n",
+ "# Select the data for a given init_time, x, and y\n",
+ "selected_data = ds['value'].sel(variable='dhi', init_time='2019-09-27T00', x=13.250000, y=35.250000)#,method='nearest')\n",
+ "\n",
+ "# Create a plot\n",
+ "plt.figure(figsize=(10, 6))\n",
+ "selected_data.plot.line('o-')\n",
+ "plt.title('dhi vs step')\n",
+ "plt.xlabel('step')\n",
+ "plt.ylabel('dhi')\n",
+ "plt.grid(True)\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "55c99753",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Drop every label along the 'variable' dimension whose 'value' data contains any NaN\n",
+ "combined_ds = unstacked_ds.dropna(dim='variable', subset=['value'])\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6263d208",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "combined_ds"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "42a3a421",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "unstacked_ds.to_zarr('/mnt/storage_b/data/ocf/solar_pv_nowcasting/experimental/Excarta/sr_UK_Malta_full/zarr_format/a1_2018_only.zarr')\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "36eb98de",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#read\n",
+ "path_2017 = \"/mnt/storage_b/data/ocf/solar_pv_nowcasting/experimental/Excarta/sr_UK_Malta_full/zarr_format/b1_2017_only_t2.zarr\"\n",
+ "dataset_2017 = xr.open_zarr(path_2017)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e6021055",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dataset_2017"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "da155339",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# I could load this data into my nwp dashboard to explore the data\n",
+ "import matplotlib.pyplot as plt\n",
+ "import xarray as xr\n",
+ "\n",
+ "data = dataset_2017\n",
+ "\n",
+ "# Assuming your dataset is named 'data'\n",
+ "# Selecting a specific variable, init_time, and step\n",
+ "selected_data = data.sel(variable='ghi', init_time='2017-12-31T00', step=1)\n",
+ "\n",
+ "# Accessing the values\n",
+ "values = selected_data['value']\n",
+ "\n",
+ "values\n",
+ "# Plotting the values\n",
+ "# plt.imshow(values, origin='lower')\n",
+ "# plt.colorbar(label='Value')\n",
+ "# plt.title('Plot of GHI')\n",
+ "# plt.xlabel('x')\n",
+ "# plt.ylabel('y')\n",
+ "# plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "fb2df18c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Plot one forecast run of a single variable against its step at the grid\n",
+ "# point nearest to the requested location. Expects `data` from an earlier cell.\n",
+ "latitude = 49\n",
+ "longitude = -8\n",
+ "\n",
+ "# x is selected with the longitude and y with the latitude; take the nearest grid point.\n",
+ "selected_data = data.sel(x=longitude, y=latitude, method='nearest')\n",
+ "\n",
+ "# Selecting the specific variable and forecast run\n",
+ "variable = 'dni'\n",
+ "selected_variable_data = selected_data.sel(variable=variable, init_time='2017-12-31T06')\n",
+ "\n",
+ "# Accessing the values and step dimensions\n",
+ "values = selected_variable_data['value'].values\n",
+ "steps = selected_variable_data['step']\n",
+ "\n",
+ "print('---')\n",
+ "print(values)\n",
+ "print('---')\n",
+ "print(steps)\n",
+ "\n",
+ "# Step goes on the x-axis and the variable on the y-axis, matching the axis\n",
+ "# labels below (the arguments used to be passed the other way round).\n",
+ "plt.plot(steps.values, values)\n",
+ "plt.title(f'Line Chart of {variable.upper()} at Latitude: {latitude}, Longitude: {longitude}')\n",
+ "plt.xlabel('Step')\n",
+ "plt.ylabel(variable.upper())\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "57cd69e7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Testing monthly data\n",
+ "\n",
+ "ds = xr.load_dataset(\"/mnt/storage_b/data/ocf/solar_pv_nowcasting/experimental/Excarta/sr_UK_Malta_full/zarr_format/r2201802.zarr\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d0252137",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ds"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a8f099d7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Select the data for a given init_time, x, and y\n",
+ "selected_data = ds['value'].sel(variable='dhi', init_time='2018-02-01T00', x=13.250000, y=35.250000)#,method='nearest')\n",
+ "\n",
+ "# Create a plot\n",
+ "plt.figure(figsize=(10, 6))\n",
+ "selected_data.plot.line('o-')\n",
+ "plt.title('dhi vs step')\n",
+ "plt.xlabel('step')\n",
+ "plt.ylabel('dhi')\n",
+ "plt.grid(True)\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f27cefdb",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ds"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "695c48bd",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import xarray as xr\n",
+ "import os\n",
+ "\n",
+ "def merge_zarr_files(zarr_path, merged_zarr_path, x_range=(-10, 2), y_range=(49, 59)):\n",
+ "    \"\"\"Merge every Zarr store in a directory, crop it and write the result.\n",
+ "\n",
+ "    Args:\n",
+ "        zarr_path: Directory whose entries ending in '.zarr' are merged.\n",
+ "        merged_zarr_path: Destination of the cropped, merged Zarr store.\n",
+ "        x_range: (start, stop) labels of the x coordinate to keep.\n",
+ "        y_range: (start, stop) labels of the y coordinate to keep.\n",
+ "\n",
+ "    Raises:\n",
+ "        FileNotFoundError: If zarr_path contains no '.zarr' entry.\n",
+ "    \"\"\"\n",
+ "    # Sorted so the merge order does not depend on the arbitrary order of\n",
+ "    # os.listdir; combine_first prefers the values of the earlier store.\n",
+ "    zarr_files = sorted(\n",
+ "        os.path.join(zarr_path, entry)\n",
+ "        for entry in os.listdir(zarr_path)\n",
+ "        if entry.endswith('.zarr')\n",
+ "    )\n",
+ "    if not zarr_files:\n",
+ "        raise FileNotFoundError(f'No .zarr stores found in {zarr_path!r}')\n",
+ "\n",
+ "    print('1')\n",
+ "    # Open the first Zarr store to create the initial dataset\n",
+ "    merged_ds = xr.open_zarr(zarr_files[0])\n",
+ "\n",
+ "    print('2')\n",
+ "\n",
+ "    # Fold the remaining stores into the initial dataset\n",
+ "    for file in zarr_files[1:]:\n",
+ "        merged_ds = merged_ds.combine_first(xr.open_zarr(file))\n",
+ "\n",
+ "    print('3')\n",
+ "\n",
+ "    # Rechunk the merged dataset\n",
+ "    merged_ds = merged_ds.chunk(chunks={'init_time': 10, 'x': 100, 'y': 100})\n",
+ "\n",
+ "    print('4')\n",
+ "\n",
+ "    # Label-based crop. NOTE(review): slicing like this presumably relies on\n",
+ "    # x and y being sorted in ascending order - confirm for every input store.\n",
+ "    filtered_dataset = merged_ds.sel(x=slice(*x_range), y=slice(*y_range))\n",
+ "\n",
+ "    print(filtered_dataset)\n",
+ "\n",
+ "    # Save the cropped dataset as a new Zarr store\n",
+ "    filtered_dataset.to_zarr(merged_zarr_path)\n",
+ "\n",
+ "    print('5')\n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ "# Specify the path where the independent Zarr files are located\n",
+ "zarr_path = \"/mnt/storage_b/data/ocf/solar_pv_nowcasting/experimental/Excarta/sr_UK_Malta_full/zarr_format/r3_test_merge\"\n",
+ "\n",
+ "# Specify the path for the merged Zarr file\n",
+ "merged_zarr_path = \"/mnt/storage_b/data/ocf/solar_pv_nowcasting/experimental/Excarta/sr_UK_Malta_full/merged_excarta/merged_3_r3_test_merge_ukfilt_t2.zarr\"\n",
+ "\n",
+ "# Merge the Zarr files\n",
+ "merge_zarr_files(zarr_path, merged_zarr_path)\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 104,
+ "id": "d250308d",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "
<xarray.Dataset>\n",
+ "Dimensions: (init_time: 124, step: 168, variable: 3, y: 122, x: 122)\n",
+ "Coordinates:\n",
+ " * init_time (init_time) datetime64[ns] 2022-05-01 ... 2022-05-31T18:00:00\n",
+ " * step (step) float64 1.0 2.0 3.0 4.0 5.0 ... 165.0 166.0 167.0 168.0\n",
+ " * variable (variable) <U3 'dni' 'dhi' 'ghi'\n",
+ " * x (x) float64 -9.879 -9.758 -9.636 -9.515 ... 14.83 14.92 15.0\n",
+ " * y (y) float64 35.0 35.08 35.17 35.25 35.33 ... 58.7 58.8 58.9 59.0\n",
+ "Data variables:\n",
+ " value (variable, init_time, step, y, x) float64 dask.array<chunksize=(1, 16, 42, 31, 31), meta=np.ndarray>
- init_time: 124
- step: 168
- variable: 3
- y: 122
- x: 122
init_time
(init_time)
datetime64[ns]
2022-05-01 ... 2022-05-31T18:00:00
array(['2022-05-01T00:00:00.000000000', '2022-05-01T06:00:00.000000000',\n",
+ " '2022-05-01T12:00:00.000000000', '2022-05-01T18:00:00.000000000',\n",
+ " '2022-05-02T00:00:00.000000000', '2022-05-02T06:00:00.000000000',\n",
+ " '2022-05-02T12:00:00.000000000', '2022-05-02T18:00:00.000000000',\n",
+ " '2022-05-03T00:00:00.000000000', '2022-05-03T06:00:00.000000000',\n",
+ " '2022-05-03T12:00:00.000000000', '2022-05-03T18:00:00.000000000',\n",
+ " '2022-05-04T00:00:00.000000000', '2022-05-04T06:00:00.000000000',\n",
+ " '2022-05-04T12:00:00.000000000', '2022-05-04T18:00:00.000000000',\n",
+ " '2022-05-05T00:00:00.000000000', '2022-05-05T06:00:00.000000000',\n",
+ " '2022-05-05T12:00:00.000000000', '2022-05-05T18:00:00.000000000',\n",
+ " '2022-05-06T00:00:00.000000000', '2022-05-06T06:00:00.000000000',\n",
+ " '2022-05-06T12:00:00.000000000', '2022-05-06T18:00:00.000000000',\n",
+ " '2022-05-07T00:00:00.000000000', '2022-05-07T06:00:00.000000000',\n",
+ " '2022-05-07T12:00:00.000000000', '2022-05-07T18:00:00.000000000',\n",
+ " '2022-05-08T00:00:00.000000000', '2022-05-08T06:00:00.000000000',\n",
+ " '2022-05-08T12:00:00.000000000', '2022-05-08T18:00:00.000000000',\n",
+ " '2022-05-09T00:00:00.000000000', '2022-05-09T06:00:00.000000000',\n",
+ " '2022-05-09T12:00:00.000000000', '2022-05-09T18:00:00.000000000',\n",
+ " '2022-05-10T00:00:00.000000000', '2022-05-10T06:00:00.000000000',\n",
+ " '2022-05-10T12:00:00.000000000', '2022-05-10T18:00:00.000000000',\n",
+ " '2022-05-11T00:00:00.000000000', '2022-05-11T06:00:00.000000000',\n",
+ " '2022-05-11T12:00:00.000000000', '2022-05-11T18:00:00.000000000',\n",
+ " '2022-05-12T00:00:00.000000000', '2022-05-12T06:00:00.000000000',\n",
+ " '2022-05-12T12:00:00.000000000', '2022-05-12T18:00:00.000000000',\n",
+ " '2022-05-13T00:00:00.000000000', '2022-05-13T06:00:00.000000000',\n",
+ " '2022-05-13T12:00:00.000000000', '2022-05-13T18:00:00.000000000',\n",
+ " '2022-05-14T00:00:00.000000000', '2022-05-14T06:00:00.000000000',\n",
+ " '2022-05-14T12:00:00.000000000', '2022-05-14T18:00:00.000000000',\n",
+ " '2022-05-15T00:00:00.000000000', '2022-05-15T06:00:00.000000000',\n",
+ " '2022-05-15T12:00:00.000000000', '2022-05-15T18:00:00.000000000',\n",
+ " '2022-05-16T00:00:00.000000000', '2022-05-16T06:00:00.000000000',\n",
+ " '2022-05-16T12:00:00.000000000', '2022-05-16T18:00:00.000000000',\n",
+ " '2022-05-17T00:00:00.000000000', '2022-05-17T06:00:00.000000000',\n",
+ " '2022-05-17T12:00:00.000000000', '2022-05-17T18:00:00.000000000',\n",
+ " '2022-05-18T00:00:00.000000000', '2022-05-18T06:00:00.000000000',\n",
+ " '2022-05-18T12:00:00.000000000', '2022-05-18T18:00:00.000000000',\n",
+ " '2022-05-19T00:00:00.000000000', '2022-05-19T06:00:00.000000000',\n",
+ " '2022-05-19T12:00:00.000000000', '2022-05-19T18:00:00.000000000',\n",
+ " '2022-05-20T00:00:00.000000000', '2022-05-20T06:00:00.000000000',\n",
+ " '2022-05-20T12:00:00.000000000', '2022-05-20T18:00:00.000000000',\n",
+ " '2022-05-21T00:00:00.000000000', '2022-05-21T06:00:00.000000000',\n",
+ " '2022-05-21T12:00:00.000000000', '2022-05-21T18:00:00.000000000',\n",
+ " '2022-05-22T00:00:00.000000000', '2022-05-22T06:00:00.000000000',\n",
+ " '2022-05-22T12:00:00.000000000', '2022-05-22T18:00:00.000000000',\n",
+ " '2022-05-23T00:00:00.000000000', '2022-05-23T06:00:00.000000000',\n",
+ " '2022-05-23T12:00:00.000000000', '2022-05-23T18:00:00.000000000',\n",
+ " '2022-05-24T00:00:00.000000000', '2022-05-24T06:00:00.000000000',\n",
+ " '2022-05-24T12:00:00.000000000', '2022-05-24T18:00:00.000000000',\n",
+ " '2022-05-25T00:00:00.000000000', '2022-05-25T06:00:00.000000000',\n",
+ " '2022-05-25T12:00:00.000000000', '2022-05-25T18:00:00.000000000',\n",
+ " '2022-05-26T00:00:00.000000000', '2022-05-26T06:00:00.000000000',\n",
+ " '2022-05-26T12:00:00.000000000', '2022-05-26T18:00:00.000000000',\n",
+ " '2022-05-27T00:00:00.000000000', '2022-05-27T06:00:00.000000000',\n",
+ " '2022-05-27T12:00:00.000000000', '2022-05-27T18:00:00.000000000',\n",
+ " '2022-05-28T00:00:00.000000000', '2022-05-28T06:00:00.000000000',\n",
+ " '2022-05-28T12:00:00.000000000', '2022-05-28T18:00:00.000000000',\n",
+ " '2022-05-29T00:00:00.000000000', '2022-05-29T06:00:00.000000000',\n",
+ " '2022-05-29T12:00:00.000000000', '2022-05-29T18:00:00.000000000',\n",
+ " '2022-05-30T00:00:00.000000000', '2022-05-30T06:00:00.000000000',\n",
+ " '2022-05-30T12:00:00.000000000', '2022-05-30T18:00:00.000000000',\n",
+ " '2022-05-31T00:00:00.000000000', '2022-05-31T06:00:00.000000000',\n",
+ " '2022-05-31T12:00:00.000000000', '2022-05-31T18:00:00.000000000'],\n",
+ " dtype='datetime64[ns]')
step
(step)
float64
1.0 2.0 3.0 ... 166.0 167.0 168.0
array([ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.,\n",
+ " 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24.,\n",
+ " 25., 26., 27., 28., 29., 30., 31., 32., 33., 34., 35., 36.,\n",
+ " 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48.,\n",
+ " 49., 50., 51., 52., 53., 54., 55., 56., 57., 58., 59., 60.,\n",
+ " 61., 62., 63., 64., 65., 66., 67., 68., 69., 70., 71., 72.,\n",
+ " 73., 74., 75., 76., 77., 78., 79., 80., 81., 82., 83., 84.,\n",
+ " 85., 86., 87., 88., 89., 90., 91., 92., 93., 94., 95., 96.,\n",
+ " 97., 98., 99., 100., 101., 102., 103., 104., 105., 106., 107., 108.,\n",
+ " 109., 110., 111., 112., 113., 114., 115., 116., 117., 118., 119., 120.,\n",
+ " 121., 122., 123., 124., 125., 126., 127., 128., 129., 130., 131., 132.,\n",
+ " 133., 134., 135., 136., 137., 138., 139., 140., 141., 142., 143., 144.,\n",
+ " 145., 146., 147., 148., 149., 150., 151., 152., 153., 154., 155., 156.,\n",
+ " 157., 158., 159., 160., 161., 162., 163., 164., 165., 166., 167., 168.])
variable
(variable)
<U3
'dni' 'dhi' 'ghi'
array(['dni', 'dhi', 'ghi'], dtype='<U3')
x
(x)
float64
-9.879 -9.758 -9.636 ... 14.92 15.0
array([-9.878788, -9.757576, -9.636364, -9.515152, -9.393939, -9.272727,\n",
+ " -9.151515, -9.030303, -8.909091, -8.787879, -8.666667, -8.545455,\n",
+ " -8.424242, -8.30303 , -8.181818, -8.060606, -7.939394, -7.818182,\n",
+ " -7.69697 , -7.575758, -7.454545, -7.333333, -7.212121, -7.090909,\n",
+ " -6.969697, -6.848485, -6.727273, -6.606061, -6.484848, -6.363636,\n",
+ " -6.242424, -6.121212, -6. , -5.878788, -5.757576, -5.636364,\n",
+ " -5.515152, -5.393939, -5.272727, -5.151515, -5.030303, -4.909091,\n",
+ " -4.787879, -4.666667, -4.545455, -4.424242, -4.30303 , -4.181818,\n",
+ " -4.060606, -3.939394, -3.818182, -3.69697 , -3.575758, -3.454545,\n",
+ " -3.333333, -3.212121, -3.090909, -2.969697, -2.848485, -2.727273,\n",
+ " -2.606061, -2.484848, -2.363636, -2.242424, -2.121212, -2. ,\n",
+ " -1.878788, -1.757576, -1.636364, -1.515152, -1.393939, -1.272727,\n",
+ " -1.151515, -1.030303, -0.909091, -0.787879, -0.666667, -0.545455,\n",
+ " -0.424242, -0.30303 , 0.060606, 0.181818, 0.30303 , 0.424242,\n",
+ " 0.545455, 0.666667, 0.787879, 0.909091, 1.030303, 1.151515,\n",
+ " 1.272727, 1.393939, 1.515152, 1.636364, 1.757576, 1.878788,\n",
+ " 2. , 13. , 13.083333, 13.166667, 13.25 , 13.333333,\n",
+ " 13.416667, 13.5 , 13.583333, 13.666667, 13.75 , 13.833333,\n",
+ " 13.916667, 14. , 14.083333, 14.166667, 14.25 , 14.333333,\n",
+ " 14.416667, 14.5 , 14.583333, 14.666667, 14.75 , 14.833333,\n",
+ " 14.916667, 15. ])
y
(y)
float64
35.0 35.08 35.17 ... 58.8 58.9 59.0
array([35. , 35.083333, 35.166667, 35.25 , 35.333333, 35.416667,\n",
+ " 35.5 , 35.583333, 35.666667, 35.75 , 35.833333, 35.916667,\n",
+ " 36. , 36.083333, 36.166667, 36.25 , 36.333333, 36.416667,\n",
+ " 36.5 , 36.583333, 36.666667, 36.75 , 36.833333, 36.916667,\n",
+ " 37. , 49.10101 , 49.20202 , 49.30303 , 49.40404 , 49.505051,\n",
+ " 49.606061, 49.707071, 49.808081, 49.909091, 50.010101, 50.111111,\n",
+ " 50.212121, 50.313131, 50.414141, 50.515152, 50.616162, 50.717172,\n",
+ " 50.818182, 50.919192, 51.020202, 51.121212, 51.222222, 51.323232,\n",
+ " 51.424242, 51.525253, 51.626263, 51.727273, 51.828283, 51.929293,\n",
+ " 52.030303, 52.131313, 52.232323, 52.333333, 52.434343, 52.535354,\n",
+ " 52.636364, 52.737374, 52.838384, 52.939394, 53.040404, 53.141414,\n",
+ " 53.242424, 53.343434, 53.444444, 53.545455, 53.646465, 53.747475,\n",
+ " 53.848485, 53.949495, 54.050505, 54.151515, 54.252525, 54.353535,\n",
+ " 54.454545, 54.555556, 54.656566, 54.757576, 54.858586, 54.959596,\n",
+ " 55.060606, 55.161616, 55.262626, 55.363636, 55.464646, 55.565657,\n",
+ " 55.666667, 55.767677, 55.868687, 55.969697, 56.070707, 56.171717,\n",
+ " 56.272727, 56.373737, 56.474747, 56.575758, 56.676768, 56.777778,\n",
+ " 56.878788, 56.979798, 57.080808, 57.383838, 57.484848, 57.585859,\n",
+ " 57.686869, 57.787879, 57.888889, 57.989899, 58.090909, 58.191919,\n",
+ " 58.292929, 58.393939, 58.494949, 58.59596 , 58.69697 , 58.79798 ,\n",
+ " 58.89899 , 59. ])
value
(variable, init_time, step, y, x)
float64
dask.array<chunksize=(1, 16, 42, 31, 31), meta=np.ndarray>
\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " | | \n",
+ " Array | \n",
+ " Chunk | \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " | Bytes | \n",
+ " 6.93 GiB | \n",
+ " 4.93 MiB | \n",
+ " \n",
+ " \n",
+ " \n",
+ " | Shape | \n",
+ " (3, 124, 168, 122, 122) | \n",
+ " (1, 16, 42, 31, 31) | \n",
+ " \n",
+ " \n",
+ " | Dask graph | \n",
+ " 1536 chunks in 2 graph layers | \n",
+ " \n",
+ " \n",
+ " | Data type | \n",
+ " float64 numpy.ndarray | \n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " | \n",
+ "
\n",
+ "
PandasIndex
PandasIndex(DatetimeIndex(['2022-05-01 00:00:00', '2022-05-01 06:00:00',\n",
+ " '2022-05-01 12:00:00', '2022-05-01 18:00:00',\n",
+ " '2022-05-02 00:00:00', '2022-05-02 06:00:00',\n",
+ " '2022-05-02 12:00:00', '2022-05-02 18:00:00',\n",
+ " '2022-05-03 00:00:00', '2022-05-03 06:00:00',\n",
+ " ...\n",
+ " '2022-05-29 12:00:00', '2022-05-29 18:00:00',\n",
+ " '2022-05-30 00:00:00', '2022-05-30 06:00:00',\n",
+ " '2022-05-30 12:00:00', '2022-05-30 18:00:00',\n",
+ " '2022-05-31 00:00:00', '2022-05-31 06:00:00',\n",
+ " '2022-05-31 12:00:00', '2022-05-31 18:00:00'],\n",
+ " dtype='datetime64[ns]', name='init_time', length=124, freq=None))
PandasIndex
PandasIndex(Float64Index([ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0,\n",
+ " 10.0,\n",
+ " ...\n",
+ " 159.0, 160.0, 161.0, 162.0, 163.0, 164.0, 165.0, 166.0, 167.0,\n",
+ " 168.0],\n",
+ " dtype='float64', name='step', length=168))
PandasIndex
PandasIndex(Index(['dni', 'dhi', 'ghi'], dtype='object', name='variable'))
PandasIndex
PandasIndex(Float64Index([-9.878788, -9.757576, -9.636364, -9.515152, -9.393939, -9.272727,\n",
+ " -9.151515, -9.030303, -8.909091, -8.787879,\n",
+ " ...\n",
+ " 14.25, 14.333333, 14.416667, 14.5, 14.583333, 14.666667,\n",
+ " 14.75, 14.833333, 14.916667, 15.0],\n",
+ " dtype='float64', name='x', length=122))
PandasIndex
PandasIndex(Float64Index([ 35.0, 35.083333, 35.166667, 35.25, 35.333333, 35.416667,\n",
+ " 35.5, 35.583333, 35.666667, 35.75,\n",
+ " ...\n",
+ " 58.090909, 58.191919, 58.292929, 58.393939, 58.494949, 58.59596,\n",
+ " 58.69697, 58.79798, 58.89899, 59.0],\n",
+ " dtype='float64', name='y', length=122))
"
+ ],
+ "text/plain": [
+ "\n",
+ "Dimensions: (init_time: 124, step: 168, variable: 3, y: 122, x: 122)\n",
+ "Coordinates:\n",
+ " * init_time (init_time) datetime64[ns] 2022-05-01 ... 2022-05-31T18:00:00\n",
+ " * step (step) float64 1.0 2.0 3.0 4.0 5.0 ... 165.0 166.0 167.0 168.0\n",
+ " * variable (variable) "
+ ]
+ },
+ "execution_count": 104,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "\n",
+ "# path_zarr_test = \"/mnt/storage_b/data/ocf/solar_pv_nowcasting/experimental/Excarta/sr_UK_Malta_full/zarr_format/r4/Malta/full_v2_201801.zarr\"\n",
+ "\n",
+ "path_zarr_test = \"/mnt/storage_b/data/ocf/solar_pv_nowcasting/experimental/Excarta/sr_UK_Malta_full/zarr_format/r3/full_v1_202205.zarr\"\n",
+ "ds = xr.open_zarr(path_zarr_test)\n",
+ "\n",
+ "ds\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 105,
+ "id": "acd344fc",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "7441.528036\n"
+ ]
+ }
+ ],
+ "source": [
+ "print((ds.nbytes)/1000000)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 106,
+ "id": "a9ed2e1d",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "True\n",
+ "False\n",
+ "True\n",
+ "False\n",
+ "True\n",
+ "False\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/tmp/ipykernel_2050364/1594016849.py:2: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison\n",
+ " print('2018-01-04T06:00:00.000000000' in ds['value'].coords['init_time'].values)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Sanity checks on the opened store.\n",
+ "print('dhi' in ds['value'].coords['variable'].values)\n",
+ "# Compare as datetime64: testing a plain string against a datetime64 array\n",
+ "# makes numpy emit the 'elementwise comparison failed' FutureWarning.\n",
+ "print(np.datetime64('2018-01-04T06:00:00') in ds['value'].coords['init_time'].values)\n",
+ "\n",
+ "# For each coordinate at most one of the two lines below can print True.\n",
+ "print(np.all(np.diff(ds.coords['x']) > 0))  # True only when x is strictly ascending\n",
+ "print(np.all(np.diff(ds.coords['x']) < 0))  # True only when x is strictly descending\n",
+ "\n",
+ "print(np.all(np.diff(ds.coords['y']) > 0))  # True only when y is strictly ascending\n",
+ "print(np.all(np.diff(ds.coords['y']) < 0))  # True only when y is strictly descending\n",
+ "\n",
+ "\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 107,
+ "id": "0811c9dd",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['2022-05-01T00:00:00.000000000' '2022-05-01T06:00:00.000000000'\n",
+ " '2022-05-01T12:00:00.000000000' '2022-05-01T18:00:00.000000000'\n",
+ " '2022-05-02T00:00:00.000000000' '2022-05-02T06:00:00.000000000'\n",
+ " '2022-05-02T12:00:00.000000000' '2022-05-02T18:00:00.000000000'\n",
+ " '2022-05-03T00:00:00.000000000' '2022-05-03T06:00:00.000000000'\n",
+ " '2022-05-03T12:00:00.000000000' '2022-05-03T18:00:00.000000000'\n",
+ " '2022-05-04T00:00:00.000000000' '2022-05-04T06:00:00.000000000'\n",
+ " '2022-05-04T12:00:00.000000000' '2022-05-04T18:00:00.000000000'\n",
+ " '2022-05-05T00:00:00.000000000' '2022-05-05T06:00:00.000000000'\n",
+ " '2022-05-05T12:00:00.000000000' '2022-05-05T18:00:00.000000000'\n",
+ " '2022-05-06T00:00:00.000000000' '2022-05-06T06:00:00.000000000'\n",
+ " '2022-05-06T12:00:00.000000000' '2022-05-06T18:00:00.000000000'\n",
+ " '2022-05-07T00:00:00.000000000' '2022-05-07T06:00:00.000000000'\n",
+ " '2022-05-07T12:00:00.000000000' '2022-05-07T18:00:00.000000000'\n",
+ " '2022-05-08T00:00:00.000000000' '2022-05-08T06:00:00.000000000'\n",
+ " '2022-05-08T12:00:00.000000000' '2022-05-08T18:00:00.000000000'\n",
+ " '2022-05-09T00:00:00.000000000' '2022-05-09T06:00:00.000000000'\n",
+ " '2022-05-09T12:00:00.000000000' '2022-05-09T18:00:00.000000000'\n",
+ " '2022-05-10T00:00:00.000000000' '2022-05-10T06:00:00.000000000'\n",
+ " '2022-05-10T12:00:00.000000000' '2022-05-10T18:00:00.000000000'\n",
+ " '2022-05-11T00:00:00.000000000' '2022-05-11T06:00:00.000000000'\n",
+ " '2022-05-11T12:00:00.000000000' '2022-05-11T18:00:00.000000000'\n",
+ " '2022-05-12T00:00:00.000000000' '2022-05-12T06:00:00.000000000'\n",
+ " '2022-05-12T12:00:00.000000000' '2022-05-12T18:00:00.000000000'\n",
+ " '2022-05-13T00:00:00.000000000' '2022-05-13T06:00:00.000000000'\n",
+ " '2022-05-13T12:00:00.000000000' '2022-05-13T18:00:00.000000000'\n",
+ " '2022-05-14T00:00:00.000000000' '2022-05-14T06:00:00.000000000'\n",
+ " '2022-05-14T12:00:00.000000000' '2022-05-14T18:00:00.000000000'\n",
+ " '2022-05-15T00:00:00.000000000' '2022-05-15T06:00:00.000000000'\n",
+ " '2022-05-15T12:00:00.000000000' '2022-05-15T18:00:00.000000000'\n",
+ " '2022-05-16T00:00:00.000000000' '2022-05-16T06:00:00.000000000'\n",
+ " '2022-05-16T12:00:00.000000000' '2022-05-16T18:00:00.000000000'\n",
+ " '2022-05-17T00:00:00.000000000' '2022-05-17T06:00:00.000000000'\n",
+ " '2022-05-17T12:00:00.000000000' '2022-05-17T18:00:00.000000000'\n",
+ " '2022-05-18T00:00:00.000000000' '2022-05-18T06:00:00.000000000'\n",
+ " '2022-05-18T12:00:00.000000000' '2022-05-18T18:00:00.000000000'\n",
+ " '2022-05-19T00:00:00.000000000' '2022-05-19T06:00:00.000000000'\n",
+ " '2022-05-19T12:00:00.000000000' '2022-05-19T18:00:00.000000000'\n",
+ " '2022-05-20T00:00:00.000000000' '2022-05-20T06:00:00.000000000'\n",
+ " '2022-05-20T12:00:00.000000000' '2022-05-20T18:00:00.000000000'\n",
+ " '2022-05-21T00:00:00.000000000' '2022-05-21T06:00:00.000000000'\n",
+ " '2022-05-21T12:00:00.000000000' '2022-05-21T18:00:00.000000000'\n",
+ " '2022-05-22T00:00:00.000000000' '2022-05-22T06:00:00.000000000'\n",
+ " '2022-05-22T12:00:00.000000000' '2022-05-22T18:00:00.000000000'\n",
+ " '2022-05-23T00:00:00.000000000' '2022-05-23T06:00:00.000000000'\n",
+ " '2022-05-23T12:00:00.000000000' '2022-05-23T18:00:00.000000000'\n",
+ " '2022-05-24T00:00:00.000000000' '2022-05-24T06:00:00.000000000'\n",
+ " '2022-05-24T12:00:00.000000000' '2022-05-24T18:00:00.000000000'\n",
+ " '2022-05-25T00:00:00.000000000' '2022-05-25T06:00:00.000000000'\n",
+ " '2022-05-25T12:00:00.000000000' '2022-05-25T18:00:00.000000000'\n",
+ " '2022-05-26T00:00:00.000000000' '2022-05-26T06:00:00.000000000'\n",
+ " '2022-05-26T12:00:00.000000000' '2022-05-26T18:00:00.000000000'\n",
+ " '2022-05-27T00:00:00.000000000' '2022-05-27T06:00:00.000000000'\n",
+ " '2022-05-27T12:00:00.000000000' '2022-05-27T18:00:00.000000000'\n",
+ " '2022-05-28T00:00:00.000000000' '2022-05-28T06:00:00.000000000'\n",
+ " '2022-05-28T12:00:00.000000000' '2022-05-28T18:00:00.000000000'\n",
+ " '2022-05-29T00:00:00.000000000' '2022-05-29T06:00:00.000000000'\n",
+ " '2022-05-29T12:00:00.000000000' '2022-05-29T18:00:00.000000000'\n",
+ " '2022-05-30T00:00:00.000000000' '2022-05-30T06:00:00.000000000'\n",
+ " '2022-05-30T12:00:00.000000000' '2022-05-30T18:00:00.000000000'\n",
+ " '2022-05-31T00:00:00.000000000' '2022-05-31T06:00:00.000000000'\n",
+ " '2022-05-31T12:00:00.000000000' '2022-05-31T18:00:00.000000000']\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(ds['value'].coords['init_time'].values)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 121,
+ "id": "1e1abd14",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Plot one variable against forecast step for a single init_time, at the grid\n",
+ "# point nearest to (plot_lon, plot_lat).\n",
+ "plot_variable = 'dhi'\n",
+ "plot_init_time = '2022-05-10T00:00:00.000000000'\n",
+ "plot_lon, plot_lat = 13.000000, 35.000000\n",
+ "\n",
+ "selected_data = ds['value'].sel(variable=plot_variable, init_time=plot_init_time)\n",
+ "\n",
+ "selected_data_2 = selected_data.sel(x=plot_lon, y=plot_lat, method='nearest')\n",
+ "\n",
+ "# Other grid points that were inspected:\n",
+ "# x=14.666667, y=35.166667\n",
+ "# x=13.250000, y=35.250000\n",
+ "\n",
+ "# Create a plot\n",
+ "plt.figure(figsize=(10, 6))\n",
+ "selected_data_2.plot.line('o-')\n",
+ "# Build the labels from the selection above so the title and y-axis cannot drift\n",
+ "# away from the variable that is actually plotted.\n",
+ "plt.title(f'{plot_variable} vs step, init time: {plot_init_time}, long:{plot_lon:.5f}, lat:{plot_lat:.4f}')\n",
+ "plt.xlabel('step')\n",
+ "plt.ylabel(plot_variable)\n",
+ "plt.grid(True)\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 109,
+ "id": "d81cde68",
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "KeyError",
+ "evalue": "\"not all values found in index 'init_time'. Try setting the `method` keyword argument (example: method='nearest').\"",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
+ "File \u001b[0;32m~/.cache/pypoetry/virtualenvs/psp-vzQW2Xvb-py3.10/lib/python3.10/site-packages/pandas/_libs/index.pyx:548\u001b[0m, in \u001b[0;36mpandas._libs.index.DatetimeEngine.get_loc\u001b[0;34m()\u001b[0m\n",
+ "File \u001b[0;32mpandas/_libs/hashtable_class_helper.pxi:2263\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.Int64HashTable.get_item\u001b[0;34m()\u001b[0m\n",
+ "File \u001b[0;32mpandas/_libs/hashtable_class_helper.pxi:2273\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.Int64HashTable.get_item\u001b[0;34m()\u001b[0m\n",
+ "\u001b[0;31mKeyError\u001b[0m: 1515196800000000000",
+ "\nDuring handling of the above exception, another exception occurred:\n",
+ "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
+ "File \u001b[0;32m~/.cache/pypoetry/virtualenvs/psp-vzQW2Xvb-py3.10/lib/python3.10/site-packages/pandas/core/indexes/base.py:3803\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m 3802\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 3803\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcasted_key\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3804\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n",
+ "File \u001b[0;32m~/.cache/pypoetry/virtualenvs/psp-vzQW2Xvb-py3.10/lib/python3.10/site-packages/pandas/_libs/index.pyx:516\u001b[0m, in \u001b[0;36mpandas._libs.index.DatetimeEngine.get_loc\u001b[0;34m()\u001b[0m\n",
+ "File \u001b[0;32m~/.cache/pypoetry/virtualenvs/psp-vzQW2Xvb-py3.10/lib/python3.10/site-packages/pandas/_libs/index.pyx:550\u001b[0m, in \u001b[0;36mpandas._libs.index.DatetimeEngine.get_loc\u001b[0;34m()\u001b[0m\n",
+ "\u001b[0;31mKeyError\u001b[0m: Timestamp('2018-01-06 00:00:00')",
+ "\nThe above exception was the direct cause of the following exception:\n",
+ "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
+ "File \u001b[0;32m~/.cache/pypoetry/virtualenvs/psp-vzQW2Xvb-py3.10/lib/python3.10/site-packages/pandas/core/indexes/datetimes.py:736\u001b[0m, in \u001b[0;36mDatetimeIndex.get_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m 735\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 736\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mIndex\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtolerance\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 737\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n",
+ "File \u001b[0;32m~/.cache/pypoetry/virtualenvs/psp-vzQW2Xvb-py3.10/lib/python3.10/site-packages/pandas/core/indexes/base.py:3805\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m 3804\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[0;32m-> 3805\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01merr\u001b[39;00m\n\u001b[1;32m 3806\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n\u001b[1;32m 3807\u001b[0m \u001b[38;5;66;03m# If we have a listlike key, _check_indexing_error will raise\u001b[39;00m\n\u001b[1;32m 3808\u001b[0m \u001b[38;5;66;03m# InvalidIndexError. Otherwise we fall through and re-raise\u001b[39;00m\n\u001b[1;32m 3809\u001b[0m \u001b[38;5;66;03m# the TypeError.\u001b[39;00m\n",
+ "\u001b[0;31mKeyError\u001b[0m: Timestamp('2018-01-06 00:00:00')",
+ "\nThe above exception was the direct cause of the following exception:\n",
+ "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
+ "File \u001b[0;32m~/.cache/pypoetry/virtualenvs/psp-vzQW2Xvb-py3.10/lib/python3.10/site-packages/xarray/core/indexes.py:473\u001b[0m, in \u001b[0;36mPandasIndex.sel\u001b[0;34m(self, labels, method, tolerance)\u001b[0m\n\u001b[1;32m 472\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 473\u001b[0m indexer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mindex\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlabel_value\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 474\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n",
+ "File \u001b[0;32m~/.cache/pypoetry/virtualenvs/psp-vzQW2Xvb-py3.10/lib/python3.10/site-packages/pandas/core/indexes/datetimes.py:738\u001b[0m, in \u001b[0;36mDatetimeIndex.get_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m 737\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[0;32m--> 738\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(orig_key) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01merr\u001b[39;00m\n",
+ "\u001b[0;31mKeyError\u001b[0m: '2018-01-06T00:00:00.000000000'",
+ "\nThe above exception was the direct cause of the following exception:\n",
+ "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[0;32mIn[109], line 30\u001b[0m\n\u001b[1;32m 27\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m df\n\u001b[1;32m 29\u001b[0m \u001b[38;5;66;03m# Call the function\u001b[39;00m\n\u001b[0;32m---> 30\u001b[0m nan_count_df \u001b[38;5;241m=\u001b[39m \u001b[43mcount_nans\u001b[49m\u001b[43m(\u001b[49m\u001b[43mds\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 32\u001b[0m \u001b[38;5;66;03m# Print the DataFrame\u001b[39;00m\n\u001b[1;32m 33\u001b[0m \u001b[38;5;28mprint\u001b[39m(nan_count_df)\n",
+ "Cell \u001b[0;32mIn[109], line 19\u001b[0m, in \u001b[0;36mcount_nans\u001b[0;34m(ds)\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[38;5;66;03m# Iterating over each unique pair of x and y\u001b[39;00m\n\u001b[1;32m 17\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m x, y \u001b[38;5;129;01min\u001b[39;00m product(x_values, y_values):\n\u001b[1;32m 18\u001b[0m \u001b[38;5;66;03m# Select the data for that pair of x and y\u001b[39;00m\n\u001b[0;32m---> 19\u001b[0m selected_data \u001b[38;5;241m=\u001b[39m \u001b[43mds\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mvalue\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msel\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvariable\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mdni\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minit_time\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m2018-01-06T00:00:00.000000000\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mx\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43my\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 21\u001b[0m \u001b[38;5;66;03m# Counting the NaN values\u001b[39;00m\n\u001b[1;32m 22\u001b[0m nan_count \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39misnan(selected_data\u001b[38;5;241m.\u001b[39mvalues)\u001b[38;5;241m.\u001b[39msum()\n",
+ "File \u001b[0;32m~/.cache/pypoetry/virtualenvs/psp-vzQW2Xvb-py3.10/lib/python3.10/site-packages/xarray/core/dataarray.py:1527\u001b[0m, in \u001b[0;36mDataArray.sel\u001b[0;34m(self, indexers, method, tolerance, drop, **indexers_kwargs)\u001b[0m\n\u001b[1;32m 1417\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21msel\u001b[39m(\n\u001b[1;32m 1418\u001b[0m \u001b[38;5;28mself\u001b[39m: T_DataArray,\n\u001b[1;32m 1419\u001b[0m indexers: Mapping[Any, Any] \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1423\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mindexers_kwargs: Any,\n\u001b[1;32m 1424\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m T_DataArray:\n\u001b[1;32m 1425\u001b[0m \u001b[38;5;124;03m\"\"\"Return a new DataArray whose data is given by selecting index\u001b[39;00m\n\u001b[1;32m 1426\u001b[0m \u001b[38;5;124;03m labels along the specified dimension(s).\u001b[39;00m\n\u001b[1;32m 1427\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1525\u001b[0m \u001b[38;5;124;03m Dimensions without coordinates: points\u001b[39;00m\n\u001b[1;32m 1526\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m-> 1527\u001b[0m ds \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_to_temp_dataset\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msel\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1528\u001b[0m \u001b[43m \u001b[49m\u001b[43mindexers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mindexers\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1529\u001b[0m \u001b[43m \u001b[49m\u001b[43mdrop\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdrop\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1530\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mmethod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1531\u001b[0m \u001b[43m \u001b[49m\u001b[43mtolerance\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtolerance\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1532\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mindexers_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1533\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1534\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_from_temp_dataset(ds)\n",
+ "File \u001b[0;32m~/.cache/pypoetry/virtualenvs/psp-vzQW2Xvb-py3.10/lib/python3.10/site-packages/xarray/core/dataset.py:2565\u001b[0m, in \u001b[0;36mDataset.sel\u001b[0;34m(self, indexers, method, tolerance, drop, **indexers_kwargs)\u001b[0m\n\u001b[1;32m 2504\u001b[0m \u001b[38;5;124;03m\"\"\"Returns a new dataset with each array indexed by tick labels\u001b[39;00m\n\u001b[1;32m 2505\u001b[0m \u001b[38;5;124;03malong the specified dimension(s).\u001b[39;00m\n\u001b[1;32m 2506\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 2562\u001b[0m \u001b[38;5;124;03mDataArray.sel\u001b[39;00m\n\u001b[1;32m 2563\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 2564\u001b[0m indexers \u001b[38;5;241m=\u001b[39m either_dict_or_kwargs(indexers, indexers_kwargs, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msel\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m-> 2565\u001b[0m query_results \u001b[38;5;241m=\u001b[39m \u001b[43mmap_index_queries\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 2566\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindexers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mindexers\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtolerance\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtolerance\u001b[49m\n\u001b[1;32m 2567\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2569\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m drop:\n\u001b[1;32m 2570\u001b[0m no_scalar_variables \u001b[38;5;241m=\u001b[39m {}\n",
+ "File \u001b[0;32m~/.cache/pypoetry/virtualenvs/psp-vzQW2Xvb-py3.10/lib/python3.10/site-packages/xarray/core/indexing.py:183\u001b[0m, in \u001b[0;36mmap_index_queries\u001b[0;34m(obj, indexers, method, tolerance, **indexers_kwargs)\u001b[0m\n\u001b[1;32m 181\u001b[0m results\u001b[38;5;241m.\u001b[39mappend(IndexSelResult(labels))\n\u001b[1;32m 182\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 183\u001b[0m results\u001b[38;5;241m.\u001b[39mappend(\u001b[43mindex\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msel\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlabels\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[1;32m 185\u001b[0m merged \u001b[38;5;241m=\u001b[39m merge_sel_results(results)\n\u001b[1;32m 187\u001b[0m \u001b[38;5;66;03m# drop dimension coordinates found in dimension indexers\u001b[39;00m\n\u001b[1;32m 188\u001b[0m \u001b[38;5;66;03m# (also drop multi-index if any)\u001b[39;00m\n\u001b[1;32m 189\u001b[0m \u001b[38;5;66;03m# (.sel() already ensures alignment)\u001b[39;00m\n",
+ "File \u001b[0;32m~/.cache/pypoetry/virtualenvs/psp-vzQW2Xvb-py3.10/lib/python3.10/site-packages/xarray/core/indexes.py:475\u001b[0m, in \u001b[0;36mPandasIndex.sel\u001b[0;34m(self, labels, method, tolerance)\u001b[0m\n\u001b[1;32m 473\u001b[0m indexer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mindex\u001b[38;5;241m.\u001b[39mget_loc(label_value)\n\u001b[1;32m 474\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m--> 475\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(\n\u001b[1;32m 476\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnot all values found in index \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcoord_name\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 477\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTry setting the `method` keyword argument (example: method=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnearest\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m).\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 478\u001b[0m ) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01me\u001b[39;00m\n\u001b[1;32m 480\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m label_array\u001b[38;5;241m.\u001b[39mdtype\u001b[38;5;241m.\u001b[39mkind \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 481\u001b[0m indexer \u001b[38;5;241m=\u001b[39m label_array\n",
+ "\u001b[0;31mKeyError\u001b[0m: \"not all values found in index 'init_time'. Try setting the `method` keyword argument (example: method='nearest').\""
+ ]
+ }
+ ],
+ "source": [
+ "# function to check quality of the data\n",
+ "\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "from itertools import product\n",
+ "\n",
+ "# Function to count NaNs for each combination of x and y\n",
+ "def count_nans(ds, variable='dni', init_time=None):\n",
+ "    \"\"\"Count NaN values at every (x, y) grid point for one variable and init_time.\n",
+ "\n",
+ "    Args:\n",
+ "        ds: Dataset holding a 'value' array indexed by 'variable', 'init_time',\n",
+ "            'x' and 'y'. Every other dimension (e.g. 'step') is reduced over.\n",
+ "        variable: Label to select along the 'variable' coordinate.\n",
+ "        init_time: Label to select along 'init_time'. Defaults to the first\n",
+ "            init_time in ds, so the check runs on whichever file is loaded\n",
+ "            instead of raising KeyError for a hard-coded date. Pass\n",
+ "            '2018-01-06T00:00:00.000000000' for the previously fixed date.\n",
+ "\n",
+ "    Returns:\n",
+ "        DataFrame with columns 'x', 'y' and 'NaN Count', one row per grid\n",
+ "        point, sorted by x and then y.\n",
+ "    \"\"\"\n",
+ "    if init_time is None:\n",
+ "        init_time = ds['init_time'].values[0]\n",
+ "\n",
+ "    field = ds['value'].sel(variable=variable, init_time=init_time)\n",
+ "\n",
+ "    # Reduce over all non-spatial dimensions in one vectorised pass rather than\n",
+ "    # issuing a separate .sel() per grid point.\n",
+ "    other_dims = [dim for dim in field.dims if dim not in ('x', 'y')]\n",
+ "    nan_counts = field.isnull().sum(dim=other_dims).transpose('x', 'y')\n",
+ "\n",
+ "    x_values = nan_counts['x'].values\n",
+ "    y_values = nan_counts['y'].values\n",
+ "\n",
+ "    # Build the frame once; DataFrame.append in a loop is quadratic and was\n",
+ "    # removed in pandas 2.0. ravel() is x-major, matching repeat/tile below.\n",
+ "    df = pd.DataFrame({\n",
+ "        'x': np.repeat(x_values, len(y_values)),\n",
+ "        'y': np.tile(y_values, len(x_values)),\n",
+ "        'NaN Count': nan_counts.values.ravel(),\n",
+ "    })\n",
+ "\n",
+ "    return df.sort_values(['x', 'y'], kind='stable').reset_index(drop=True)\n",
+ "\n",
+ "# Call the function\n",
+ "nan_count_df = count_nans(ds)\n",
+ "\n",
+ "# Print the DataFrame\n",
+ "print(nan_count_df)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 110,
+ "id": "5bfd9739",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " x | \n",
+ " y | \n",
+ " NaN Count | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 565 | \n",
+ " 14.833333 | \n",
+ " 36.250000 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 566 | \n",
+ " 14.833333 | \n",
+ " 36.333333 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 567 | \n",
+ " 14.833333 | \n",
+ " 36.416667 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 568 | \n",
+ " 14.833333 | \n",
+ " 36.500000 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 569 | \n",
+ " 14.833333 | \n",
+ " 36.583333 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 570 | \n",
+ " 14.833333 | \n",
+ " 36.666667 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 571 | \n",
+ " 14.833333 | \n",
+ " 36.750000 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 572 | \n",
+ " 14.833333 | \n",
+ " 36.833333 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 573 | \n",
+ " 14.833333 | \n",
+ " 36.916667 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 574 | \n",
+ " 14.833333 | \n",
+ " 37.000000 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 575 | \n",
+ " 14.916667 | \n",
+ " 35.000000 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 576 | \n",
+ " 14.916667 | \n",
+ " 35.083333 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 577 | \n",
+ " 14.916667 | \n",
+ " 35.166667 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 578 | \n",
+ " 14.916667 | \n",
+ " 35.250000 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 579 | \n",
+ " 14.916667 | \n",
+ " 35.333333 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 580 | \n",
+ " 14.916667 | \n",
+ " 35.416667 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 581 | \n",
+ " 14.916667 | \n",
+ " 35.500000 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 582 | \n",
+ " 14.916667 | \n",
+ " 35.583333 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 583 | \n",
+ " 14.916667 | \n",
+ " 35.666667 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 584 | \n",
+ " 14.916667 | \n",
+ " 35.750000 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 585 | \n",
+ " 14.916667 | \n",
+ " 35.833333 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 586 | \n",
+ " 14.916667 | \n",
+ " 35.916667 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 587 | \n",
+ " 14.916667 | \n",
+ " 36.000000 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 588 | \n",
+ " 14.916667 | \n",
+ " 36.083333 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 589 | \n",
+ " 14.916667 | \n",
+ " 36.166667 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 590 | \n",
+ " 14.916667 | \n",
+ " 36.250000 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 591 | \n",
+ " 14.916667 | \n",
+ " 36.333333 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 592 | \n",
+ " 14.916667 | \n",
+ " 36.416667 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 593 | \n",
+ " 14.916667 | \n",
+ " 36.500000 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 594 | \n",
+ " 14.916667 | \n",
+ " 36.583333 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 595 | \n",
+ " 14.916667 | \n",
+ " 36.666667 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 596 | \n",
+ " 14.916667 | \n",
+ " 36.750000 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 597 | \n",
+ " 14.916667 | \n",
+ " 36.833333 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 598 | \n",
+ " 14.916667 | \n",
+ " 36.916667 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 599 | \n",
+ " 14.916667 | \n",
+ " 37.000000 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 600 | \n",
+ " 15.000000 | \n",
+ " 35.000000 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 601 | \n",
+ " 15.000000 | \n",
+ " 35.083333 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 602 | \n",
+ " 15.000000 | \n",
+ " 35.166667 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 603 | \n",
+ " 15.000000 | \n",
+ " 35.250000 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 604 | \n",
+ " 15.000000 | \n",
+ " 35.333333 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 605 | \n",
+ " 15.000000 | \n",
+ " 35.416667 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 606 | \n",
+ " 15.000000 | \n",
+ " 35.500000 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 607 | \n",
+ " 15.000000 | \n",
+ " 35.583333 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 608 | \n",
+ " 15.000000 | \n",
+ " 35.666667 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 609 | \n",
+ " 15.000000 | \n",
+ " 35.750000 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 610 | \n",
+ " 15.000000 | \n",
+ " 35.833333 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 611 | \n",
+ " 15.000000 | \n",
+ " 35.916667 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 612 | \n",
+ " 15.000000 | \n",
+ " 36.000000 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 613 | \n",
+ " 15.000000 | \n",
+ " 36.083333 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 614 | \n",
+ " 15.000000 | \n",
+ " 36.166667 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 615 | \n",
+ " 15.000000 | \n",
+ " 36.250000 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 616 | \n",
+ " 15.000000 | \n",
+ " 36.333333 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 617 | \n",
+ " 15.000000 | \n",
+ " 36.416667 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 618 | \n",
+ " 15.000000 | \n",
+ " 36.500000 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 619 | \n",
+ " 15.000000 | \n",
+ " 36.583333 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 620 | \n",
+ " 15.000000 | \n",
+ " 36.666667 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 621 | \n",
+ " 15.000000 | \n",
+ " 36.750000 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 622 | \n",
+ " 15.000000 | \n",
+ " 36.833333 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 623 | \n",
+ " 15.000000 | \n",
+ " 36.916667 | \n",
+ " 168.0 | \n",
+ "
\n",
+ " \n",
+ " | 624 | \n",
+ " 15.000000 | \n",
+ " 37.000000 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " x y NaN Count\n",
+ "565 14.833333 36.250000 168.0\n",
+ "566 14.833333 36.333333 168.0\n",
+ "567 14.833333 36.416667 168.0\n",
+ "568 14.833333 36.500000 168.0\n",
+ "569 14.833333 36.583333 168.0\n",
+ "570 14.833333 36.666667 168.0\n",
+ "571 14.833333 36.750000 168.0\n",
+ "572 14.833333 36.833333 0.0\n",
+ "573 14.833333 36.916667 168.0\n",
+ "574 14.833333 37.000000 168.0\n",
+ "575 14.916667 35.000000 168.0\n",
+ "576 14.916667 35.083333 168.0\n",
+ "577 14.916667 35.166667 168.0\n",
+ "578 14.916667 35.250000 168.0\n",
+ "579 14.916667 35.333333 168.0\n",
+ "580 14.916667 35.416667 168.0\n",
+ "581 14.916667 35.500000 168.0\n",
+ "582 14.916667 35.583333 168.0\n",
+ "583 14.916667 35.666667 168.0\n",
+ "584 14.916667 35.750000 168.0\n",
+ "585 14.916667 35.833333 168.0\n",
+ "586 14.916667 35.916667 168.0\n",
+ "587 14.916667 36.000000 168.0\n",
+ "588 14.916667 36.083333 168.0\n",
+ "589 14.916667 36.166667 168.0\n",
+ "590 14.916667 36.250000 168.0\n",
+ "591 14.916667 36.333333 168.0\n",
+ "592 14.916667 36.416667 168.0\n",
+ "593 14.916667 36.500000 168.0\n",
+ "594 14.916667 36.583333 168.0\n",
+ "595 14.916667 36.666667 168.0\n",
+ "596 14.916667 36.750000 168.0\n",
+ "597 14.916667 36.833333 168.0\n",
+ "598 14.916667 36.916667 0.0\n",
+ "599 14.916667 37.000000 168.0\n",
+ "600 15.000000 35.000000 168.0\n",
+ "601 15.000000 35.083333 168.0\n",
+ "602 15.000000 35.166667 168.0\n",
+ "603 15.000000 35.250000 168.0\n",
+ "604 15.000000 35.333333 168.0\n",
+ "605 15.000000 35.416667 168.0\n",
+ "606 15.000000 35.500000 168.0\n",
+ "607 15.000000 35.583333 168.0\n",
+ "608 15.000000 35.666667 168.0\n",
+ "609 15.000000 35.750000 168.0\n",
+ "610 15.000000 35.833333 168.0\n",
+ "611 15.000000 35.916667 168.0\n",
+ "612 15.000000 36.000000 168.0\n",
+ "613 15.000000 36.083333 168.0\n",
+ "614 15.000000 36.166667 168.0\n",
+ "615 15.000000 36.250000 168.0\n",
+ "616 15.000000 36.333333 168.0\n",
+ "617 15.000000 36.416667 168.0\n",
+ "618 15.000000 36.500000 168.0\n",
+ "619 15.000000 36.583333 168.0\n",
+ "620 15.000000 36.666667 168.0\n",
+ "621 15.000000 36.750000 168.0\n",
+ "622 15.000000 36.833333 168.0\n",
+ "623 15.000000 36.916667 168.0\n",
+ "624 15.000000 37.000000 0.0"
+ ]
+ },
+ "execution_count": 110,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "nan_count_df.tail(60)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 111,
+ "id": "32ea5a93",
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "KeyError",
+ "evalue": "\"not all values found in index 'init_time'. Try setting the `method` keyword argument (example: method='nearest').\"",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
+ "File \u001b[0;32m~/.cache/pypoetry/virtualenvs/psp-vzQW2Xvb-py3.10/lib/python3.10/site-packages/pandas/_libs/index.pyx:548\u001b[0m, in \u001b[0;36mpandas._libs.index.DatetimeEngine.get_loc\u001b[0;34m()\u001b[0m\n",
+ "File \u001b[0;32mpandas/_libs/hashtable_class_helper.pxi:2263\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.Int64HashTable.get_item\u001b[0;34m()\u001b[0m\n",
+ "File \u001b[0;32mpandas/_libs/hashtable_class_helper.pxi:2273\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.Int64HashTable.get_item\u001b[0;34m()\u001b[0m\n",
+ "\u001b[0;31mKeyError\u001b[0m: 1515196800000000000",
+ "\nDuring handling of the above exception, another exception occurred:\n",
+ "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
+ "File \u001b[0;32m~/.cache/pypoetry/virtualenvs/psp-vzQW2Xvb-py3.10/lib/python3.10/site-packages/pandas/core/indexes/base.py:3803\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m 3802\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 3803\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcasted_key\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3804\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n",
+ "File \u001b[0;32m~/.cache/pypoetry/virtualenvs/psp-vzQW2Xvb-py3.10/lib/python3.10/site-packages/pandas/_libs/index.pyx:516\u001b[0m, in \u001b[0;36mpandas._libs.index.DatetimeEngine.get_loc\u001b[0;34m()\u001b[0m\n",
+ "File \u001b[0;32m~/.cache/pypoetry/virtualenvs/psp-vzQW2Xvb-py3.10/lib/python3.10/site-packages/pandas/_libs/index.pyx:550\u001b[0m, in \u001b[0;36mpandas._libs.index.DatetimeEngine.get_loc\u001b[0;34m()\u001b[0m\n",
+ "\u001b[0;31mKeyError\u001b[0m: Timestamp('2018-01-06 00:00:00')",
+ "\nThe above exception was the direct cause of the following exception:\n",
+ "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
+ "File \u001b[0;32m~/.cache/pypoetry/virtualenvs/psp-vzQW2Xvb-py3.10/lib/python3.10/site-packages/pandas/core/indexes/datetimes.py:736\u001b[0m, in \u001b[0;36mDatetimeIndex.get_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m 735\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 736\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mIndex\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtolerance\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 737\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n",
+ "File \u001b[0;32m~/.cache/pypoetry/virtualenvs/psp-vzQW2Xvb-py3.10/lib/python3.10/site-packages/pandas/core/indexes/base.py:3805\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m 3804\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[0;32m-> 3805\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01merr\u001b[39;00m\n\u001b[1;32m 3806\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n\u001b[1;32m 3807\u001b[0m \u001b[38;5;66;03m# If we have a listlike key, _check_indexing_error will raise\u001b[39;00m\n\u001b[1;32m 3808\u001b[0m \u001b[38;5;66;03m# InvalidIndexError. Otherwise we fall through and re-raise\u001b[39;00m\n\u001b[1;32m 3809\u001b[0m \u001b[38;5;66;03m# the TypeError.\u001b[39;00m\n",
+ "\u001b[0;31mKeyError\u001b[0m: Timestamp('2018-01-06 00:00:00')",
+ "\nThe above exception was the direct cause of the following exception:\n",
+ "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
+ "File \u001b[0;32m~/.cache/pypoetry/virtualenvs/psp-vzQW2Xvb-py3.10/lib/python3.10/site-packages/xarray/core/indexes.py:473\u001b[0m, in \u001b[0;36mPandasIndex.sel\u001b[0;34m(self, labels, method, tolerance)\u001b[0m\n\u001b[1;32m 472\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 473\u001b[0m indexer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mindex\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlabel_value\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 474\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n",
+ "File \u001b[0;32m~/.cache/pypoetry/virtualenvs/psp-vzQW2Xvb-py3.10/lib/python3.10/site-packages/pandas/core/indexes/datetimes.py:738\u001b[0m, in \u001b[0;36mDatetimeIndex.get_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m 737\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[0;32m--> 738\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(orig_key) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01merr\u001b[39;00m\n",
+ "\u001b[0;31mKeyError\u001b[0m: '2018-01-06T00:00:00.000000000'",
+ "\nThe above exception was the direct cause of the following exception:\n",
+ "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[0;32mIn[111], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Select the data for a given init_time, x, and y\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m selected_data \u001b[38;5;241m=\u001b[39m \u001b[43mds\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mvalue\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msel\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvariable\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mdni\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minit_time\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m2018-01-06T00:00:00.000000000\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 4\u001b[0m selected_data_2 \u001b[38;5;241m=\u001b[39m selected_data\u001b[38;5;241m.\u001b[39msel(x\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m13.000000\u001b[39m, y\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m35.000000\u001b[39m , method\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnearest\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 6\u001b[0m selected_data_2\u001b[38;5;241m.\u001b[39mvalues\n",
+ "File \u001b[0;32m~/.cache/pypoetry/virtualenvs/psp-vzQW2Xvb-py3.10/lib/python3.10/site-packages/xarray/core/dataarray.py:1527\u001b[0m, in \u001b[0;36mDataArray.sel\u001b[0;34m(self, indexers, method, tolerance, drop, **indexers_kwargs)\u001b[0m\n\u001b[1;32m 1417\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21msel\u001b[39m(\n\u001b[1;32m 1418\u001b[0m \u001b[38;5;28mself\u001b[39m: T_DataArray,\n\u001b[1;32m 1419\u001b[0m indexers: Mapping[Any, Any] \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1423\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mindexers_kwargs: Any,\n\u001b[1;32m 1424\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m T_DataArray:\n\u001b[1;32m 1425\u001b[0m \u001b[38;5;124;03m\"\"\"Return a new DataArray whose data is given by selecting index\u001b[39;00m\n\u001b[1;32m 1426\u001b[0m \u001b[38;5;124;03m labels along the specified dimension(s).\u001b[39;00m\n\u001b[1;32m 1427\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1525\u001b[0m \u001b[38;5;124;03m Dimensions without coordinates: points\u001b[39;00m\n\u001b[1;32m 1526\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m-> 1527\u001b[0m ds \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_to_temp_dataset\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msel\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1528\u001b[0m \u001b[43m \u001b[49m\u001b[43mindexers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mindexers\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1529\u001b[0m \u001b[43m \u001b[49m\u001b[43mdrop\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdrop\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1530\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mmethod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1531\u001b[0m \u001b[43m \u001b[49m\u001b[43mtolerance\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtolerance\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1532\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mindexers_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1533\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1534\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_from_temp_dataset(ds)\n",
+ "File \u001b[0;32m~/.cache/pypoetry/virtualenvs/psp-vzQW2Xvb-py3.10/lib/python3.10/site-packages/xarray/core/dataset.py:2565\u001b[0m, in \u001b[0;36mDataset.sel\u001b[0;34m(self, indexers, method, tolerance, drop, **indexers_kwargs)\u001b[0m\n\u001b[1;32m 2504\u001b[0m \u001b[38;5;124;03m\"\"\"Returns a new dataset with each array indexed by tick labels\u001b[39;00m\n\u001b[1;32m 2505\u001b[0m \u001b[38;5;124;03malong the specified dimension(s).\u001b[39;00m\n\u001b[1;32m 2506\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 2562\u001b[0m \u001b[38;5;124;03mDataArray.sel\u001b[39;00m\n\u001b[1;32m 2563\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 2564\u001b[0m indexers \u001b[38;5;241m=\u001b[39m either_dict_or_kwargs(indexers, indexers_kwargs, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msel\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m-> 2565\u001b[0m query_results \u001b[38;5;241m=\u001b[39m \u001b[43mmap_index_queries\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 2566\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindexers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mindexers\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtolerance\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtolerance\u001b[49m\n\u001b[1;32m 2567\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2569\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m drop:\n\u001b[1;32m 2570\u001b[0m no_scalar_variables \u001b[38;5;241m=\u001b[39m {}\n",
+ "File \u001b[0;32m~/.cache/pypoetry/virtualenvs/psp-vzQW2Xvb-py3.10/lib/python3.10/site-packages/xarray/core/indexing.py:183\u001b[0m, in \u001b[0;36mmap_index_queries\u001b[0;34m(obj, indexers, method, tolerance, **indexers_kwargs)\u001b[0m\n\u001b[1;32m 181\u001b[0m results\u001b[38;5;241m.\u001b[39mappend(IndexSelResult(labels))\n\u001b[1;32m 182\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 183\u001b[0m results\u001b[38;5;241m.\u001b[39mappend(\u001b[43mindex\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msel\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlabels\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[1;32m 185\u001b[0m merged \u001b[38;5;241m=\u001b[39m merge_sel_results(results)\n\u001b[1;32m 187\u001b[0m \u001b[38;5;66;03m# drop dimension coordinates found in dimension indexers\u001b[39;00m\n\u001b[1;32m 188\u001b[0m \u001b[38;5;66;03m# (also drop multi-index if any)\u001b[39;00m\n\u001b[1;32m 189\u001b[0m \u001b[38;5;66;03m# (.sel() already ensures alignment)\u001b[39;00m\n",
+ "File \u001b[0;32m~/.cache/pypoetry/virtualenvs/psp-vzQW2Xvb-py3.10/lib/python3.10/site-packages/xarray/core/indexes.py:475\u001b[0m, in \u001b[0;36mPandasIndex.sel\u001b[0;34m(self, labels, method, tolerance)\u001b[0m\n\u001b[1;32m 473\u001b[0m indexer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mindex\u001b[38;5;241m.\u001b[39mget_loc(label_value)\n\u001b[1;32m 474\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m--> 475\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(\n\u001b[1;32m 476\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnot all values found in index \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcoord_name\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 477\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTry setting the `method` keyword argument (example: method=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnearest\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m).\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 478\u001b[0m ) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01me\u001b[39;00m\n\u001b[1;32m 480\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m label_array\u001b[38;5;241m.\u001b[39mdtype\u001b[38;5;241m.\u001b[39mkind \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 481\u001b[0m indexer \u001b[38;5;241m=\u001b[39m label_array\n",
+ "\u001b[0;31mKeyError\u001b[0m: \"not all values found in index 'init_time'. Try setting the `method` keyword argument (example: method='nearest').\""
+ ]
+ }
+ ],
+ "source": [
+ "# Select the data for a given init_time, x, and y\n",
+ "selected_data = ds['value'].sel(variable='dni', init_time='2018-01-06T00:00:00.000000000')\n",
+ "\n",
+ "selected_data_2 = selected_data.sel(x=13.000000, y=35.000000 , method='nearest')\n",
+ "\n",
+ "selected_data_2.values"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 122,
+ "id": "b9e8dbe6",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# 'selected_data' must be 2D for this to work, so let's take a slice along 'step'\n",
+ "selected_data_slice = selected_data.isel(step=10)\n",
+ "\n",
+ "fig, ax = plt.subplots(figsize=(10, 6))\n",
+ "\n",
+ "# Create the plot\n",
+ "selected_data_slice.plot(ax=ax, x='x', y='y')\n",
+ "\n",
+ "# Set titles and labels\n",
+ "ax.set_title('Map of variable \"dni\" at the initial time \"2018-01-06T00:00:00.000000000\" at step 10')\n",
+ "ax.set_xlabel('Longitude (x)')\n",
+ "ax.set_ylabel('Latitude (y)')\n",
+ "\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 89,
+ "id": "1e24d887",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "array(True)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Check if there are any non-NaN values in the 'value' data variable\n",
+ "non_nan_exists = ds['value'].notnull().any().compute()\n",
+ "\n",
+ "print(non_nan_exists)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 90,
+ "id": "bf72ef11",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([[[ 0., nan, nan, ..., nan, nan, nan],\n",
+ " [nan, 0., nan, ..., nan, nan, nan],\n",
+ " [nan, nan, 0., ..., nan, nan, nan],\n",
+ " ...,\n",
+ " [nan, nan, nan, ..., 0., nan, nan],\n",
+ " [nan, nan, nan, ..., nan, 0., nan],\n",
+ " [nan, nan, nan, ..., nan, nan, 0.]],\n",
+ "\n",
+ " [[ 0., nan, nan, ..., nan, nan, nan],\n",
+ " [nan, 0., nan, ..., nan, nan, nan],\n",
+ " [nan, nan, 0., ..., nan, nan, nan],\n",
+ " ...,\n",
+ " [nan, nan, nan, ..., 0., nan, nan],\n",
+ " [nan, nan, nan, ..., nan, 0., nan],\n",
+ " [nan, nan, nan, ..., nan, nan, 0.]],\n",
+ "\n",
+ " [[ 0., nan, nan, ..., nan, nan, nan],\n",
+ " [nan, 0., nan, ..., nan, nan, nan],\n",
+ " [nan, nan, 0., ..., nan, nan, nan],\n",
+ " ...,\n",
+ " [nan, nan, nan, ..., 0., nan, nan],\n",
+ " [nan, nan, nan, ..., nan, 0., nan],\n",
+ " [nan, nan, nan, ..., nan, nan, 0.]],\n",
+ "\n",
+ " ...,\n",
+ "\n",
+ " [[ 0., nan, nan, ..., nan, nan, nan],\n",
+ " [nan, 0., nan, ..., nan, nan, nan],\n",
+ " [nan, nan, 0., ..., nan, nan, nan],\n",
+ " ...,\n",
+ " [nan, nan, nan, ..., 0., nan, nan],\n",
+ " [nan, nan, nan, ..., nan, 0., nan],\n",
+ " [nan, nan, nan, ..., nan, nan, 0.]],\n",
+ "\n",
+ " [[ 0., nan, nan, ..., nan, nan, nan],\n",
+ " [nan, 0., nan, ..., nan, nan, nan],\n",
+ " [nan, nan, 0., ..., nan, nan, nan],\n",
+ " ...,\n",
+ " [nan, nan, nan, ..., 0., nan, nan],\n",
+ " [nan, nan, nan, ..., nan, 0., nan],\n",
+ " [nan, nan, nan, ..., nan, nan, 0.]],\n",
+ "\n",
+ " [[ 0., nan, nan, ..., nan, nan, nan],\n",
+ " [nan, 0., nan, ..., nan, nan, nan],\n",
+ " [nan, nan, 0., ..., nan, nan, nan],\n",
+ " ...,\n",
+ " [nan, nan, nan, ..., 0., nan, nan],\n",
+ " [nan, nan, nan, ..., nan, 0., nan],\n",
+ " [nan, nan, nan, ..., nan, nan, 0.]]])"
+ ]
+ },
+ "execution_count": 90,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "selected_data.values"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7ac84f74",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.10"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/nwp/excarta/excarta_processing.py b/nwp/excarta/excarta_processing.py
new file mode 100644
index 0000000..9fb01d6
--- /dev/null
+++ b/nwp/excarta/excarta_processing.py
@@ -0,0 +1,101 @@
+import argparse
+import os
+import pathlib
+from datetime import datetime
+
+import gcsfs
+import numpy as np
+import xarray as xr
+
+
+def _parse_args():
+ parser = argparse.ArgumentParser()
+ parser.add_argument("output", type=pathlib.Path, help="Output zarr file")
+ parser.add_argument("year", type=int, help="Year to process")
+ parser.add_argument(
+ "--force",
+ action="store_true",
+ help="Overwrite the output file if it already exists.",
+ )
+
+ return parser.parse_args()
+
+
+def extract_files(args):
+ # initialize a GCSFileSystem
+ gcs = gcsfs.GCSFileSystem(project="excarta")
+ path = f"gs://excarta-public-us/pilots/ocf/{args.year}/"
+ files = gcs.ls(path)
+ datasets = []
+
+ for file in files:
+ filename = os.path.basename(file)
+
+ # extract date part from filename
+ date_part = filename.split(".")[0] # adjust this line if necessary
+
+ # # convert date_part into a datetime
+ date = datetime.strptime(date_part, "%Y%m%d%H") # adjust format string if necessary
+
+ print(date)
+ # convert the date to numpy datetime64
+ date_np = np.datetime64(date)
+
+ # load the Zarr store as an xarray Dataset
+ ds = xr.open_zarr(gcs.get_mapper(file), consolidated=True)
+ ds = ds.assign_coords(ts=date_np)
+
+ # calculate time differences in hours
+ step_values = (ds["datetimes"].values - date_np) / np.timedelta64(1, "h")
+ ds = ds.assign_coords(time=step_values)
+ ds = ds.rename({"time": "step"})
+
+ # add 'locidx' to the coordinates
+ ds = ds.assign_coords(locidx=ds["locidx"])
+ ds = ds.set_coords(["latitude", "longitude"])
+
+ # add to the list of datasets
+ datasets.append(ds)
+
+ return datasets
+
+
+def merged_zarrs(ds):
+ ds_merged = xr.concat(ds, dim="ts")
+ ds_merged = ds_merged.drop_vars("datetimes")
+
+ var_names = ds_merged.data_vars
+ d2 = xr.concat([ds_merged[v] for v in var_names], dim="variable")
+ d2 = d2.assign_coords(variable=("variable", var_names))
+ ds_merged = xr.Dataset(dict(value=d2))
+ ds_merged = ds_merged.sortby("step")
+ ds_merged = ds_merged.sortby("ts")
+
+ ds_merged["step"] = (
+ "step",
+ np.array(ds_merged["step"].values, dtype="timedelta64[h]"),
+ )
+
+ return ds_merged
+
+
+def main():
+ args = _parse_args()
+
+ output_path = f"{args.output}/excarta_{args.year}.zarr"
+
+ # if args.output.exists() and not args.force:
+ # raise RuntimeError(f'Output file "{args.output}" already exist')
+
+ datasets = extract_files(args)
+ print("merging zarrs")
+ ds_merged = merged_zarrs(datasets)
+ print("zarrs merged")
+
+ ds_merged.to_zarr(output_path)
+
+ print("file saved at output_path")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/nwp/excarta/merge_excarta.py b/nwp/excarta/merge_excarta.py
new file mode 100644
index 0000000..2684ad5
--- /dev/null
+++ b/nwp/excarta/merge_excarta.py
@@ -0,0 +1,66 @@
+# import libs
+import argparse
+import os
+import pathlib
+
+import numpy as np
+import xarray as xr
+
+
+def _parse_args():
+ parser = argparse.ArgumentParser()
+ parser.add_argument("input", type=pathlib.Path, help="Path to folder containing files")
+ parser.add_argument(
+ "output",
+ type=pathlib.Path,
+ help="Output path, include the file name with .zarr ending",
+ )
+ return parser.parse_args()
+
+
+def merge_zarr_files(zarr_path, merged_zarr_path):
+ # Collect paths of Zarr files in the specified directory
+ zarr_files = [
+ os.path.join(zarr_path, file) for file in os.listdir(zarr_path) if file.endswith(".zarr")
+ ]
+
+ # Open the datasets and store them in a list
+ datasets = [xr.open_dataset(file) for file in zarr_files]
+
+ # Concatenate the datasets along the 'init_time' dimension
+ merged_ds = xr.concat(datasets, dim="init_time")
+
+ merged_ds = merged_ds.sortby("init_time")
+
+ # Define the specific range of x and y coordinates
+ # x_range = (-10, 2) # Example x coordinate range
+ # y_range = (49, 59) # Example y coordinate range
+
+ # Iterate over the remaining Zarr files and merge them into the initial dataset
+ # for file in zarr_files[1:]:
+ # xr.open_zarr(file)
+ # print(file)
+
+ # # ds_filt = ds.sel(x=slice(*x_range), y=slice(*y_range))
+ # merged_ds = merged_ds.combine_first(ds_filt)
+
+ # Rechunk the merged dataset
+ # merged_ds = merged_ds.chunk(chunks={"init_time": 10, "x": 100, "y": 100})
+
+ # Get dims/coords into correct type
+ step_hours = merged_ds["step"].values
+ step_timedelta = np.timedelta64(1, "h") * step_hours
+ ds_timedelta = merged_ds.assign_coords(step=step_timedelta)
+
+ # Save the merged dataset as a new Zarr file
+ ds_timedelta.to_zarr(merged_zarr_path)
+
+
+def main():
+ args = _parse_args()
+ merge_zarr_files(args.input, args.output)
+
+
+# Check if script is being run directly
+if __name__ == "__main__":
+ main()
diff --git a/nwp/excarta/parse_excarta_monthly.py b/nwp/excarta/parse_excarta_monthly.py
new file mode 100644
index 0000000..9666e57
--- /dev/null
+++ b/nwp/excarta/parse_excarta_monthly.py
@@ -0,0 +1,144 @@
+# Low memory script
+import argparse
+import os
+import pathlib
+from datetime import datetime
+
+import pandas as pd
+import xarray as xr
+
+
+def _parse_args():
+ parser = argparse.ArgumentParser()
+ parser.add_argument("output", type=pathlib.Path, help="Output zarr file")
+ parser.add_argument("year", type=int, help="Year to process")
+ parser.add_argument("month", type=int, help="Month to process")
+ return parser.parse_args()
+
+
+def data_loader(folder_path, month_to_process):
+ """
+ Loads and transforms data from CSV files in the given folder_path and directly convert each DataFrame into an xarray Dataset.
+ Only process files for the month 'YYYYMM' given by month_to_process
+ """
+ month_to_process = datetime.strptime(month_to_process, "%Y%m")
+ column_names = [
+ "DateTimeUTC",
+ "LocationId",
+ "Latitude",
+ "Longitude",
+ "dni",
+ "dhi",
+ "ghi",
+ ]
+ files = os.listdir(folder_path)
+ datasets = []
+
+ for filename in files:
+ if filename.endswith(".csv") and not filename.startswith("._"):
+ file_datetime = datetime.strptime(filename[:-4], "%Y%m%d%H")
+
+ if (file_datetime.year == month_to_process.year) and (
+ file_datetime.month == month_to_process.month
+ ):
+ file_path = os.path.join(folder_path, filename)
+ df = pd.read_csv(
+ file_path,
+ header=None,
+ names=column_names,
+ parse_dates=["DateTimeUTC"],
+ )
+
+ df["step"] = (
+ df["DateTimeUTC"] - file_datetime
+ ).dt.total_seconds() / 3600 # convert timedelta to hours
+ df["init_time"] = file_datetime
+
+ # Convert the dataframe to an xarray Dataset and append to the list
+ ds = xr.Dataset.from_dataframe(df)
+ ds = ds.drop_vars(["LocationId", "DateTimeUTC"])
+ datasets.append(ds)
+
+ return datasets
+
+
+def load_data_from_all_years(parent_folder_path, month_to_process):
+ all_datasets = []
+
+ # Get 'year' part from month_to_process 'YYYYMM' in string format
+ year_to_process = int(month_to_process[:4])
+
+ folder_path = os.path.join(parent_folder_path, str(year_to_process))
+ datasets = data_loader(folder_path, month_to_process)
+ all_datasets.extend(datasets)
+
+ return all_datasets
+
+
+def pdtocdf(datasets):
+    """
+    Processes the xarray Datasets and merges them.
+
+    Each dataset gets a multi-index over (init_time, step, Latitude, Longitude)
+    on its "index" dimension, and the datasets are concatenated along "index".
+    One hour is then subtracted from init_time, Latitude/Longitude are renamed
+    to y/x, and the data variables are stacked along a new "variable"
+    dimension into a single data variable called "value".
+
+    Returns the resulting Dataset, sorted by step and then by init_time. The
+    "index" dimension is not unstacked here.
+    """
+
+    # Index every dataset the same way so they can be concatenated along "index"
+    datasets = [
+        ds.set_index(index=["init_time", "step", "Latitude", "Longitude"]) for ds in datasets
+    ]
+
+    ds = xr.concat(datasets, dim="index")
+
+    # Subtract one hour from the init_time dimension
+    # NOTE(review): the reason for the one-hour shift is not visible in this file - confirm
+    ds["init_time"] = ds["init_time"] - pd.Timedelta(hours=1)
+
+    # # Define the specific range of x and y coordinates to filter the data on
+    # x_range = (-10, 2)  # Example x coordinate range
+    # y_range = (49, 59)  # Example y coordinate range
+
+    ds = ds.rename({"Latitude": "y", "Longitude": "x"})
+
+    # Stack the data variables along a new "variable" dimension, labelled by name
+    var_names = ds.data_vars
+    d2 = xr.concat([ds[v] for v in var_names], dim="variable")
+    d2 = d2.assign_coords(variable=("variable", var_names))
+    ds = xr.Dataset(dict(value=d2))
+    ds = ds.sortby("step")
+    ds = ds.sortby("init_time")
+
+    return ds
+
+
+def main():
+ args = _parse_args()
+
+ if args.output.exists():
+ raise RuntimeError(f'Output file "{args.output}" already exist')
+
+ PATH = "/mnt/storage_b/data/ocf/solar_pv_nowcasting/experimental/Excarta/sr_UK_Malta_full/solar_data"
+ month_to_process = (
+ f"{args.year}{args.month:02d}" # combine year and month arguments into the required format
+ )
+ datasets = load_data_from_all_years(PATH, month_to_process)
+ ds = pdtocdf(datasets)
+
+ print(ds.dims)
+ print(ds.coords)
+
+ # ds = ds.sel(x=slice(float(-10), float(2)), y=slice(float(49), float(59)))
+
+ print(ds)
+ ds = ds.unstack("index")
+
+ # selecting data based on just a sinlge point for Malta,
+ # TO DO: this would get change to be a slice for the future
+ ds_filt = ds.sel(x=14, y=36)
+
+ print(ds_filt)
+
+ file_ending = ".zarr"
+
+ # Create output directory name including the year and month to process
+ output_name = f"{args.output}{args.year}{args.month:02d}{file_ending}"
+ ds_filt.to_zarr(output_name)
+
+
+# Check if script is being run directly
+if __name__ == "__main__":
+ main()
diff --git a/nwp/excarta/parse_excarta_to_output.py b/nwp/excarta/parse_excarta_to_output.py
new file mode 100644
index 0000000..bcd275d
--- /dev/null
+++ b/nwp/excarta/parse_excarta_to_output.py
@@ -0,0 +1,105 @@
+import argparse
+import datetime
+import os
+import pathlib
+from datetime import datetime
+
+import pandas as pd
+import xarray as xr
+
+
+def _parse_args():
+ parser = argparse.ArgumentParser()
+ parser.add_argument("output", type=pathlib.Path, help="Output zarr file")
+ return parser.parse_args()
+
+
+def data_loader(folder_path):
+ """
+ Loads and transforms data from CSV files in the given folder_path.
+ """
+ column_names = [
+ "DateTimeUTC",
+ "LocationId",
+ "Latitude",
+ "Longitude",
+ "dni",
+ "dhi",
+ "ghi",
+ ]
+ files = os.listdir(folder_path)
+ dfs = []
+
+ for filename in files:
+ if filename.endswith(".csv") and not filename.startswith("._"):
+ file_path = os.path.join(folder_path, filename)
+ df = pd.read_csv(
+ file_path, header=None, names=column_names, parse_dates=["DateTimeUTC"]
+ )
+
+ datetime_str = filename[:-4]
+ datetime_obj = datetime.strptime(datetime_str, "%Y%m%d%H")
+
+ df["step"] = (
+ df["DateTimeUTC"] - datetime_obj
+ ).dt.total_seconds() / 3600 # convert timedelta to hours
+ df["init_time"] = datetime_obj
+ dfs.append(df)
+
+ return dfs
+
+
+def load_data_from_all_years(parent_folder_path):
+ """
+ Loads data from all the year folders under the parent path.
+ """
+ all_dataframes = []
+
+ # Actual date range is 2018 to 2022 (for in range use (2018,2023))
+ for year in range(2018, 2019):
+ folder_path = os.path.join(parent_folder_path, str(year))
+ dataframes = data_loader(folder_path)
+ all_dataframes.extend(dataframes)
+
+ return all_dataframes
+
+
+def pdtocdf(dfs):
+ """
+ Converts pandas dataframe to an xarray dataset.
+ """
+ merged_df = pd.concat(dfs, ignore_index=True)
+
+ ds = xr.Dataset.from_dataframe(merged_df)
+ ds = ds.set_index(index=["init_time", "step", "Latitude", "Longitude"]).unstack("index")
+ ds = ds.drop_vars(["LocationId", "DateTimeUTC"])
+
+ var_names = ds.data_vars
+ d2 = xr.concat([ds[v] for v in var_names], dim="variable")
+ d2 = d2.assign_coords(variable=("variable", var_names))
+ ds = xr.Dataset(dict(value=d2))
+ ds = ds.sortby("step")
+ ds = ds.sortby("init_time")
+ ds = ds.rename({"Latitude": "y", "Longitude": "x"})
+
+ return ds
+
+
+def main():
+ """
+ Main function to control the flow of the script.
+ """
+ args = _parse_args()
+
+ if args.output.exists():
+ raise RuntimeError(f'Output file "{args.output}" already exist')
+
+ PATH = "/mnt/storage_b/data/ocf/solar_pv_nowcasting/experimental/Excarta/sr_UK_Malta_full/solar_data"
+ dfs = load_data_from_all_years(PATH)
+ ds = pdtocdf(dfs)
+ ds.to_zarr(args.output)
+
+
+# Check if script is being run directly
+if __name__ == "__main__":
+ main()
diff --git a/nwp/excarta/parse_excarta_to_output_low_mem.py b/nwp/excarta/parse_excarta_to_output_low_mem.py
new file mode 100644
index 0000000..e6cb3d1
--- /dev/null
+++ b/nwp/excarta/parse_excarta_to_output_low_mem.py
@@ -0,0 +1,112 @@
+# Low memory script
+import os
+import pathlib
+from datetime import datetime
+
+import pandas as pd
+import xarray as xr
+
+
+def _parse_args():
+ parser = argparse.ArgumentParser()
+ parser.add_argument("output", type=pathlib.Path, help="Output zarr file")
+ return parser.parse_args()
+
+
+def data_loader(folder_path):
+ """
+ Loads and transforms data from CSV files in the given folder_path and directly convert each DataFrame into an xarray Dataset.
+ """
+ column_names = [
+ "DateTimeUTC",
+ "LocationId",
+ "Latitude",
+ "Longitude",
+ "dni",
+ "dhi",
+ "ghi",
+ ]
+ files = os.listdir(folder_path)
+ datasets = []
+
+ for filename in files:
+ if filename.endswith(".csv") and not filename.startswith("._"):
+ file_path = os.path.join(folder_path, filename)
+
+ df = pd.read_csv(
+ file_path, header=None, names=column_names, parse_dates=["DateTimeUTC"]
+ )
+ datetime_str = filename[:-4]
+ datetime_obj = datetime.strptime(datetime_str, "%Y%m%d%H")
+ df["step"] = (
+ df["DateTimeUTC"] - datetime_obj
+ ).dt.total_seconds() / 3600 # convert timedelta to hours
+ df["init_time"] = datetime_obj
+
+ # Convert the dataframe to an xarray Dataset and append to the list
+ ds = xr.Dataset.from_dataframe(df)
+ ds = ds.drop_vars(["LocationId", "DateTimeUTC"])
+ datasets.append(ds)
+
+ return datasets
+
+
+def load_data_from_all_years(parent_folder_path):
+ all_datasets = []
+
+ for year in range(2017, 2019):
+ folder_path = os.path.join(parent_folder_path, str(year))
+ datasets = data_loader(folder_path)
+ all_datasets.extend(datasets)
+
+ return all_datasets
+
+
+def pdtocdf(datasets):
+ """
+ Processes the xarray Datasets and merges them.
+ """
+ print(datasets)
+ # ds = xr.merge(datasets)
+
+ datasets = [
+ ds.set_index(index=["init_time", "step", "Latitude", "Longitude"]) for ds in datasets
+ ]
+
+ ds = xr.concat(datasets, dim="index")
+
+ # Going to unstack and then combine in a different script
+ # Get rid of the index dimension and just keep the desired ones
+ # ds = ds.unstack('index')
+
+ var_names = ds.data_vars
+ d2 = xr.concat([ds[v] for v in var_names], dim="variable")
+ d2 = d2.assign_coords(variable=("variable", var_names))
+ ds = xr.Dataset(dict(value=d2))
+ ds = ds.sortby("step")
+ ds = ds.sortby("init_time")
+ ds = ds.rename({"Latitude": "y", "Longitude": "x"})
+
+ return ds
+
+
+def main():
+ args = _parse_args()
+
+ if args.output.exists():
+ raise RuntimeError(f'Output file "{args.output}" already exist')
+
+ PATH = "/mnt/storage_b/data/ocf/solar_pv_nowcasting/experimental/Excarta/sr_UK_Malta_full/solar_data"
+ datasets = load_data_from_all_years(PATH)
+ ds = pdtocdf(datasets)
+
+ print(ds)
+
+ ds = ds.unstack("index")
+
+ ds.to_zarr(args.output)
+
+
+# Check if script is being run directly
+if __name__ == "__main__":
+ main()
diff --git a/nwp/excarta/run_script_excarta.sh b/nwp/excarta/run_script_excarta.sh
new file mode 100755
index 0000000..633fde6
--- /dev/null
+++ b/nwp/excarta/run_script_excarta.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+#
+# Run the monthly Excarta parser once for every (year, month) pair, passing the
+# output location, the year and the month as positional arguments.
+
+# Path to the Python script
+SCRIPT_PATH="/home/zak/nwp/nwp/excarta/parse_excarta_monthly.py"
+
+# Output location for the Zarr files
+# NOTE(review): the value ends in "malta_excarta_", so it looks like a filename
+# prefix rather than a directory - confirm against the parser script.
+OUTPUT_DIRECTORY="/mnt/storage_b/data/ocf/solar_pv_nowcasting/experimental/Excarta/sr_UK_Malta_full/zarr_format/r4/Malta/monthly/malta_excarta_"
+
+# Iterate over the range of years
+for year in {2018..2022}
+do
+    # Iterate over the range of months
+    for month in {1..12}
+    do
+        echo "Processing data for ${year}-${month}..."
+        # Quote every expansion so a path containing spaces or glob characters
+        # reaches the script as a single argument.
+        python "$SCRIPT_PATH" "$OUTPUT_DIRECTORY" "$year" "$month"
+    done
+done
diff --git a/nwp/icon/app.py b/nwp/icon/app.py
index d41bb9b..e3b16fb 100644
--- a/nwp/icon/app.py
+++ b/nwp/icon/app.py
@@ -33,13 +33,13 @@ def download_model_files(runs=None, parent_folder=None, model="global"):
var_2d_list = GLOBAL_VAR2D_LIST
invariant = GLOBAL_INVARIENT_LIST
pressure_levels = GLOBAL_PRESSURE_LEVELS
- f_steps = list(range(0, 79)) + list(range(81, 99, 3)) # 4 days
+ f_steps = list(range(0, 79)) + list(range(81, 99, 3)) # 4 days
else:
var_3d_list = EU_VAR3D_LIST
var_2d_list = EU_VAR2D_LIST
invariant = None
pressure_levels = EU_PRESSURE_LEVELS
- f_steps = list(range(0, 79)) + list(range(81, 123, 3)) # 5 days
+ f_steps = list(range(0, 79)) + list(range(81, 123, 3)) # 5 days
for run in runs:
run_folder = os.path.join(parent_folder, run)
if not os.path.exists(run_folder):
@@ -69,7 +69,12 @@ def download_model_files(runs=None, parent_folder=None, model="global"):
def process_model_files(
- folder, var_3d_list=None, var_2d_list=None, invariant_list=None, model="global", run="00"
+ folder,
+ var_3d_list=None,
+ var_2d_list=None,
+ invariant_list=None,
+ model="global",
+ run="00",
):
if model == "global":
var_base = "icon_global_icosahedral"
@@ -87,7 +92,7 @@ def process_model_files(
)
lons = lon_ds.tlon.values
lats = lat_ds.tlat.values
- f_steps = list(range(0, 79)) + list(range(81, 99, 3)) # 4 days
+ f_steps = list(range(0, 79)) + list(range(81, 99, 3)) # 4 days
else:
var_base = "icon-eu_europe_regular-lat-lon"
var_3d_list = EU_VAR3D_LIST
diff --git a/scripts/convert_icon_archive.py b/scripts/convert_icon_archive.py
index 257ecec..6b1ffbe 100644
--- a/scripts/convert_icon_archive.py
+++ b/scripts/convert_icon_archive.py
@@ -6,8 +6,11 @@
"""
+import multiprocessing as mp
import os
+import subprocess
from glob import glob
+from pathlib import Path
import xarray as xr
import zarr
@@ -18,10 +21,7 @@
EU_VAR2D_LIST,
EU_VAR3D_LIST,
)
-import subprocess
-from pathlib import Path
-import multiprocessing as mp
def decompress(full_bzip_filename: Path, temp_pth: Path) -> str:
"""
@@ -38,7 +38,7 @@ def decompress(full_bzip_filename: Path, temp_pth: Path) -> str:
base_nat_filename = os.path.splitext(base_bzip_filename)[0]
full_nat_filename = os.path.join(temp_pth, base_nat_filename)
if os.path.exists(full_nat_filename):
- return full_nat_filename # Don't decompress a second time
+ return full_nat_filename # Don't decompress a second time
with open(full_nat_filename, "wb") as nat_file_handler:
process = subprocess.run(
["pbzip2", "--decompress", "--keep", "--stdout", full_bzip_filename],
@@ -179,8 +179,8 @@ def upload_to_hf(dataset_xr, folder, model="eu", run="00", token=None):
encoding = {var: {"compressor": Blosc2("zstd", clevel=9)} for var in dataset_xr.data_vars}
encoding["time"] = {"units": "nanoseconds since 1970-01-01"}
with zarr.ZipStore(
- zarr_path,
- mode="w",
+ zarr_path,
+ mode="w",
) as store:
dataset_xr.chunk(chunking).to_zarr(store, encoding=encoding, compute=True)
done = False
@@ -189,10 +189,10 @@ def upload_to_hf(dataset_xr, folder, model="eu", run="00", token=None):
api.upload_file(
path_or_fileobj=zarr_path,
path_in_repo=f"data/{dataset_xr.time.dt.year.values}/"
- f"{dataset_xr.time.dt.month.values}/"
- f"{dataset_xr.time.dt.day.values}/"
- f"{dataset_xr.time.dt.year.values}{str(dataset_xr.time.dt.month.values).zfill(2)}{str(dataset_xr.time.dt.day.values).zfill(2)}"
- f"_{str(dataset_xr.time.dt.hour.values).zfill(2)}.zarr.zip",
+ f"{dataset_xr.time.dt.month.values}/"
+ f"{dataset_xr.time.dt.day.values}/"
+ f"{dataset_xr.time.dt.year.values}{str(dataset_xr.time.dt.month.values).zfill(2)}{str(dataset_xr.time.dt.day.values).zfill(2)}"
+ f"_{str(dataset_xr.time.dt.hour.values).zfill(2)}.zarr.zip",
repo_id="openclimatefix/dwd-icon-global"
if model == "global"
else "openclimatefix/dwd-icon-eu",