Skip to content

Commit

Permalink
Adding earthnet2021x download
Browse files Browse the repository at this point in the history
  • Loading branch information
vitusbenson committed Nov 18, 2022
1 parent 8410c83 commit c4f457d
Show file tree
Hide file tree
Showing 4 changed files with 106 additions and 4 deletions.
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,16 @@ Find more information on https://www.earthnet.tech.
pip install earthnet
```

# Downloading new dataset EarthNet2021x

Ensure you have enough free disk space! We recommend 1TB.
```
import earthnet as en
en.download(dataset = "earthnet2021x", split = "train", save_directory = "data_dir")
```
Where `data_dir` is the directory where EarthNet2021x shall be saved and `split` is `"all"` or one of `["train","iid","ood","extreme","seasonal"]`.


# Download
Ensure you have enough free disk space! We recommend 1TB.
```
Expand Down
5 changes: 3 additions & 2 deletions earthnet/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
"""EarthNet2021 Toolkit
A library for downloading, evaluating and plotting Earth surface forecasts.
"""
__version__ = "0.2.4"
__version__ = "0.3.2"
__author__ = 'Vitus Benson, Christian Requena-Mesa'
__credits__ = 'Max-Planck-Institute for Biogeochemistry'

from earthnet.parallel_score import EarthNetScore
from earthnet.download import Downloader
from earthnet.coords import get_coords_from_cube, get_coords_from_tile
from earthnet.plot_cube import cube_gallery, cube_ndvi_timeseries
from earthnet.plot_cube import cube_gallery, cube_ndvi_timeseries
from earthnet.download_v2 import download, load_minicube
89 changes: 89 additions & 0 deletions earthnet/download_v2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@



import s3fs
import xarray as xr
from pathlib import Path
from tqdm import tqdm

# Names of the splits that exist for each downloadable dataset, keyed by
# dataset name. `download(..., split="all")` iterates over these in order.
SPLITS = {
    "earthnet2021x": [
        "train",
        "iid",
        "ood",
        "extreme",
        "seasonal",
    ],
}

def download(dataset = "earthnet2021x", split = "train", save_directory = "data/", proxy = None, limit = None):
    """Download a split (or all splits) of a recent EarthNet dataset.

    Before downloading, ensure that you have enough free disk space. We recommend 1 TB.
    Files are copied from the public S3 bucket into `save_directory`, keeping the
    bucket layout below the leading `earthnet/` prefix
    (i.e. `<save_directory>/<dataset>/<split>/...`).

    All available splits:
        - For dataset `"earthnet2021x"`: `["train","iid","ood","extreme","seasonal"]`
    You can also pass `"all"` as `split` to download every split of the dataset.

    Args:
        dataset (str): The dataset you wish to download.
        split (str): A split of the given dataset, or `"all"` to download all splits of the dataset.
        save_directory (str): The directory where the data shall be saved in, we recommend data/
        proxy (str, optional): If you need to use a proxy to access the internet, you may specify it here.
        limit (int, optional): Maximum number of files to download per split. `None` (default) downloads
            every file; `0` downloads nothing.

    Raises:
        KeyError: If `split` is `"all"` and `dataset` has no entry in `SPLITS`.
    """
    if split == "all":
        # Use a distinct loop name so the `split` argument is not overwritten.
        for single_split in SPLITS[dataset]:
            download(dataset = dataset, split = single_split, save_directory = save_directory, proxy = proxy, limit = limit)
        return

    # The endpoint below is https, and botocore selects the proxy by URL scheme,
    # so the proxy has to be registered for "https" as well as "http".
    proxy_config = {"proxies": {"http": proxy, "https": proxy}} if proxy else {}
    s3 = s3fs.S3FileSystem(
        anon = True,
        client_kwargs = {
            'endpoint_url': 'https://s3.bgc-jena.mpg.de:9000',
            'region_name': 'thuringia',
        },
        config_kwargs = proxy_config,
    )

    print(f"Finding files of {dataset}, split {split} to download.")
    files = s3.find(f"earthnet/{dataset}/{split}")
    # Compare against None so that an explicit limit of 0 is honoured instead
    # of being treated as "no limit".
    if limit is not None:
        files = files[:limit]

    print(f"Downloading files of {dataset}, split {split}")
    # Every key searched for above starts with this prefix; it is dropped so
    # the local tree starts at the dataset name.
    bucket_prefix = "earthnet/"
    for file in tqdm(files):
        savepath = Path(save_directory) / file[len(bucket_prefix):]
        savepath.parent.mkdir(parents = True, exist_ok = True)
        s3.download(file, str(savepath))
    print(f"Downloaded {dataset}, split {split}.")


def load_minicube(dataset = "earthnet2021x", split = "train", id = "29SND_2018-09-03_2019-01-30_441_569_2745_2873_6_86_42_122", region = None, proxy = None):
    """Load a minicube from a recent EarthNet dataset

    Opens the minicube file directly from the cloud bucket and returns it.

    All available splits:
        - For dataset `"earthnet2021x"`: `["train","iid","ood","extreme","seasonal"]`

    Args:
        dataset (str): The dataset
        split (str): The split
        id (str): The id of the minicube
        region (str, optional): If you specify the region, loading will be faster, because the
            file path is built directly instead of being searched for in the bucket.
        proxy (str, optional): If you need to use a proxy to access the internet, you may specify it here.

    Returns:
        The object returned by `xarray.open_dataset` for the minicube file.

    Raises:
        IndexError: If `region` is not given and no file named `<id>.nc` is found
            for the given dataset and split.
    """
    # The endpoint below is https, and botocore selects the proxy by URL scheme,
    # so the proxy has to be registered for "https" as well as "http".
    proxy_config = {"proxies": {"http": proxy, "https": proxy}} if proxy else {}
    s3 = s3fs.S3FileSystem(
        anon = True,
        client_kwargs = {
            'endpoint_url': 'https://s3.bgc-jena.mpg.de:9000',
            'region_name': 'thuringia',
        },
        config_kwargs = proxy_config,
    )

    if region:
        file = f"earthnet/{dataset}/{split}/{region}/{id}.nc"
    else:
        print(f"Searching for {id}...")
        matches = s3.glob(f"earthnet/{dataset}/{split}/**/{id}.nc")
        if not matches:
            # Same exception type as the former bare `[0]` lookup, so existing
            # handlers keep working, but with a message that names the cause.
            raise IndexError(f"No minicube with id {id} found in dataset {dataset}, split {split}.")
        file = matches[0]
        print(f"Found {id}.")

    # The remote file handle is left open on purpose: it is handed to xarray,
    # which reads from it.
    mc = xr.open_dataset(s3.open(file))

    return mc
6 changes: 4 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,14 @@
"tqdm",
"fire",
"pyproj",
"pandas"
"pandas",
"s3fs",
"xarray"
]


setup(name='earthnet',
version='0.2.4',
version='0.3.2',
description="EarthNet2021 Toolkit: Download, Evaluation, Plotting",
author="Vitus Benson",
author_email="vbenson@bgc-jena.mpg.de",
Expand Down

0 comments on commit c4f457d

Please sign in to comment.