diff --git a/cli/foundation-models/system/finetune/image-classification/multiclass-classification/prepare_data.py b/cli/foundation-models/system/finetune/image-classification/multiclass-classification/prepare_data.py index cd3ed06d4c..251fd7db33 100644 --- a/cli/foundation-models/system/finetune/image-classification/multiclass-classification/prepare_data.py +++ b/cli/foundation-models/system/finetune/image-classification/multiclass-classification/prepare_data.py @@ -4,6 +4,7 @@ import os import urllib from zipfile import ZipFile +from pathlib import Path from azure.identity import DefaultAzureCredential from azure.ai.ml import MLClient @@ -103,28 +104,20 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir): # Create directory, if it does not exist os.makedirs(dataset_parent_dir, exist_ok=True) - # Download data - print("Downloading data.") - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip" + # Local data path + repo_root = Path(__file__).resolve().parents[6] + local_data_path = repo_root / "sample-data" / "image-classification" / "fridgeObjects.zip" - # Extract current dataset name from dataset url - dataset_name = os.path.basename(download_url).split(".")[0] + # Extract current dataset name from dataset path + dataset_name = os.path.basename(local_data_path).split(".")[0] # Get dataset path for later use dataset_dir = os.path.join(dataset_parent_dir, dataset_name) - # Get the name of zip file - data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - - # Download data from public url - urllib.request.urlretrieve(download_url, filename=data_file) - # Extract files - with ZipFile(data_file, "r") as zip: + with ZipFile(local_data_path, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # Delete zip file - os.remove(data_file) # Upload data and create a data asset URI folder print("Uploading data to blob 
storage") diff --git a/cli/foundation-models/system/finetune/image-classification/multilabel-classification/prepare_data.py b/cli/foundation-models/system/finetune/image-classification/multilabel-classification/prepare_data.py index e67ebe6593..28f208c82c 100644 --- a/cli/foundation-models/system/finetune/image-classification/multilabel-classification/prepare_data.py +++ b/cli/foundation-models/system/finetune/image-classification/multilabel-classification/prepare_data.py @@ -2,6 +2,7 @@ import base64 import json import os +from pathlib import Path import urllib from zipfile import ZipFile @@ -106,28 +107,20 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir): # Create directory, if it does not exist os.makedirs(dataset_parent_dir, exist_ok=True) - # Download data - print("Downloading data.") - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/multilabelFridgeObjects.zip" + # Use local data path + repo_root = Path(__file__).resolve().parents[6] + local_data_path = repo_root / "sample-data" / "image-classification" / "multilabelFridgeObjects.zip" - # Extract current dataset name from dataset url - dataset_name = os.path.basename(download_url).split(".")[0] + # Extract current dataset name from dataset file + dataset_name = os.path.basename(local_data_path).split(".")[0] # Get dataset path for later use dataset_dir = os.path.join(dataset_parent_dir, dataset_name) - # Get the name of zip file - data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - - # Download data from public url - urllib.request.urlretrieve(download_url, filename=data_file) - - # Extract files - with ZipFile(data_file, "r") as zip: + # Extract files directly from the local path + with ZipFile(local_data_path, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # Delete zip file - os.remove(data_file) # Upload data and create a data asset URI folder 
print("Uploading data to blob storage") diff --git a/cli/foundation-models/system/finetune/image-instance-segmentation/prepare_data.py b/cli/foundation-models/system/finetune/image-instance-segmentation/prepare_data.py index 7027170e4a..2a87c5a9e5 100644 --- a/cli/foundation-models/system/finetune/image-instance-segmentation/prepare_data.py +++ b/cli/foundation-models/system/finetune/image-instance-segmentation/prepare_data.py @@ -2,6 +2,7 @@ import base64 import json import os +from pathlib import Path import subprocess import sys import urllib @@ -156,27 +157,23 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir): # Create data folder if it doesnt exist. os.makedirs(dataset_parent_dir, exist_ok=True) - # Download data - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-instance-segmentation/odFridgeObjectsMask.zip" + # Local data + repo_root = Path(__file__).resolve().parents[5] + local_data_path = repo_root / "sample-data" / "image-instance-segmentation" / "odFridgeObjectsMask.zip" # Extract current dataset name from dataset url - dataset_name = os.path.basename(download_url).split(".")[0] + dataset_name = os.path.basename(local_data_path).split(".")[0] # Get dataset path for later use dataset_dir = os.path.join(dataset_parent_dir, dataset_name) # Get the data zip file path data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - # Download the dataset - urllib.request.urlretrieve(download_url, filename=data_file) - # Extract files with ZipFile(data_file, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # Delete zip file - os.remove(data_file) # Upload data and create a data asset URI folder print("Uploading data to blob storage") diff --git a/cli/foundation-models/system/finetune/image-object-detection/prepare_data.py b/cli/foundation-models/system/finetune/image-object-detection/prepare_data.py index 79735c4b8c..ce289b9512 100644 --- 
a/cli/foundation-models/system/finetune/image-object-detection/prepare_data.py +++ b/cli/foundation-models/system/finetune/image-object-detection/prepare_data.py @@ -2,6 +2,7 @@ import base64 import json import os +from pathlib import Path import urllib import xml.etree.ElementTree as ET @@ -159,27 +160,20 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir): # create data folder if it doesnt exist. os.makedirs(dataset_parent_dir, exist_ok=True) - # download data - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip" + # local data + repo_root = Path(__file__).resolve().parents[5] + local_data_path = repo_root / "sample-data" / "image-object-detection" / "odFridgeObjects.zip" # Extract current dataset name from dataset url - dataset_name = os.path.basename(download_url).split(".")[0] + dataset_name = os.path.basename(local_data_path).split(".")[0] # Get dataset path for later use dataset_dir = os.path.join(dataset_parent_dir, dataset_name) - # Get the data zip file path - data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - - # Download the dataset - urllib.request.urlretrieve(download_url, filename=data_file) - # extract files - with ZipFile(data_file, "r") as zip: + with ZipFile(local_data_path, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # delete zip file - os.remove(data_file) # Upload data and create a data asset URI folder print("Uploading data to blob storage") diff --git a/cli/foundation-models/system/inference/image-classification/prepare_data.py b/cli/foundation-models/system/inference/image-classification/prepare_data.py index d53a90d01c..ee7fdffc63 100644 --- a/cli/foundation-models/system/inference/image-classification/prepare_data.py +++ b/cli/foundation-models/system/inference/image-classification/prepare_data.py @@ -2,6 +2,7 @@ import base64 import json import os +from pathlib 
import Path import shutil import urllib.request import pandas as pd @@ -19,34 +20,29 @@ def download_and_unzip(dataset_parent_dir: str, is_multilabel_dataset: int) -> N # Create directory, if it does not exist os.makedirs(dataset_parent_dir, exist_ok=True) - # download data + # local data if is_multilabel_dataset == 0: - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip" + repo_root = Path(__file__).resolve().parents[5] + local_data_path = repo_root / "sample-data" / "image-classification" / "fridgeObjects.zip" else: - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/multilabelFridgeObjects.zip" - print(f"Downloading data from {download_url}") + repo_root = Path(__file__).resolve().parents[5] + local_data_path = repo_root / "sample-data" / "image-classification" / "multilabelFridgeObjects.zip" + print(f"Pulling data from {local_data_path}") # Extract current dataset name from dataset url - dataset_name = os.path.basename(download_url).split(".")[0] + dataset_name = os.path.basename(local_data_path).split(".")[0] # Get dataset path for later use dataset_dir = os.path.join(dataset_parent_dir, dataset_name) if os.path.exists(dataset_dir): shutil.rmtree(dataset_dir) - # Get the name of zip file - data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - - # Download data from public url - urllib.request.urlretrieve(download_url, filename=data_file) - # extract files - with ZipFile(data_file, "r") as zip: + with ZipFile(local_data_path, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # delete zip file - os.remove(data_file) + return dataset_dir diff --git a/cli/foundation-models/system/inference/image-embeddings/prepare_data.py b/cli/foundation-models/system/inference/image-embeddings/prepare_data.py index e5ef7bf09a..c4babfcb01 100644 --- 
a/cli/foundation-models/system/inference/image-embeddings/prepare_data.py +++ b/cli/foundation-models/system/inference/image-embeddings/prepare_data.py @@ -2,6 +2,7 @@ import base64 import json import os +from pathlib import Path import shutil import urllib.request import pandas as pd @@ -19,31 +20,24 @@ def download_and_unzip(dataset_parent_dir: str) -> None: # Create directory, if it does not exist os.makedirs(dataset_parent_dir, exist_ok=True) - # download data - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip" - print(f"Downloading data from {download_url}") + # local data + repo_root = Path(__file__).resolve().parents[5] + local_data_path = repo_root / "sample-data" / "image-classification" / "fridgeObjects.zip" # Extract current dataset name from dataset url - dataset_name = os.path.basename(download_url).split(".")[0] + dataset_name = os.path.basename(local_data_path).split(".")[0] # Get dataset path for later use dataset_dir = os.path.join(dataset_parent_dir, dataset_name) if os.path.exists(dataset_dir): shutil.rmtree(dataset_dir) - # Get the name of zip file - data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - - # Download data from public url - urllib.request.urlretrieve(download_url, filename=data_file) - # extract files - with ZipFile(data_file, "r") as zip: + with ZipFile(local_data_path, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # delete zip file - os.remove(data_file) + return dataset_dir diff --git a/cli/foundation-models/system/inference/image-object-detection/prepare_data.py b/cli/foundation-models/system/inference/image-object-detection/prepare_data.py index e497f4d791..30f1254a47 100644 --- a/cli/foundation-models/system/inference/image-object-detection/prepare_data.py +++ b/cli/foundation-models/system/inference/image-object-detection/prepare_data.py @@ -2,6 +2,7 @@ import base64 import json import 
os +from pathlib import Path import shutil import urllib.request import pandas as pd @@ -14,35 +15,25 @@ def download_and_unzip(dataset_parent_dir: str) -> None: :param dataset_parent_dir: dataset parent directory to which dataset will be downloaded :type dataset_parent_dir: str """ - # Create directory, if it does not exist - os.makedirs(dataset_parent_dir, exist_ok=True) - # download data - - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip" - print(f"Downloading data from {download_url}") + # local data + repo_root = Path(__file__).resolve().parents[5] + local_data_path = repo_root / "sample-data" / "image-object-detection" / "odFridgeObjects.zip" # Extract current dataset name from dataset url - dataset_name = os.path.basename(download_url).split(".")[0] + dataset_name = os.path.basename(local_data_path).split(".")[0] # Get dataset path for later use dataset_dir = os.path.join(dataset_parent_dir, dataset_name) if os.path.exists(dataset_dir): shutil.rmtree(dataset_dir) - # Get the name of zip file - data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - - # Download data from public url - urllib.request.urlretrieve(download_url, filename=data_file) - # extract files - with ZipFile(data_file, "r") as zip: + with ZipFile(local_data_path, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # delete zip file - os.remove(data_file) + return dataset_dir diff --git a/cli/foundation-models/system/inference/image-text-embeddings/prepare_data.py b/cli/foundation-models/system/inference/image-text-embeddings/prepare_data.py index 97a069fc13..87073d10e9 100644 --- a/cli/foundation-models/system/inference/image-text-embeddings/prepare_data.py +++ b/cli/foundation-models/system/inference/image-text-embeddings/prepare_data.py @@ -2,6 +2,7 @@ import base64 import json import os +from pathlib import Path import shutil import urllib.request 
import pandas as pd @@ -19,12 +20,12 @@ def download_and_unzip(dataset_parent_dir: str) -> None: # Create directory, if it does not exist os.makedirs(dataset_parent_dir, exist_ok=True) - # download data - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip" - print(f"Downloading data from {download_url}") + # local data + repo_root = Path(__file__).resolve().parents[5] + local_data_path = repo_root / "sample-data" / "image-classification" / "fridgeObjects.zip" # Extract current dataset name from dataset url - dataset_name = os.path.basename(download_url).split(".")[0] + dataset_name = os.path.basename(local_data_path).split(".")[0] # Get dataset path for later use dataset_dir = os.path.join(dataset_parent_dir, dataset_name) @@ -34,16 +35,12 @@ def download_and_unzip(dataset_parent_dir: str) -> None: # Get the name of zip file data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - # Download data from public url - urllib.request.urlretrieve(download_url, filename=data_file) - # extract files - with ZipFile(data_file, "r") as zip: + with ZipFile(local_data_path, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # delete zip file - os.remove(data_file) + return dataset_dir diff --git a/cli/foundation-models/system/inference/image-to-text/prepare_data.py b/cli/foundation-models/system/inference/image-to-text/prepare_data.py index 83a37ac9c2..24ae0aca4d 100644 --- a/cli/foundation-models/system/inference/image-to-text/prepare_data.py +++ b/cli/foundation-models/system/inference/image-to-text/prepare_data.py @@ -2,6 +2,7 @@ import base64 import json import os +from pathlib import Path import shutil import urllib.request import pandas as pd @@ -17,31 +18,24 @@ def download_and_unzip(dataset_parent_dir: str) -> None: # Create directory, if it does not exist os.makedirs(dataset_parent_dir, exist_ok=True) - # download data - download_url = 
"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip" - print(f"Downloading data from {download_url}") + # local data + repo_root = Path(__file__).resolve().parents[5] + local_data_path = repo_root / "sample-data" / "image-object-detection" / "odFridgeObjects.zip" # Extract current dataset name from dataset url - dataset_name = os.path.basename(download_url).split(".")[0] + dataset_name = os.path.basename(local_data_path).split(".")[0] # Get dataset path for later use dataset_dir = os.path.join(dataset_parent_dir, dataset_name) if os.path.exists(dataset_dir): shutil.rmtree(dataset_dir) - # Get the name of zip file - data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - - # Download data from public url - urllib.request.urlretrieve(download_url, filename=data_file) - # extract files - with ZipFile(data_file, "r") as zip: + with ZipFile(local_data_path, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # delete zip file - os.remove(data_file) + return dataset_dir diff --git a/cli/foundation-models/system/inference/mask-generation/prepare_data.py b/cli/foundation-models/system/inference/mask-generation/prepare_data.py index 16bdd73e8d..6e0e032a80 100644 --- a/cli/foundation-models/system/inference/mask-generation/prepare_data.py +++ b/cli/foundation-models/system/inference/mask-generation/prepare_data.py @@ -2,6 +2,7 @@ import base64 import json import os +from pathlib import Path import shutil import urllib.request import pandas as pd @@ -17,32 +18,24 @@ def download_and_unzip(dataset_parent_dir: str) -> None: # Create directory, if it does not exist os.makedirs(dataset_parent_dir, exist_ok=True) - # download data - - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip" - print(f"Downloading data from {download_url}") + # local data + repo_root = 
Path(__file__).resolve().parents[5] + local_data_path = repo_root / "sample-data" / "image-object-detection" / "odFridgeObjects.zip" # Extract current dataset name from dataset url - dataset_name = os.path.basename(download_url).split(".")[0] + dataset_name = os.path.basename(local_data_path).split(".")[0] # Get dataset path for later use dataset_dir = os.path.join(dataset_parent_dir, dataset_name) if os.path.exists(dataset_dir): shutil.rmtree(dataset_dir) - # Get the name of zip file - data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - - # Download data from public url - urllib.request.urlretrieve(download_url, filename=data_file) - # extract files - with ZipFile(data_file, "r") as zip: + with ZipFile(local_data_path, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # delete zip file - os.remove(data_file) + return dataset_dir diff --git a/cli/foundation-models/system/inference/visual-question-answering/prepare_data.py b/cli/foundation-models/system/inference/visual-question-answering/prepare_data.py index b6ac22befd..54698e56ba 100644 --- a/cli/foundation-models/system/inference/visual-question-answering/prepare_data.py +++ b/cli/foundation-models/system/inference/visual-question-answering/prepare_data.py @@ -2,6 +2,7 @@ import base64 import json import os +from pathlib import Path import shutil import urllib.request import pandas as pd @@ -17,31 +18,24 @@ def download_and_unzip(dataset_parent_dir: str) -> None: # Create directory, if it does not exist os.makedirs(dataset_parent_dir, exist_ok=True) - # download data - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip" - print(f"Downloading data from {download_url}") + # local data + repo_root = Path(__file__).resolve().parents[5] + local_data_path = repo_root / "sample-data" / "image-object-detection" / "odFridgeObjects.zip" # Extract current dataset name from dataset url 
- dataset_name = os.path.basename(download_url).split(".")[0] + dataset_name = os.path.basename(local_data_path).split(".")[0] # Get dataset path for later use dataset_dir = os.path.join(dataset_parent_dir, dataset_name) if os.path.exists(dataset_dir): shutil.rmtree(dataset_dir) - # Get the name of zip file - data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - - # Download data from public url - urllib.request.urlretrieve(download_url, filename=data_file) - # extract files - with ZipFile(data_file, "r") as zip: + with ZipFile(local_data_path, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # delete zip file - os.remove(data_file) + return dataset_dir diff --git a/cli/foundation-models/system/inference/zero-shot-image-classification/prepare_data.py b/cli/foundation-models/system/inference/zero-shot-image-classification/prepare_data.py index 02e6327104..4806d16ddb 100644 --- a/cli/foundation-models/system/inference/zero-shot-image-classification/prepare_data.py +++ b/cli/foundation-models/system/inference/zero-shot-image-classification/prepare_data.py @@ -2,6 +2,7 @@ import base64 import json import os +from pathlib import Path import shutil import urllib.request import pandas as pd @@ -11,40 +12,32 @@ LABELS = "water_bottle, milk_bottle, carton, can" -def download_and_unzip(dataset_parent_dir: str) -> None: - """Download image dataset and unzip it. +def upload_data_and_extract(dataset_parent_dir: str) -> str: + """Extract image dataset from local path. 
- :param dataset_parent_dir: dataset parent directory to which dataset will be downloaded + :param dataset_parent_dir: dataset parent directory to which dataset will be extracted :type dataset_parent_dir: str """ # Create directory, if it does not exist os.makedirs(dataset_parent_dir, exist_ok=True) - # download data - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip" - print(f"Downloading data from {download_url}") + # local data + repo_root = Path(__file__).resolve().parents[5] + local_data_path = repo_root / "sample-data" / "image-classification" / "fridgeObjects.zip" - # Extract current dataset name from dataset url - dataset_name = os.path.basename(download_url).split(".")[0] + # Extract current dataset name from dataset path + dataset_name = os.path.basename(local_data_path).split(".")[0] # Get dataset path for later use dataset_dir = os.path.join(dataset_parent_dir, dataset_name) if os.path.exists(dataset_dir): shutil.rmtree(dataset_dir) - # Get the name of zip file - data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - - # Download data from public url - urllib.request.urlretrieve(download_url, filename=data_file) - - # extract files - with ZipFile(data_file, "r") as zip: + # Extract files + with ZipFile(local_data_path, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # delete zip file - os.remove(data_file) return dataset_dir @@ -151,7 +144,7 @@ def prepare_data_for_batch_inference(dataset_dir: str) -> None: args, unknown = parser.parse_known_args() args_dict = vars(args) - dataset_dir = download_and_unzip( + dataset_dir = upload_data_and_extract( dataset_parent_dir=os.path.join( os.path.dirname(os.path.realpath(__file__)), args.data_path ), diff --git a/cli/jobs/automl-standalone-jobs/cli-automl-image-classification-multiclass-task-fridge-items/prepare_data.py 
b/cli/jobs/automl-standalone-jobs/cli-automl-image-classification-multiclass-task-fridge-items/prepare_data.py index d76cd78d2c..2de4edcc06 100644 --- a/cli/jobs/automl-standalone-jobs/cli-automl-image-classification-multiclass-task-fridge-items/prepare_data.py +++ b/cli/jobs/automl-standalone-jobs/cli-automl-image-classification-multiclass-task-fridge-items/prepare_data.py @@ -3,6 +3,7 @@ import os import urllib from zipfile import ZipFile +from pathlib import Path from azure.identity import InteractiveBrowserCredential from azure.ai.ml import MLClient @@ -98,33 +99,29 @@ def create_jsonl_and_mltable_files(uri_folder_data_path, dataset_dir): save_ml_table_file(validation_mltable_path, validation_mltable_file_contents) -def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir): - - # Create directory, if it does not exist - os.makedirs(dataset_parent_dir, exist_ok=True) +def unzip(dataset_parent_dir: str) -> str: + """Unzip image dataset from local path.""" + repo_root = Path(__file__).resolve().parents[4] + local_data_path = repo_root / "sample-data" / "image-classification" / "fridgeObjects.zip" - # download data - print("Downloading data.") - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip" - - # Extract current dataset name from dataset url - dataset_name = os.path.basename(download_url).split(".")[0] - # Get dataset path for later use + # Extract current dataset name from dataset path + dataset_name = os.path.basename(local_data_path).split(".")[0] dataset_dir = os.path.join(dataset_parent_dir, dataset_name) - # Get the name of zip file - data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - - # Download data from public url - urllib.request.urlretrieve(download_url, filename=data_file) - - # extract files - with ZipFile(data_file, "r") as zip: + # Extract files + with ZipFile(local_data_path, "r") as zip: print("extracting files...") 
zip.extractall(path=dataset_parent_dir) print("done") - # delete zip file - os.remove(data_file) + return dataset_dir + + +def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir): + # Create directory, if it does not exist + os.makedirs(dataset_parent_dir, exist_ok=True) + + # Use local file instead of downloading + dataset_dir = unzip(dataset_parent_dir) # Upload data and create a data asset URI folder print("Uploading data to blob storage") diff --git a/cli/jobs/automl-standalone-jobs/cli-automl-image-classification-multilabel-task-fridge-items/prepare_data.py b/cli/jobs/automl-standalone-jobs/cli-automl-image-classification-multilabel-task-fridge-items/prepare_data.py index a725d918de..b18c3bc4a5 100644 --- a/cli/jobs/automl-standalone-jobs/cli-automl-image-classification-multilabel-task-fridge-items/prepare_data.py +++ b/cli/jobs/automl-standalone-jobs/cli-automl-image-classification-multilabel-task-fridge-items/prepare_data.py @@ -3,6 +3,7 @@ import os import urllib from zipfile import ZipFile +from pathlib import Path from azure.identity import InteractiveBrowserCredential from azure.ai.ml import MLClient @@ -101,33 +102,29 @@ def create_jsonl_and_mltable_files(uri_folder_data_path, dataset_dir): save_ml_table_file(validation_mltable_path, validation_mltable_file_contents) -def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir): - - # Create directory, if it does not exist - os.makedirs(dataset_parent_dir, exist_ok=True) +def unzip(dataset_parent_dir: str) -> str: + """Unzip image dataset from local path.""" + repo_root = Path(__file__).resolve().parents[4] + local_data_path = repo_root / "sample-data" / "image-classification" / "multilabelFridgeObjects.zip" - # download data - print("Downloading data.") - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/multilabelFridgeObjects.zip" - - # Extract current dataset name from dataset url - dataset_name = 
os.path.basename(download_url).split(".")[0] - # Get dataset path for later use + # Extract current dataset name from dataset path + dataset_name = os.path.basename(local_data_path).split(".")[0] dataset_dir = os.path.join(dataset_parent_dir, dataset_name) - # Get the name of zip file - data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - - # Download data from public url - urllib.request.urlretrieve(download_url, filename=data_file) - - # extract files - with ZipFile(data_file, "r") as zip: + # Extract files + with ZipFile(local_data_path, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # delete zip file - os.remove(data_file) + return dataset_dir + + +def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir): + # Create directory, if it does not exist + os.makedirs(dataset_parent_dir, exist_ok=True) + + # Use local file instead of downloading + dataset_dir = unzip(dataset_parent_dir) # Upload data and create a data asset URI folder print("Uploading data to blob storage") diff --git a/cli/jobs/automl-standalone-jobs/cli-automl-image-object-detection-task-fridge-items/prepare_data.py b/cli/jobs/automl-standalone-jobs/cli-automl-image-object-detection-task-fridge-items/prepare_data.py index 4f49de01dc..125f8cd9d2 100644 --- a/cli/jobs/automl-standalone-jobs/cli-automl-image-object-detection-task-fridge-items/prepare_data.py +++ b/cli/jobs/automl-standalone-jobs/cli-automl-image-object-detection-task-fridge-items/prepare_data.py @@ -1,6 +1,7 @@ import argparse import json import os +from pathlib import Path import urllib import xml.etree.ElementTree as ET @@ -142,27 +143,20 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir): # create data folder if it doesnt exist. 
os.makedirs(dataset_parent_dir, exist_ok=True) - # download data - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip" + # local data + repo_root = Path(__file__).resolve().parents[4] + local_data_path = repo_root / "sample-data" / "image-object-detection" / "odFridgeObjects.zip" # Extract current dataset name from dataset url - dataset_name = os.path.basename(download_url).split(".")[0] + dataset_name = os.path.basename(local_data_path).split(".")[0] # Get dataset path for later use dataset_dir = os.path.join(dataset_parent_dir, dataset_name) - # Get the data zip file path - data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - - # Download the dataset - urllib.request.urlretrieve(download_url, filename=data_file) - # extract files - with ZipFile(data_file, "r") as zip: + with ZipFile(local_data_path, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # delete zip file - os.remove(data_file) # Upload data and create a data asset URI folder print("Uploading data to blob storage") diff --git a/cli/jobs/pipelines/automl/image-multiclass-classification-fridge-items-pipeline/prepare_data.py b/cli/jobs/pipelines/automl/image-multiclass-classification-fridge-items-pipeline/prepare_data.py index 57fdc34ac6..7bed80e8f2 100644 --- a/cli/jobs/pipelines/automl/image-multiclass-classification-fridge-items-pipeline/prepare_data.py +++ b/cli/jobs/pipelines/automl/image-multiclass-classification-fridge-items-pipeline/prepare_data.py @@ -1,6 +1,7 @@ import argparse import json import os +from pathlib import Path import urllib from zipfile import ZipFile @@ -103,28 +104,20 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir): # Create directory, if it does not exist os.makedirs(dataset_parent_dir, exist_ok=True) - # download data - print("Downloading data.") - download_url = 
"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip" + # local data + repo_root = Path(__file__).resolve().parents[5] + local_data_path = repo_root / "sample-data" / "image-classification" / "fridgeObjects.zip" # Extract current dataset name from dataset url - dataset_name = os.path.basename(download_url).split(".")[0] + dataset_name = os.path.basename(local_data_path).split(".")[0] # Get dataset path for later use dataset_dir = os.path.join(dataset_parent_dir, dataset_name) - # Get the name of zip file - data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - - # Download data from public url - urllib.request.urlretrieve(download_url, filename=data_file) - # extract files - with ZipFile(data_file, "r") as zip: + with ZipFile(local_data_path, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # delete zip file - os.remove(data_file) # Upload data and create a data asset URI folder print("Uploading data to blob storage") diff --git a/cli/jobs/pipelines/automl/image-multilabel-classification-fridge-items-pipeline/prepare_data.py b/cli/jobs/pipelines/automl/image-multilabel-classification-fridge-items-pipeline/prepare_data.py index a6d03ac51e..686ab4aa5d 100644 --- a/cli/jobs/pipelines/automl/image-multilabel-classification-fridge-items-pipeline/prepare_data.py +++ b/cli/jobs/pipelines/automl/image-multilabel-classification-fridge-items-pipeline/prepare_data.py @@ -1,6 +1,7 @@ import argparse import json import os +from pathlib import Path import urllib from zipfile import ZipFile @@ -106,28 +107,23 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir): # Create directory, if it does not exist os.makedirs(dataset_parent_dir, exist_ok=True) - # download data - print("Downloading data.") - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/multilabelFridgeObjects.zip" + # local 
data + repo_root = Path(__file__).resolve().parents[5] + local_data_path = repo_root / "sample-data" / "image-classification" / "multilabelFridgeObjects.zip" # Extract current dataset name from dataset url - dataset_name = os.path.basename(download_url).split(".")[0] + dataset_name = os.path.basename(local_data_path).split(".")[0] # Get dataset path for later use dataset_dir = os.path.join(dataset_parent_dir, dataset_name) # Get the name of zip file data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - # Download data from public url - urllib.request.urlretrieve(download_url, filename=data_file) - # extract files with ZipFile(data_file, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # delete zip file - os.remove(data_file) # Upload data and create a data asset URI folder print("Uploading data to blob storage") diff --git a/cli/jobs/pipelines/automl/image-object-detection-task-fridge-items-pipeline/prepare_data.py b/cli/jobs/pipelines/automl/image-object-detection-task-fridge-items-pipeline/prepare_data.py index 02e7e36278..0b2e7a54d7 100644 --- a/cli/jobs/pipelines/automl/image-object-detection-task-fridge-items-pipeline/prepare_data.py +++ b/cli/jobs/pipelines/automl/image-object-detection-task-fridge-items-pipeline/prepare_data.py @@ -1,6 +1,7 @@ import argparse import json import os +from pathlib import Path import urllib import xml.etree.ElementTree as ET @@ -142,27 +143,20 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir): # create data folder if it doesnt exist. 
os.makedirs(dataset_parent_dir, exist_ok=True) - # download data - download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip" + # local data + repo_root = Path(__file__).resolve().parents[5] + local_data_path = repo_root / "sample-data" / "image-object-detection" / "odFridgeObjects.zip" # Extract current dataset name from dataset url - dataset_name = os.path.basename(download_url).split(".")[0] + dataset_name = os.path.basename(local_data_path).split(".")[0] # Get dataset path for later use dataset_dir = os.path.join(dataset_parent_dir, dataset_name) - # Get the data zip file path - data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip") - - # Download the dataset - urllib.request.urlretrieve(download_url, filename=data_file) - # extract files - with ZipFile(data_file, "r") as zip: + with ZipFile(local_data_path, "r") as zip: print("extracting files...") zip.extractall(path=dataset_parent_dir) print("done") - # delete zip file - os.remove(data_file) # Upload data and create a data asset URI folder print("Uploading data to blob storage") diff --git a/sample-data/image-classification/fridgeObjects.zip b/sample-data/image-classification/fridgeObjects.zip new file mode 100644 index 0000000000..1ea6e4a1e6 Binary files /dev/null and b/sample-data/image-classification/fridgeObjects.zip differ diff --git a/sample-data/image-classification/multilabelFridgeObjects.zip b/sample-data/image-classification/multilabelFridgeObjects.zip new file mode 100644 index 0000000000..7bb702ff8f Binary files /dev/null and b/sample-data/image-classification/multilabelFridgeObjects.zip differ diff --git a/sample-data/image-instance-segmentation/odFridgeObjectsMask.zip b/sample-data/image-instance-segmentation/odFridgeObjectsMask.zip new file mode 100644 index 0000000000..099b09aedc Binary files /dev/null and b/sample-data/image-instance-segmentation/odFridgeObjectsMask.zip differ diff --git 
a/sample-data/image-object-detection/odFridgeObjects.zip b/sample-data/image-object-detection/odFridgeObjects.zip new file mode 100644 index 0000000000..273696796d Binary files /dev/null and b/sample-data/image-object-detection/odFridgeObjects.zip differ diff --git a/sdk/python/foundation-models/system/evaluation/image-classification/multiclass-classification/image-multiclass-classification.ipynb b/sdk/python/foundation-models/system/evaluation/image-classification/multiclass-classification/image-multiclass-classification.ipynb index e34cb260e5..3500b06275 100644 --- a/sdk/python/foundation-models/system/evaluation/image-classification/multiclass-classification/image-multiclass-classification.ipynb +++ b/sdk/python/foundation-models/system/evaluation/image-classification/multiclass-classification/image-multiclass-classification.ipynb @@ -278,6 +278,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "from zipfile import ZipFile\n", "\n", @@ -287,27 +288,20 @@ "# create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip\"\n", + "# local data\n", + "repo_root = Path(__file__).resolve().parents[7]\n", + "local_data_path = repo_root / \"sample-data\" / \"image-classification\" / \"fridgeObjects.zip\"\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.split(local_data_path)[-1].split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", "# 
extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# delete zip file\n", - "os.remove(data_file)" + " print(\"done\")" ] }, { diff --git a/sdk/python/foundation-models/system/evaluation/image-classification/multilabel-classification/image-multilabel-classification.ipynb b/sdk/python/foundation-models/system/evaluation/image-classification/multilabel-classification/image-multilabel-classification.ipynb index ebf56f8863..d8e7ea0529 100644 --- a/sdk/python/foundation-models/system/evaluation/image-classification/multilabel-classification/image-multilabel-classification.ipynb +++ b/sdk/python/foundation-models/system/evaluation/image-classification/multilabel-classification/image-multilabel-classification.ipynb @@ -275,6 +275,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "from zipfile import ZipFile\n", "\n", @@ -284,27 +285,20 @@ "# create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/multilabelFridgeObjects.zip\"\n", + "# local data\n", + "repo_root = Path(__file__).resolve().parents[7]\n", + "local_data_path = repo_root / \"sample-data\" / \"image-classification\" / \"multilabelFridgeObjects.zip\"\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.split(local_data_path)[-1].split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - 
"urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", "# extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# delete zip file\n", - "os.remove(data_file)" + " print(\"done\")" ] }, { diff --git a/sdk/python/foundation-models/system/evaluation/image-object-detection/image-object-detection.ipynb b/sdk/python/foundation-models/system/evaluation/image-object-detection/image-object-detection.ipynb index 1d96fff4fa..0b3183f4a4 100644 --- a/sdk/python/foundation-models/system/evaluation/image-object-detection/image-object-detection.ipynb +++ b/sdk/python/foundation-models/system/evaluation/image-object-detection/image-object-detection.ipynb @@ -274,6 +274,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "from zipfile import ZipFile\n", "\n", @@ -283,11 +284,12 @@ "# Create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip\"\n", + "# local data\n", + "repo_root = Path(__file__).resolve().parents[6]\n", + "local_data_path = repo_root / \"sample-data\" / \"image-object-detection\" / \"odFridgeObjects.zip\"\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.split(local_data_path)[-1].split(\".\")[0]\n", "\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", @@ -295,17 +297,11 @@ "# Get the data zip file path\n", "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, 
filename=data_file)\n", - "\n", "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "\n", - "# Delete zip file\n", - "os.remove(data_file)" + " print(\"done\")" ] }, { diff --git a/sdk/python/foundation-models/system/finetune/image-classification/multiclass-classification/hftransformers-fridgeobjects-multiclass-classification.ipynb b/sdk/python/foundation-models/system/finetune/image-classification/multiclass-classification/hftransformers-fridgeobjects-multiclass-classification.ipynb index 87626de632..74e8eac9f9 100644 --- a/sdk/python/foundation-models/system/finetune/image-classification/multiclass-classification/hftransformers-fridgeobjects-multiclass-classification.ipynb +++ b/sdk/python/foundation-models/system/finetune/image-classification/multiclass-classification/hftransformers-fridgeobjects-multiclass-classification.ipynb @@ -285,6 +285,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "from zipfile import ZipFile\n", "\n", @@ -294,27 +295,23 @@ "# Create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip\"\n", + "# local data\n", + "repo_root = Path(__file__).resolve().parents[7]\n", + "local_data_path = repo_root / \"sample-data\" / \"image-classification\" / \"fridgeObjects.zip\"\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.split(local_data_path)[-1].split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", "# Get the data zip file path\n", "data_file = 
os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# Delete zip file\n", - "os.remove(data_file)" + " print(\"done\")" ] }, { diff --git a/sdk/python/foundation-models/system/finetune/image-classification/multilabel-classification/hftransformers-fridgeobjects-multilabel-classification.ipynb b/sdk/python/foundation-models/system/finetune/image-classification/multilabel-classification/hftransformers-fridgeobjects-multilabel-classification.ipynb index 90b1b07ead..6f21e805db 100644 --- a/sdk/python/foundation-models/system/finetune/image-classification/multilabel-classification/hftransformers-fridgeobjects-multilabel-classification.ipynb +++ b/sdk/python/foundation-models/system/finetune/image-classification/multilabel-classification/hftransformers-fridgeobjects-multilabel-classification.ipynb @@ -282,6 +282,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "from zipfile import ZipFile\n", "\n", @@ -291,27 +292,21 @@ "# Create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/multilabelFridgeObjects.zip\"\n", + "# local data\n", + "repo_root = Path(__file__).resolve().parents[7]\n", + "local_data_path = repo_root / \"sample-data\" / \"image-classification\" / \"multilabelFridgeObjects.zip\"\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.split(local_data_path)[-1].split(\".\")[0]\n", + "\n", "# 
Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# Delete zip file\n", - "os.remove(data_file)" + " print(\"done\")" ] }, { diff --git a/sdk/python/foundation-models/system/finetune/image-object-detection/mmdetection-fridgeobjects-object-detection.ipynb b/sdk/python/foundation-models/system/finetune/image-object-detection/mmdetection-fridgeobjects-object-detection.ipynb index a4402eeb9d..a999dbd0cf 100644 --- a/sdk/python/foundation-models/system/finetune/image-object-detection/mmdetection-fridgeobjects-object-detection.ipynb +++ b/sdk/python/foundation-models/system/finetune/image-object-detection/mmdetection-fridgeobjects-object-detection.ipynb @@ -301,6 +301,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "from zipfile import ZipFile\n", "\n", @@ -310,11 +311,12 @@ "# Create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip\"\n", + "# local data\n", + "repo_root = Path(__file__).resolve().parents[6]\n", + "local_data_path = repo_root / \"sample-data\" / \"image-object-detection\" / \"odFridgeObjects.zip\"\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.split(local_data_path)[-1].split(\".\")[0]\n", "\n", "# 
Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", @@ -322,17 +324,11 @@ "# Get the data zip file path\n", "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "\n", - "# Delete zip file\n", - "os.remove(data_file)" + " print(\"done\")" ] }, { diff --git a/sdk/python/foundation-models/system/inference/image-classification/image-classification-batch-endpoint.ipynb b/sdk/python/foundation-models/system/inference/image-classification/image-classification-batch-endpoint.ipynb index 4d737dcc7d..f5945ea263 100644 --- a/sdk/python/foundation-models/system/inference/image-classification/image-classification-batch-endpoint.ipynb +++ b/sdk/python/foundation-models/system/inference/image-classification/image-classification-batch-endpoint.ipynb @@ -170,6 +170,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "import shutil\n", "from zipfile import ZipFile\n", @@ -180,11 +181,12 @@ "# create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip\"\n", + "# local data\n", + "repo_root = Path(__file__).resolve().parents[6]\n", + "local_data_path = repo_root / \"sample-data\" / \"image-classification\" / \"fridgeObjects.zip\"\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.split(local_data_path)[-1].split(\".\")[0]\n", "# Get dataset 
path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", @@ -194,17 +196,11 @@ "# Get the data zip file path\n", "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "\n", - "# Delete zip file\n", - "os.remove(data_file)" + " print(\"done\")" ] }, { diff --git a/sdk/python/foundation-models/system/inference/image-classification/image-classification-online-endpoint.ipynb b/sdk/python/foundation-models/system/inference/image-classification/image-classification-online-endpoint.ipynb index 44092dc677..3ca9545d29 100644 --- a/sdk/python/foundation-models/system/inference/image-classification/image-classification-online-endpoint.ipynb +++ b/sdk/python/foundation-models/system/inference/image-classification/image-classification-online-endpoint.ipynb @@ -136,6 +136,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "from zipfile import ZipFile\n", "\n", @@ -145,27 +146,23 @@ "# Create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip\"\n", + "# local data\n", + "repo_root = Path(__file__).resolve().parents[6]\n", + "local_data_path = repo_root / \"sample-data\" / \"image-classification\" / \"fridgeObjects.zip\"\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.split(local_data_path)[-1].split(\".\")[0]\n", "# Get dataset path for later 
use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", "# Get the data zip file path\n", "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# Delete zip file\n", - "os.remove(data_file)" + " print(\"done\")" ] }, { diff --git a/sdk/python/foundation-models/system/inference/image-embeddings/image-embeddings-batch-endpoint.ipynb b/sdk/python/foundation-models/system/inference/image-embeddings/image-embeddings-batch-endpoint.ipynb index 632d1dfd44..f079fc2b5e 100644 --- a/sdk/python/foundation-models/system/inference/image-embeddings/image-embeddings-batch-endpoint.ipynb +++ b/sdk/python/foundation-models/system/inference/image-embeddings/image-embeddings-batch-endpoint.ipynb @@ -161,6 +161,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "import shutil\n", "from zipfile import ZipFile\n", @@ -171,11 +172,12 @@ "# create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip\"\n", + "# local data\n", + "repo_root = Path(__file__).resolve().parents[6]\n", + "local_data_path = repo_root / \"sample-data\" / \"image-classification\" / \"fridgeObjects.zip\"\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.split(local_data_path)[-1].split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, 
dataset_name)\n", "\n", @@ -185,17 +187,11 @@ "# Get the data zip file path\n", "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "\n", - "# Delete zip file\n", - "os.remove(data_file)" + " print(\"done\")" ] }, { diff --git a/sdk/python/foundation-models/system/inference/image-embeddings/image-embeddings-online-endpoint.ipynb b/sdk/python/foundation-models/system/inference/image-embeddings/image-embeddings-online-endpoint.ipynb index 7d9c025610..957b138417 100644 --- a/sdk/python/foundation-models/system/inference/image-embeddings/image-embeddings-online-endpoint.ipynb +++ b/sdk/python/foundation-models/system/inference/image-embeddings/image-embeddings-online-endpoint.ipynb @@ -127,6 +127,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "from zipfile import ZipFile\n", "\n", @@ -136,27 +137,23 @@ "# Create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip\"\n", + "# local data\n", + "repo_root = Path(__file__).resolve().parents[6]\n", + "local_data_path = repo_root / \"sample-data\" / \"image-classification\" / \"fridgeObjects.zip\"\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.split(local_data_path)[-1].split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", "# Get the data zip file 
path\n", "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# Delete zip file\n", - "os.remove(data_file)" + " print(\"done\")\n" ] }, { @@ -324,7 +321,11 @@ ] } ], - "metadata": {}, + "metadata": { + "language_info": { + "name": "python" + } + }, "nbformat": 4, "nbformat_minor": 2 } diff --git a/sdk/python/foundation-models/system/inference/image-object-detection/image-object-detection-batch-endpoint.ipynb b/sdk/python/foundation-models/system/inference/image-object-detection/image-object-detection-batch-endpoint.ipynb index 93f946a615..344450ef68 100644 --- a/sdk/python/foundation-models/system/inference/image-object-detection/image-object-detection-batch-endpoint.ipynb +++ b/sdk/python/foundation-models/system/inference/image-object-detection/image-object-detection-batch-endpoint.ipynb @@ -161,6 +161,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "import shutil\n", "from zipfile import ZipFile\n", @@ -171,11 +172,12 @@ "# create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip\"\n", + "# local data\n", + "repo_root = Path(__file__).resolve().parents[6]\n", + "local_data_path = repo_root / \"sample-data\" / \"image-object-detection\" / \"odFridgeObjects.zip\"\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.split(local_data_path)[-1].split(\".\")[0]\n", 
"# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", @@ -185,17 +187,11 @@ "# Get the data zip file path\n", "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "\n", - "# Delete zip file\n", - "os.remove(data_file)" + " print(\"done\")\n" ] }, { diff --git a/sdk/python/foundation-models/system/inference/image-object-detection/image-object-detection-online-endpoint.ipynb b/sdk/python/foundation-models/system/inference/image-object-detection/image-object-detection-online-endpoint.ipynb index 6c5cea7f50..970d354b1d 100644 --- a/sdk/python/foundation-models/system/inference/image-object-detection/image-object-detection-online-endpoint.ipynb +++ b/sdk/python/foundation-models/system/inference/image-object-detection/image-object-detection-online-endpoint.ipynb @@ -127,6 +127,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "from zipfile import ZipFile\n", "\n", @@ -136,27 +137,23 @@ "# Create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip\"\n", + "# local data\n", + "repo_root = Path(__file__).resolve().parents[6]\n", + "local_data_path = repo_root / \"sample-data\" / \"image-object-detection\" / \"odFridgeObjects.zip\"\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = 
os.path.split(local_data_path)[-1].split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", "# Get the data zip file path\n", "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# Delete zip file\n", - "os.remove(data_file)" + " print(\"done\")\n" ] }, { diff --git a/sdk/python/foundation-models/system/inference/image-text-embeddings/image-text-embeddings-batch-endpoint.ipynb b/sdk/python/foundation-models/system/inference/image-text-embeddings/image-text-embeddings-batch-endpoint.ipynb index b610478e7b..759fe6d741 100644 --- a/sdk/python/foundation-models/system/inference/image-text-embeddings/image-text-embeddings-batch-endpoint.ipynb +++ b/sdk/python/foundation-models/system/inference/image-text-embeddings/image-text-embeddings-batch-endpoint.ipynb @@ -166,6 +166,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "import shutil\n", "from zipfile import ZipFile\n", @@ -176,11 +177,12 @@ "# create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip\"\n", + "# local data\n", + "repo_root = Path(__file__).resolve().parents[6]\n", + "local_data_path = repo_root / \"sample-data\" / \"image-classification\" / \"fridgeObjects.zip\"\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = 
os.path.split(local_data_path)[-1].split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", @@ -190,17 +192,11 @@ "# Get the data zip file path\n", "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "\n", - "# Delete zip file\n", - "os.remove(data_file)" + " print(\"done\")" ] }, { diff --git a/sdk/python/foundation-models/system/inference/image-text-embeddings/image-text-embeddings-online-endpoint.ipynb b/sdk/python/foundation-models/system/inference/image-text-embeddings/image-text-embeddings-online-endpoint.ipynb index c67d6e02c4..9e660c0f9e 100644 --- a/sdk/python/foundation-models/system/inference/image-text-embeddings/image-text-embeddings-online-endpoint.ipynb +++ b/sdk/python/foundation-models/system/inference/image-text-embeddings/image-text-embeddings-online-endpoint.ipynb @@ -130,6 +130,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "from zipfile import ZipFile\n", "\n", @@ -139,27 +140,23 @@ "# Create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip\"\n", + "# local data\n", + "repo_root = Path(__file__).resolve().parents[6]\n", + "local_data_path = repo_root / \"sample-data\" / \"image-classification\" / \"fridgeObjects.zip\"\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = 
os.path.split(local_data_path)[-1].split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", "# Get the data zip file path\n", "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# Delete zip file\n", - "os.remove(data_file)" + " print(\"done\")\n" ] }, { diff --git a/sdk/python/foundation-models/system/inference/image-text-embeddings/text-to-image-retrieval.ipynb b/sdk/python/foundation-models/system/inference/image-text-embeddings/text-to-image-retrieval.ipynb index 0d52ef7bcf..adf97900ed 100644 --- a/sdk/python/foundation-models/system/inference/image-text-embeddings/text-to-image-retrieval.ipynb +++ b/sdk/python/foundation-models/system/inference/image-text-embeddings/text-to-image-retrieval.ipynb @@ -102,6 +102,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "from zipfile import ZipFile\n", "\n", @@ -111,11 +112,12 @@ "# create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip\"\n", + "# local data\n", + "repo_root = Path(__file__).resolve().parents[6]\n", + "local_data_path = repo_root / \"sample-data\" / \"image-classification\" / \"fridgeObjects.zip\"\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.split(local_data_path)[-1].split(\".\")[0]\n", "# Get dataset path for later use\n", 
"dataset_dir = os.path.join(dataset_parent_dir, dataset_name)" ] @@ -129,16 +131,11 @@ "# Get the data zip file path\n", "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", "# extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# delete zip file\n", - "os.remove(data_file)" + " print(\"done\")\n" ] }, { diff --git a/sdk/python/foundation-models/system/inference/image-to-text/image-to-text-batch-endpoint.ipynb b/sdk/python/foundation-models/system/inference/image-to-text/image-to-text-batch-endpoint.ipynb index c767466d37..9cb7d5fd96 100644 --- a/sdk/python/foundation-models/system/inference/image-to-text/image-to-text-batch-endpoint.ipynb +++ b/sdk/python/foundation-models/system/inference/image-to-text/image-to-text-batch-endpoint.ipynb @@ -161,6 +161,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "import shutil\n", "from zipfile import ZipFile\n", @@ -171,11 +172,12 @@ "# create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip\"\n", + "# local data\n", + "repo_root = Path(__file__).resolve().parents[6]\n", + "local_data_path = repo_root / \"sample-data\" / \"image-object-detection\" / \"odFridgeObjects.zip\"\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.split(local_data_path)[-1].split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", 
"\n", @@ -185,17 +187,11 @@ "# Get the data zip file path\n", "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "\n", - "# Delete zip file\n", - "os.remove(data_file)" + " print(\"done\")\n" ] }, { diff --git a/sdk/python/foundation-models/system/inference/image-to-text/image-to-text-online-endpoint.ipynb b/sdk/python/foundation-models/system/inference/image-to-text/image-to-text-online-endpoint.ipynb index 8b6c237d51..64893260a9 100644 --- a/sdk/python/foundation-models/system/inference/image-to-text/image-to-text-online-endpoint.ipynb +++ b/sdk/python/foundation-models/system/inference/image-to-text/image-to-text-online-endpoint.ipynb @@ -126,6 +126,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "from zipfile import ZipFile\n", "\n", @@ -135,27 +136,23 @@ "# Create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip\"\n", + "# local data\n", + "repo_root = Path(__file__).resolve().parents[6]\n", + "local_data_path = repo_root / \"sample-data\" / \"image-object-detection\" / \"odFridgeObjects.zip\"\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.split(local_data_path)[-1].split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", "# Get the data zip file path\n", "data_file = 
os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# Delete zip file\n", - "os.remove(data_file)" + " print(\"done\")\n" ] }, { diff --git a/sdk/python/foundation-models/system/inference/mask-generation/mask-generation-batch-endpoint.ipynb b/sdk/python/foundation-models/system/inference/mask-generation/mask-generation-batch-endpoint.ipynb index 4d2011cdf1..e82141b0e7 100644 --- a/sdk/python/foundation-models/system/inference/mask-generation/mask-generation-batch-endpoint.ipynb +++ b/sdk/python/foundation-models/system/inference/mask-generation/mask-generation-batch-endpoint.ipynb @@ -153,6 +153,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "import shutil\n", "from zipfile import ZipFile\n", @@ -163,11 +164,12 @@ "# create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip\"\n", + "# local data\n", + "repo_root = Path(__file__).resolve().parents[6]\n", + "local_data_path = repo_root / \"sample-data\" / \"image-object-detection\" / \"odFridgeObjects.zip\"\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.split(local_data_path)[-1].split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", @@ -177,17 +179,11 @@ "# Get the data zip file path\n", "data_file = os.path.join(dataset_parent_dir, 
f\"{dataset_name}.zip\")\n", "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "\n", - "# Delete zip file\n", - "os.remove(data_file)" + " print(\"done\")\n" ] }, { diff --git a/sdk/python/foundation-models/system/inference/mask-generation/mask-generation-online-endpoint.ipynb b/sdk/python/foundation-models/system/inference/mask-generation/mask-generation-online-endpoint.ipynb index 5ea054e763..46524e9aea 100644 --- a/sdk/python/foundation-models/system/inference/mask-generation/mask-generation-online-endpoint.ipynb +++ b/sdk/python/foundation-models/system/inference/mask-generation/mask-generation-online-endpoint.ipynb @@ -119,6 +119,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "from zipfile import ZipFile\n", "\n", @@ -128,27 +129,23 @@ "# Create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip\"\n", + "# local data\n", + "repo_root = Path(__file__).resolve().parents[6]\n", + "local_data_path = repo_root / \"sample-data\" / \"image-object-detection\" / \"odFridgeObjects.zip\"\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.split(local_data_path)[-1].split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", "# Get the data zip file path\n", "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", "\n", - "# Download the dataset\n", - 
"urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# Delete zip file\n", - "os.remove(data_file)" + " print(\"done\")\n" ] }, { diff --git a/sdk/python/foundation-models/system/inference/visual-question-answering/visual-question-answering-batch-endpoint.ipynb b/sdk/python/foundation-models/system/inference/visual-question-answering/visual-question-answering-batch-endpoint.ipynb index 49cc039cd7..25d4585ecc 100644 --- a/sdk/python/foundation-models/system/inference/visual-question-answering/visual-question-answering-batch-endpoint.ipynb +++ b/sdk/python/foundation-models/system/inference/visual-question-answering/visual-question-answering-batch-endpoint.ipynb @@ -161,6 +161,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "import shutil\n", "from zipfile import ZipFile\n", @@ -171,11 +172,12 @@ "# create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip\"\n", + "# local data\n", + "repo_root = Path(__file__).resolve().parents[6]\n", + "local_data_path = repo_root / \"sample-data\" / \"image-object-detection\" / \"odFridgeObjects.zip\"\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.split(local_data_path)[-1].split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", @@ -185,17 +187,11 @@ "# Get the data zip file path\n", "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", "\n", 
- "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "\n", - "# Delete zip file\n", - "os.remove(data_file)" + " print(\"done\")\n" ] }, { diff --git a/sdk/python/foundation-models/system/inference/visual-question-answering/visual-question-answering-online-endpoint.ipynb b/sdk/python/foundation-models/system/inference/visual-question-answering/visual-question-answering-online-endpoint.ipynb index b5a7edb022..d0acc1e0e0 100644 --- a/sdk/python/foundation-models/system/inference/visual-question-answering/visual-question-answering-online-endpoint.ipynb +++ b/sdk/python/foundation-models/system/inference/visual-question-answering/visual-question-answering-online-endpoint.ipynb @@ -126,6 +126,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "from zipfile import ZipFile\n", "\n", @@ -135,27 +136,23 @@ "# Create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip\"\n", + "# local data\n", + "repo_root = Path(__file__).resolve().parents[6]\n", + "local_data_path = repo_root / \"sample-data\" / \"image-object-detection\" / \"odFridgeObjects.zip\"\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.split(local_data_path)[-1].split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", "# Get the data zip file path\n", "data_file = os.path.join(dataset_parent_dir, 
f\"{dataset_name}.zip\")\n", "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# Delete zip file\n", - "os.remove(data_file)" + " print(\"done\")\n" ] }, { diff --git a/sdk/python/foundation-models/system/inference/zero-shot-image-classification/zero-shot-image-classification-batch-endpoint.ipynb b/sdk/python/foundation-models/system/inference/zero-shot-image-classification/zero-shot-image-classification-batch-endpoint.ipynb index 29ddef08c3..196d21f059 100644 --- a/sdk/python/foundation-models/system/inference/zero-shot-image-classification/zero-shot-image-classification-batch-endpoint.ipynb +++ b/sdk/python/foundation-models/system/inference/zero-shot-image-classification/zero-shot-image-classification-batch-endpoint.ipynb @@ -166,6 +166,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "import shutil\n", "from zipfile import ZipFile\n", @@ -176,31 +177,23 @@ "# create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip\"\n", + "# local data\n", + "repo_root = Path(__file__).resolve().parents[6]\n", + "local_data_path = repo_root / \"sample-data\" / \"image-classification\" / \"fridgeObjects.zip\"\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.split(local_data_path)[-1].split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", "if os.path.exists(dataset_dir):\n", 
" shutil.rmtree(dataset_dir)\n", "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "\n", - "# Delete zip file\n", - "os.remove(data_file)" + " print(\"done\")" ] }, { diff --git a/sdk/python/foundation-models/system/inference/zero-shot-image-classification/zero-shot-image-classification-online-endpoint.ipynb b/sdk/python/foundation-models/system/inference/zero-shot-image-classification/zero-shot-image-classification-online-endpoint.ipynb index 87bc9cbf70..691334c9af 100644 --- a/sdk/python/foundation-models/system/inference/zero-shot-image-classification/zero-shot-image-classification-online-endpoint.ipynb +++ b/sdk/python/foundation-models/system/inference/zero-shot-image-classification/zero-shot-image-classification-online-endpoint.ipynb @@ -133,6 +133,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "from zipfile import ZipFile\n", "\n", @@ -142,27 +143,20 @@ "# Create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# Download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip\"\n", + "# local data\n", + "repo_root = Path(__file__).resolve().parents[6]\n", + "local_data_path = repo_root / \"sample-data\" / \"image-classification\" / \"fridgeObjects.zip\"\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.split(local_data_path)[-1].split(\".\")[0]\n", "# Get dataset path for 
later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", "# Extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# Delete zip file\n", - "os.remove(data_file)" + " print(\"done\")\n" ] }, { diff --git a/sdk/python/jobs/automl-standalone-jobs/automl-image-classification-multiclass-task-fridge-items/automl-image-classification-multiclass-task-fridge-items.ipynb b/sdk/python/jobs/automl-standalone-jobs/automl-image-classification-multiclass-task-fridge-items/automl-image-classification-multiclass-task-fridge-items.ipynb index 01f8f364d8..1b6771d677 100644 --- a/sdk/python/jobs/automl-standalone-jobs/automl-image-classification-multiclass-task-fridge-items/automl-image-classification-multiclass-task-fridge-items.ipynb +++ b/sdk/python/jobs/automl-standalone-jobs/automl-image-classification-multiclass-task-fridge-items/automl-image-classification-multiclass-task-fridge-items.ipynb @@ -136,6 +136,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "from zipfile import ZipFile\n", "\n", @@ -145,11 +146,12 @@ "# create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip\"\n", + "# local data\n", + "repo_root = Path(__file__).resolve().parents[5]\n", + "local_data_path = repo_root / \"sample-data\" / \"image-classification\" / \"fridgeObjects.zip\"\n", "\n", "# Extract current dataset name from dataset url\n", - 
"dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.split(local_data_path)[-1].split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)" ] @@ -160,19 +162,12 @@ "metadata": {}, "outputs": [], "source": [ - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", "\n", "# extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# delete zip file\n", - "os.remove(data_file)" + " print(\"done\")\n" ] }, { diff --git a/sdk/python/jobs/automl-standalone-jobs/automl-image-classification-multilabel-task-fridge-items/automl-image-classification-multilabel-task-fridge-items.ipynb b/sdk/python/jobs/automl-standalone-jobs/automl-image-classification-multilabel-task-fridge-items/automl-image-classification-multilabel-task-fridge-items.ipynb index ba864d1f23..c8d3304400 100644 --- a/sdk/python/jobs/automl-standalone-jobs/automl-image-classification-multilabel-task-fridge-items/automl-image-classification-multilabel-task-fridge-items.ipynb +++ b/sdk/python/jobs/automl-standalone-jobs/automl-image-classification-multilabel-task-fridge-items/automl-image-classification-multilabel-task-fridge-items.ipynb @@ -134,6 +134,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "from zipfile import ZipFile\n", "\n", @@ -143,27 +144,20 @@ "# create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/multilabelFridgeObjects.zip\"\n", + "# local 
data\n", + "repo_root = Path(__file__).resolve().parents[5]\n", + "local_data_path = repo_root / \"sample-data\" / \"image-classification\" / \"multilabelFridgeObjects.zip\"\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.split(local_data_path)[-1].split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", "# extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# delete zip file\n", - "os.remove(data_file)" + " print(\"done\")\n" ] }, { diff --git a/sdk/python/jobs/automl-standalone-jobs/automl-image-object-detection-task-fridge-items-batch-scoring/image-object-detection-batch-scoring-non-mlflow-model.ipynb b/sdk/python/jobs/automl-standalone-jobs/automl-image-object-detection-task-fridge-items-batch-scoring/image-object-detection-batch-scoring-non-mlflow-model.ipynb index c93937e816..0ca5d7276f 100644 --- a/sdk/python/jobs/automl-standalone-jobs/automl-image-object-detection-task-fridge-items-batch-scoring/image-object-detection-batch-scoring-non-mlflow-model.ipynb +++ b/sdk/python/jobs/automl-standalone-jobs/automl-image-object-detection-task-fridge-items-batch-scoring/image-object-detection-batch-scoring-non-mlflow-model.ipynb @@ -118,6 +118,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "from zipfile import ZipFile\n", "\n", @@ -127,27 +128,20 @@ "# create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, 
exist_ok=True)\n", "\n", - "# download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip\"\n", + "# local data\n", + "repo_root = Path(__file__).resolve().parents[5]\n", + "local_data_path = repo_root / \"sample-data\" / \"image-object-detection\" / \"odFridgeObjects.zip\"\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.split(local_data_path)[-1].split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", "# extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# delete zip file\n", - "os.remove(data_file)" + " print(\"done\")\n" ] }, { diff --git a/sdk/python/jobs/automl-standalone-jobs/automl-image-object-detection-task-fridge-items/automl-image-object-detection-task-fridge-items.ipynb b/sdk/python/jobs/automl-standalone-jobs/automl-image-object-detection-task-fridge-items/automl-image-object-detection-task-fridge-items.ipynb index 1e622208dc..51438d450f 100644 --- a/sdk/python/jobs/automl-standalone-jobs/automl-image-object-detection-task-fridge-items/automl-image-object-detection-task-fridge-items.ipynb +++ b/sdk/python/jobs/automl-standalone-jobs/automl-image-object-detection-task-fridge-items/automl-image-object-detection-task-fridge-items.ipynb @@ -133,6 +133,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "from zipfile import ZipFile\n", "\n", @@ 
-142,27 +143,20 @@ "# create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip\"\n", + "# local data\n", + "repo_root = Path(__file__).resolve().parents[5]\n", + "local_data_path = repo_root / \"sample-data\" / \"image-object-detection\" / \"odFridgeObjects.zip\"\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.split(local_data_path)[-1].split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", "# extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# delete zip file\n", - "os.remove(data_file)" + " print(\"done\")\n" ] }, { diff --git a/sdk/python/jobs/pipelines/1h_automl_in_pipeline/automl-image-classification-multiclass-in-pipeline/automl-image-classification-multiclass-in-pipeline.ipynb b/sdk/python/jobs/pipelines/1h_automl_in_pipeline/automl-image-classification-multiclass-in-pipeline/automl-image-classification-multiclass-in-pipeline.ipynb index 60e96335fc..87f8c1ce2b 100644 --- a/sdk/python/jobs/pipelines/1h_automl_in_pipeline/automl-image-classification-multiclass-in-pipeline/automl-image-classification-multiclass-in-pipeline.ipynb +++ b/sdk/python/jobs/pipelines/1h_automl_in_pipeline/automl-image-classification-multiclass-in-pipeline/automl-image-classification-multiclass-in-pipeline.ipynb @@ 
-114,6 +114,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "from zipfile import ZipFile\n", "\n", @@ -123,27 +124,20 @@ "# create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip\"\n", + "# local data\n", + "repo_root = Path(__file__).resolve().parents[6]\n", + "local_data_path = repo_root / \"sample-data\" / \"image-classification\" / \"fridgeObjects.zip\"\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.split(local_data_path)[-1].split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", "# extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# delete zip file\n", - "os.remove(data_file)" + " print(\"done\")\n" ] }, { diff --git a/sdk/python/jobs/pipelines/1h_automl_in_pipeline/automl-image-classification-multilabel-in-pipeline/automl-image-classification-multilabel-in-pipeline.ipynb b/sdk/python/jobs/pipelines/1h_automl_in_pipeline/automl-image-classification-multilabel-in-pipeline/automl-image-classification-multilabel-in-pipeline.ipynb index 6766bce0f9..a903b28d30 100644 --- a/sdk/python/jobs/pipelines/1h_automl_in_pipeline/automl-image-classification-multilabel-in-pipeline/automl-image-classification-multilabel-in-pipeline.ipynb +++ 
b/sdk/python/jobs/pipelines/1h_automl_in_pipeline/automl-image-classification-multilabel-in-pipeline/automl-image-classification-multilabel-in-pipeline.ipynb @@ -110,6 +110,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "from zipfile import ZipFile\n", "\n", @@ -119,27 +120,20 @@ "# create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/multilabelFridgeObjects.zip\"\n", + "# local data\n", + "repo_root = Path(__file__).resolve().parents[6]\n", + "local_data_path = repo_root / \"sample-data\" / \"image-classification\" / \"multilabelFridgeObjects.zip\"\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.split(local_data_path)[-1].split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", "# extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# delete zip file\n", - "os.remove(data_file)" + " print(\"done\")\n" ] }, { diff --git a/sdk/python/jobs/pipelines/1h_automl_in_pipeline/automl-image-object-detection-in-pipeline/automl-image-object-detection-in-pipeline.ipynb b/sdk/python/jobs/pipelines/1h_automl_in_pipeline/automl-image-object-detection-in-pipeline/automl-image-object-detection-in-pipeline.ipynb index b294129270..5d18bd251b 100644 --- 
a/sdk/python/jobs/pipelines/1h_automl_in_pipeline/automl-image-object-detection-in-pipeline/automl-image-object-detection-in-pipeline.ipynb +++ b/sdk/python/jobs/pipelines/1h_automl_in_pipeline/automl-image-object-detection-in-pipeline/automl-image-object-detection-in-pipeline.ipynb @@ -109,6 +109,7 @@ "outputs": [], "source": [ "import os\n", + "from pathlib import Path\n", "import urllib\n", "from zipfile import ZipFile\n", "\n", @@ -118,27 +119,20 @@ "# create data folder if it doesnt exist.\n", "os.makedirs(dataset_parent_dir, exist_ok=True)\n", "\n", - "# download data\n", - "download_url = \"https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip\"\n", + "# local data\n", + "repo_root = Path(__file__).resolve().parents[6]\n", + "local_data_path = repo_root / \"sample-data\" / \"image-object-detection\" / \"odFridgeObjects.zip\"\n", "\n", "# Extract current dataset name from dataset url\n", - "dataset_name = os.path.split(download_url)[-1].split(\".\")[0]\n", + "dataset_name = os.path.split(local_data_path)[-1].split(\".\")[0]\n", "# Get dataset path for later use\n", "dataset_dir = os.path.join(dataset_parent_dir, dataset_name)\n", "\n", - "# Get the data zip file path\n", - "data_file = os.path.join(dataset_parent_dir, f\"{dataset_name}.zip\")\n", - "\n", - "# Download the dataset\n", - "urllib.request.urlretrieve(download_url, filename=data_file)\n", - "\n", "# extract files\n", - "with ZipFile(data_file, \"r\") as zip:\n", + "with ZipFile(local_data_path, \"r\") as zip:\n", " print(\"extracting files...\")\n", " zip.extractall(path=dataset_parent_dir)\n", - " print(\"done\")\n", - "# delete zip file\n", - "os.remove(data_file)" + " print(\"done\")\n" ] }, {