Skip to content

Commit 358c5e4

Browse files
authored
Merge pull request #4 from openml/feature/dataloader_imagedataset
Feature/dataloader image dataset
2 parents 3e404b5 + 32421ab commit 358c5e4

12 files changed

+606
-196
lines changed

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,7 @@ openml_pytorch/__pycache__/
44
.idea/
55

66
openml_pytorch/layers/__pycache__/
7+
8+
venv
9+
10+
model.onnx

README.md

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,4 +32,10 @@ run = openml.runs.run_model_on_task(model, task, avoid_duplicate_runs=False)
3232
run.publish()
3333
print('URL for run: %s/run/%d' % (openml.config.server, run.run_id))
3434
```
35-
Note: The input layer of the network should be compatible with OpenML data output shape. Please check examples for more information.
35+
Note: The input layer of the network should be compatible with OpenML data output shape. Please check [examples](/examples/) for more information.
36+
37+
Additionally, if you want to publish the run with an ONNX file, you must call ```openml_pytorch.add_onnx_to_run()``` immediately before ```run.publish()```.
38+
39+
```python
40+
run = openml_pytorch.add_onnx_to_run(run)
41+
```

examples/create_new_task.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
import openml
2+
from openml.tasks import OpenMLClassificationTask
3+
4+
task = openml.tasks.get_task(361175)
5+
# openml.config.apikey = 'KEY'
6+
# Define task parameters
7+
task_type = openml.tasks.TaskType.SUPERVISED_CLASSIFICATION
8+
evaluation_measure = 'predictive_accuracy'
9+
estimation_procedure = {
10+
'type': 'crossvalidation',
11+
'parameters': {
12+
'number_repeats': '1',
13+
'number_folds': '10',
14+
'percentage': '',
15+
'stratified_sampling': 'true'
16+
},
17+
'data_splits_url': 'https://api.openml.org/api_splits/get/361175/Task_361175_splits.arff'
18+
}
19+
target_name = 'CATEGORY'
20+
class_labels = ['Adrenal_gland', 'Bile-duct', 'Bladder', 'Breast', 'Cervix', 'Colon', 'Esophagus', 'HeadNeck', 'Kidney', 'Liver', 'Lung', 'Ovarian', 'Pancreatic', 'Prostate', 'Skin', 'Stomach', 'Testis', 'Thyroid', 'Uterus']
21+
cost_matrix = None
22+
23+
# 'split': <openml.tasks.split.OpenMLSplit object at 0x7efca59476a0>
24+
25+
# Create the task
26+
new_task = openml.tasks.create_task(
27+
task_type=task_type,
28+
dataset_id=task.dataset_id,
29+
estimation_procedure_id = task.estimation_procedure_id,
30+
# estimation_procedure=estimation_procedure,
31+
target_name=target_name,
32+
class_labels=class_labels,
33+
cost_matrix=cost_matrix
34+
)
35+
36+
print(new_task)
37+
38+
new_task.publish()
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
import torch
2+
import torch.nn as nn
3+
import torch.nn.functional as F
4+
import openml_pytorch
5+
import openml
6+
import warnings
7+
8+
# warnings.simplefilter(action='ignore', category=FutureWarning)
9+
warnings.simplefilter(action='ignore')
10+
11+
12+
def evaluate_torch_model(model):
13+
# Download CV splits
14+
task = openml.tasks.get_task(362070)
15+
# Evaluate model
16+
run = openml.runs.run_model_on_task(model, task, avoid_duplicate_runs=False)
17+
# Publish
18+
run = openml_pytorch.add_onnx_to_run(run) # Optional, to inspect afterward
19+
run.publish()
20+
return run
21+
22+
from torchvision import models
23+
from torchvision.transforms import v2
24+
25+
class Model2(nn.Module):
26+
def __init__(self, num_classes=67):
27+
super(Model2, self).__init__()
28+
self.conv1 = nn.Conv2d(3, 6, 5)
29+
self.pool = nn.MaxPool2d(2, 2)
30+
self.conv2 = nn.Conv2d(6, 16, 5)
31+
self.fc1 = nn.Linear(13456, 120)
32+
self.fc2 = nn.Linear(120, 84)
33+
self.fc3 = nn.Linear(84, num_classes)
34+
35+
def forward(self, x):
36+
x = self.pool(F.relu(self.conv1(x)))
37+
x = self.pool(F.relu(self.conv2(x)))
38+
x = torch.flatten(x, 1) # flatten all dimensions except batch
39+
x = F.relu(self.fc1(x))
40+
x = F.relu(self.fc2(x))
41+
x = self.fc3(x)
42+
return x
43+
44+
# Training parameters
45+
openml_pytorch.config.batch_size = 32
46+
openml_pytorch.config.epoch_count = 1
47+
openml_pytorch.config.image_size = 128
48+
49+
transforms = v2.Compose([
50+
v2.RandomResizedCrop(size=(224, 224), antialias=True),
51+
v2.RandomHorizontalFlip(p=0.5),
52+
v2.ToDtype(torch.float32, scale=True),
53+
v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
54+
])
55+
56+
openml_pytorch.config.data_augemntation = transforms
57+
openml_pytorch.config.perform_validation = True
58+
59+
openml.config.apikey = 'key'
60+
openml_pytorch.config.file_dir = openml.config.get_cache_directory()+'/datasets/45923/Images/'
61+
openml_pytorch.config.filename_col = "Filename"
62+
63+
# Run
64+
run = evaluate_torch_model(Model2()) # Replace with your model
65+
print('URL for run: %s/run/%d?api_key=%s' % (openml.config.server, run.run_id, openml.config.apikey))
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
"""
2+
PyTorch image classification model example
3+
==================
4+
5+
An example of a PyTorch network that classifies Meta-Album images.
6+
"""
7+
8+
import torch.nn
9+
import torch.optim
10+
11+
import openml
12+
import openml_pytorch
13+
import openml_pytorch.layers
14+
import openml_pytorch.config
15+
import logging
16+
17+
import warnings
18+
import pandas as pd
19+
20+
# Suppress all warning messages (the filter below is not restricted to FutureWarning)
21+
warnings.simplefilter(action='ignore')
22+
23+
############################################################################
24+
# Enable logging in order to observe the progress while running the example.
25+
openml.config.logger.setLevel(logging.DEBUG)
26+
openml_pytorch.config.logger.setLevel(logging.DEBUG)
27+
############################################################################
28+
29+
############################################################################
30+
import torch.nn as nn
31+
import torch.nn.functional as F
32+
33+
# Example model. You can do better :)
34+
class Net(nn.Module):
35+
def __init__(self):
36+
super().__init__()
37+
self.conv1 = nn.Conv2d(3, 6, 5)
38+
self.pool = nn.MaxPool2d(2, 2)
39+
self.conv2 = nn.Conv2d(6, 16, 5)
40+
self.fc1 = nn.Linear(13456, 120)
41+
self.fc2 = nn.Linear(120, 84)
42+
self.fc3 = nn.Linear(84, 19) # To user - Remember to set correct size of last layer.
43+
44+
def forward(self, x):
45+
x = self.pool(F.relu(self.conv1(x)))
46+
x = self.pool(F.relu(self.conv2(x)))
47+
x = torch.flatten(x, 1) # flatten all dimensions except batch
48+
x = F.relu(self.fc1(x))
49+
x = F.relu(self.fc2(x))
50+
x = self.fc3(x)
51+
return x
52+
53+
net = Net()
54+
55+
############################################################################
56+
openml.config.apikey = 'key'
57+
openml_pytorch.config.file_dir = openml.config.get_cache_directory()+'/datasets/44312/PNU_Micro/images/'
58+
openml_pytorch.config.filename_col = "FILE_NAME"
59+
openml_pytorch.config.perform_validation = False
60+
############################################################################
61+
# The main network, composed of the above specified networks.
62+
model = net
63+
64+
############################################################################
65+
# Download the OpenML task for the Meta_Album_PNU_Micro dataset.
66+
task = openml.tasks.get_task(361987)
67+
68+
############################################################################
69+
# Run the model on the task (requires an API key).
70+
run = openml.runs.run_model_on_task(model, task, avoid_duplicate_runs=False)
71+
72+
# If you want to publish the run with the onnx file,
73+
# then you must call openml_pytorch.add_onnx_to_run() immediately before run.publish().
74+
# When you publish, the ONNX file of the last trained model is uploaded.
75+
# Be careful not to call this function if another run_model_on_task has been called in between,
76+
# because on publish only the last trained model (from the last run_model_on_task call) is uploaded.
77+
run = openml_pytorch.add_onnx_to_run(run)
78+
79+
run.publish()
80+
81+
print('URL for run: %s/run/%d' % (openml.config.server, run.run_id))
82+
############################################################################
83+
84+
# Visualize model in netron
85+
86+
from urllib.request import urlretrieve
87+
88+
published_run = openml.runs.get_run(run.run_id)
89+
url = 'https://api.openml.org/data/download/{}/model.onnx'.format(published_run.output_files['onnx_model'])
90+
91+
file_path, _ = urlretrieve(url, 'model.onnx')
92+
93+
import netron
94+
# Visualize the ONNX model using Netron
95+
netron.start(file_path)
96+
97+
98+
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
"""
2+
PyTorch image classification model using pre-trained ResNet model example
3+
=========================================================================
4+
5+
An example of a PyTorch network that classifies Meta-Album images.
6+
"""
7+
8+
import torch.nn
9+
import torch.optim
10+
11+
import openml
12+
import openml_pytorch
13+
import openml_pytorch.layers
14+
import openml_pytorch.config
15+
import logging
16+
17+
############################################################################
18+
# Enable logging in order to observe the progress while running the example.
19+
openml.config.logger.setLevel(logging.DEBUG)
20+
openml_pytorch.config.logger.setLevel(logging.DEBUG)
21+
############################################################################
22+
23+
############################################################################
24+
import torch.nn as nn
25+
import torch.nn.functional as F
26+
27+
# Example model. You can do better :)
28+
import torchvision.models as models
29+
30+
# Load the pre-trained ResNet model
31+
model = models.resnet50(pretrained=True)
32+
33+
# Modify the last fully connected layer to the required number of classes
34+
num_classes = 20
35+
in_features = model.fc.in_features
36+
model.fc = nn.Linear(in_features, num_classes)
37+
38+
# Optional: If you're fine-tuning, you may want to freeze the pre-trained layers
39+
for param in model.parameters():
40+
param.requires_grad = False
41+
42+
# If you want to train the last layer only (the newly added layer)
43+
for param in model.fc.parameters():
44+
param.requires_grad = True
45+
46+
############################################################################
47+
# Setting an appropriate optimizer
48+
from openml import OpenMLTask
49+
50+
def custom_optimizer_gen(model: torch.nn.Module, task: OpenMLTask) -> torch.optim.Optimizer:
51+
return torch.optim.Adam(model.fc.parameters())
52+
53+
openml_pytorch.config.optimizer_gen = custom_optimizer_gen
54+
55+
############################################################################
56+
57+
# openml.config.apikey = 'KEY'
58+
59+
############################################################################
60+
# Download the OpenML task for the Meta_Album_PNU_Micro dataset.
61+
task = openml.tasks.get_task(361152)
62+
63+
############################################################################
64+
# Run the model on the task (requires an API key).
65+
run = openml.runs.run_model_on_task(model, task, avoid_duplicate_runs=False)
66+
67+
# Publish the experiment on OpenML (optional, requires an API key).
68+
run.publish()
69+
70+
print('URL for run: %s/run/%d' % (openml.config.server, run.run_id))
71+
72+
############################################################################
73+
74+
# Visualize model in netron
75+
import netron
76+
77+
# Define input size
78+
input_size = (32,3,128,128)
79+
80+
# Create a dummy input with the specified size
81+
dummy_input = torch.randn(input_size)
82+
83+
# Export the model to ONNX
84+
torch.onnx.export(model, dummy_input, "model.onnx", verbose=True)
85+
86+
# Visualize the ONNX model using Netron
87+
netron.start("model.onnx")

examples/pytorch_sequential_classification_model.py

Lines changed: 0 additions & 82 deletions
This file was deleted.

0 commit comments

Comments
 (0)