From 01c13b162fa3c8307bc9fa5a8b23e36fde4c8db5 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 4 Nov 2024 13:02:23 +0000 Subject: [PATCH] Deployed 6411e5e with MkDocs version: 1.6.1 --- .nojekyll | 0 404.html | 640 ++ API reference/Callbacks/index.html | 2761 +++++++ API reference/Custom Datasets/index.html | 1132 +++ API reference/Metrics/index.html | 1121 +++ API reference/OpenML Connection/index.html | 4975 ++++++++++++ API reference/Trainer/index.html | 2422 ++++++ Examples/Create Dataset and Task/index.html | 1669 ++++ Examples/Image Classification Task/index.html | 1830 +++++ .../index.html | 1845 +++++ .../Sequential Classification Task/index.html | 1820 +++++ Examples/Tabular Classification/index.html | 1610 ++++ Examples/index.html | 663 ++ Integrations of OpenML in PyTorch/index.html | 731 ++ Limitations of the API/index.html | 680 ++ Philosophy behind the API Design/index.html | 735 ++ assets/_mkdocstrings.css | 143 + assets/images/favicon.png | Bin 0 -> 1870 bytes assets/javascripts/bundle.83f73b43.min.js | 16 + assets/javascripts/bundle.83f73b43.min.js.map | 7 + assets/javascripts/lunr/min/lunr.ar.min.js | 1 + assets/javascripts/lunr/min/lunr.da.min.js | 18 + assets/javascripts/lunr/min/lunr.de.min.js | 18 + assets/javascripts/lunr/min/lunr.du.min.js | 18 + assets/javascripts/lunr/min/lunr.el.min.js | 1 + assets/javascripts/lunr/min/lunr.es.min.js | 18 + assets/javascripts/lunr/min/lunr.fi.min.js | 18 + assets/javascripts/lunr/min/lunr.fr.min.js | 18 + assets/javascripts/lunr/min/lunr.he.min.js | 1 + assets/javascripts/lunr/min/lunr.hi.min.js | 1 + assets/javascripts/lunr/min/lunr.hu.min.js | 18 + assets/javascripts/lunr/min/lunr.hy.min.js | 1 + assets/javascripts/lunr/min/lunr.it.min.js | 18 + assets/javascripts/lunr/min/lunr.ja.min.js | 1 + assets/javascripts/lunr/min/lunr.jp.min.js | 1 + assets/javascripts/lunr/min/lunr.kn.min.js | 1 + assets/javascripts/lunr/min/lunr.ko.min.js | 
1 + assets/javascripts/lunr/min/lunr.multi.min.js | 1 + assets/javascripts/lunr/min/lunr.nl.min.js | 18 + assets/javascripts/lunr/min/lunr.no.min.js | 18 + assets/javascripts/lunr/min/lunr.pt.min.js | 18 + assets/javascripts/lunr/min/lunr.ro.min.js | 18 + assets/javascripts/lunr/min/lunr.ru.min.js | 18 + assets/javascripts/lunr/min/lunr.sa.min.js | 1 + .../lunr/min/lunr.stemmer.support.min.js | 1 + assets/javascripts/lunr/min/lunr.sv.min.js | 18 + assets/javascripts/lunr/min/lunr.ta.min.js | 1 + assets/javascripts/lunr/min/lunr.te.min.js | 1 + assets/javascripts/lunr/min/lunr.th.min.js | 1 + assets/javascripts/lunr/min/lunr.tr.min.js | 18 + assets/javascripts/lunr/min/lunr.vi.min.js | 1 + assets/javascripts/lunr/min/lunr.zh.min.js | 1 + assets/javascripts/lunr/tinyseg.js | 206 + assets/javascripts/lunr/wordcut.js | 6708 +++++++++++++++++ .../workers/search.6ce7567c.min.js | 42 + .../workers/search.6ce7567c.min.js.map | 7 + assets/stylesheets/main.0253249f.min.css | 1 + assets/stylesheets/main.0253249f.min.css.map | 1 + assets/stylesheets/palette.06af60db.min.css | 1 + .../stylesheets/palette.06af60db.min.css.map | 1 + index.html | 853 +++ objects.inv | Bin 0 -> 743 bytes search/search_index.json | 1 + sitemap.xml | 3 + sitemap.xml.gz | Bin 0 -> 127 bytes 65 files changed, 32911 insertions(+) create mode 100644 .nojekyll create mode 100644 404.html create mode 100644 API reference/Callbacks/index.html create mode 100644 API reference/Custom Datasets/index.html create mode 100644 API reference/Metrics/index.html create mode 100644 API reference/OpenML Connection/index.html create mode 100644 API reference/Trainer/index.html create mode 100644 Examples/Create Dataset and Task/index.html create mode 100644 Examples/Image Classification Task/index.html create mode 100644 Examples/Pretrained Transformer Image Classification Task/index.html create mode 100644 Examples/Sequential Classification Task/index.html create mode 100644 Examples/Tabular Classification/index.html 
create mode 100644 Examples/index.html create mode 100644 Integrations of OpenML in PyTorch/index.html create mode 100644 Limitations of the API/index.html create mode 100644 Philosophy behind the API Design/index.html create mode 100644 assets/_mkdocstrings.css create mode 100644 assets/images/favicon.png create mode 100644 assets/javascripts/bundle.83f73b43.min.js create mode 100644 assets/javascripts/bundle.83f73b43.min.js.map create mode 100644 assets/javascripts/lunr/min/lunr.ar.min.js create mode 100644 assets/javascripts/lunr/min/lunr.da.min.js create mode 100644 assets/javascripts/lunr/min/lunr.de.min.js create mode 100644 assets/javascripts/lunr/min/lunr.du.min.js create mode 100644 assets/javascripts/lunr/min/lunr.el.min.js create mode 100644 assets/javascripts/lunr/min/lunr.es.min.js create mode 100644 assets/javascripts/lunr/min/lunr.fi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.fr.min.js create mode 100644 assets/javascripts/lunr/min/lunr.he.min.js create mode 100644 assets/javascripts/lunr/min/lunr.hi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.hu.min.js create mode 100644 assets/javascripts/lunr/min/lunr.hy.min.js create mode 100644 assets/javascripts/lunr/min/lunr.it.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ja.min.js create mode 100644 assets/javascripts/lunr/min/lunr.jp.min.js create mode 100644 assets/javascripts/lunr/min/lunr.kn.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ko.min.js create mode 100644 assets/javascripts/lunr/min/lunr.multi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.nl.min.js create mode 100644 assets/javascripts/lunr/min/lunr.no.min.js create mode 100644 assets/javascripts/lunr/min/lunr.pt.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ro.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ru.min.js create mode 100644 assets/javascripts/lunr/min/lunr.sa.min.js create mode 100644 assets/javascripts/lunr/min/lunr.stemmer.support.min.js 
create mode 100644 assets/javascripts/lunr/min/lunr.sv.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ta.min.js create mode 100644 assets/javascripts/lunr/min/lunr.te.min.js create mode 100644 assets/javascripts/lunr/min/lunr.th.min.js create mode 100644 assets/javascripts/lunr/min/lunr.tr.min.js create mode 100644 assets/javascripts/lunr/min/lunr.vi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.zh.min.js create mode 100644 assets/javascripts/lunr/tinyseg.js create mode 100644 assets/javascripts/lunr/wordcut.js create mode 100644 assets/javascripts/workers/search.6ce7567c.min.js create mode 100644 assets/javascripts/workers/search.6ce7567c.min.js.map create mode 100644 assets/stylesheets/main.0253249f.min.css create mode 100644 assets/stylesheets/main.0253249f.min.css.map create mode 100644 assets/stylesheets/palette.06af60db.min.css create mode 100644 assets/stylesheets/palette.06af60db.min.css.map create mode 100644 index.html create mode 100644 objects.inv create mode 100644 search/search_index.json create mode 100644 sitemap.xml create mode 100644 sitemap.xml.gz diff --git a/.nojekyll b/.nojekyll new file mode 100644 index 0000000..e69de29 diff --git a/404.html b/404.html new file mode 100644 index 0000000..90b4aab --- /dev/null +++ b/404.html @@ -0,0 +1,640 @@ + + + +
+ + + + + + + + + + + + + + +Callbacks module contains classes and functions for handling callback functions during an event-driven process. This makes it easier to customize the behavior of the training loop and add additional functionality to the training process without modifying the core code.
+To use a callback, create a class that inherits from the Callback class and implement the necessary methods. Callbacks can be used to perform actions at different stages of the training process, such as at the beginning or end of an epoch, batch, or fitting process. Then pass the callback object to the Trainer.
+ + + + + + + + +AvgStats
+
+
+¶AvgStats class is used to track and accumulate average statistics (like loss and other metrics) during training and validation phases.
+ + +Attributes:
+Name | +Type | +Description | +
---|---|---|
metrics |
+
+ list
+ |
+
+
+
+ A list of metric functions to be tracked. + |
+
in_train |
+
+ bool
+ |
+
+
+
+ A flag to indicate if the statistics are for the training phase. + |
+
Methods:
+Name | +Description | +
---|---|
__init__ |
+
+
+
+ Initializes the AvgStats with metrics and in_train flag. + |
+
reset |
+
+
+
+ Resets the accumulated statistics. + |
+
all_stats |
+
+
+
+ Property that returns all accumulated statistics including loss and metrics. + |
+
avg_stats |
+
+
+
+ Property that returns the average of the accumulated statistics. + |
+
accumulate |
+
+
+
+ Accumulates the statistics using the data from the given run. + |
+
__repr__ |
+
+
+
+ Returns a string representation of the average statistics. + |
+
openml_pytorch/callbacks.py
278 +279 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +290 +291 +292 +293 +294 +295 +296 +297 +298 +299 +300 +301 +302 +303 +304 +305 +306 +307 +308 +309 +310 +311 +312 +313 +314 +315 +316 +317 +318 +319 +320 +321 +322 +323 +324 +325 +326 +327 +328 +329 +330 |
|
AvgStatsCallBack
+
+
+¶
+ Bases: Callback
AvgStatsCallBack class is a custom callback used to track and print average statistics for training and validation phases during the training loop.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ metrics
+ |
+ + | +
+
+
+ A list of metric functions to evaluate during training and validation. + |
+ + required + | +
Methods:
+Name | +Description | +
---|---|
__init__ |
+
+
+
+ Initializes the callback with given metrics and sets up AvgStats objects for both training and validation phases. + |
+
begin_epoch |
+
+
+
+ Resets the statistics at the beginning of each epoch. + |
+
after_loss |
+
+
+
+ Accumulates the metrics after computing the loss, differentiating between training and validation phases. + |
+
after_epoch |
+
+
+
+ Prints the accumulated statistics for both training and validation phases after each epoch. + |
+
openml_pytorch/callbacks.py
333 +334 +335 +336 +337 +338 +339 +340 +341 +342 +343 +344 +345 +346 +347 +348 +349 +350 +351 +352 +353 +354 +355 +356 +357 +358 +359 +360 +361 +362 |
|
Callback
+
+
+¶Callback class is a base class designed for handling different callback functions during +an event-driven process. It provides functionality to set a runner, retrieve the class +name in snake_case format, directly call callback methods, and delegate attribute access +to the runner if the attribute does not exist in the Callback class.
+The _order is used to decide the order of Callbacks.
+ + + + + + +openml_pytorch/callbacks.py
104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 |
|
ParamScheduler
+
+
+¶
+ Bases: Callback
Manages scheduling of parameter adjustments over the course of training.
+ + + + + + +openml_pytorch/callbacks.py
135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 |
|
begin_batch()
+
+¶Apply parameter adjustments at the beginning of each batch if in training mode.
+ +openml_pytorch/callbacks.py
161 +162 +163 +164 +165 +166 |
|
begin_fit()
+
+¶Prepare the scheduler at the start of the fitting process. +This method ensures that sched_funcs is a list with one function per parameter group.
+ +openml_pytorch/callbacks.py
144 +145 +146 +147 +148 +149 +150 |
|
set_param()
+
+¶Adjust the parameter value for each parameter group based on the scheduling function. +Ensures the number of scheduling functions matches the number of parameter groups.
+ +openml_pytorch/callbacks.py
152 +153 +154 +155 +156 +157 +158 +159 |
|
Recorder
+
+
+¶
+ Bases: Callback
Recorder is a callback class used to record learning rates and losses during the training process.
+ + + + + + +openml_pytorch/callbacks.py
168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 |
|
after_batch()
+
+¶Handles operations to execute after each training batch.
+Modifies the learning rate for each parameter group in the optimizer +and appends the current learning rate and loss to the corresponding lists.
+ +openml_pytorch/callbacks.py
185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 |
|
begin_fit()
+
+¶Initializes attributes necessary for the fitting process.
+Sets up learning rates and losses storage.
+ + +Attributes:
+Name | +Type | +Description | +
---|---|---|
self.lrs |
+
+ list
+ |
+
+
+
+ A list of lists, where each inner list will hold learning rates for a parameter group. + |
+
self.losses |
+
+ list
+ |
+
+
+
+ An empty list to store loss values during the fitting process. + |
+
openml_pytorch/callbacks.py
172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 |
|
plot(skip_last=0, pgid=-1)
+
+¶Generates a plot of the loss values against the learning rates.
+ +openml_pytorch/callbacks.py
211 +212 +213 +214 +215 +216 +217 +218 +219 |
|
plot_loss(skip_last=0)
+
+¶Plots the loss for a given parameter group.
+ +openml_pytorch/callbacks.py
205 +206 +207 +208 +209 |
|
plot_lr(pgid=-1)
+
+¶Plots the learning rate for a given parameter group.
+ +openml_pytorch/callbacks.py
199 +200 +201 +202 +203 |
|
TrainEvalCallback
+
+
+¶
+ Bases: Callback
TrainEvalCallback class is a custom callback used during the training +and validation phases of a machine learning model to perform specific +actions at the beginning and after certain events.
+Methods:
+begin_fit(): + Initialize the number of epochs and iteration counts at the start + of the fitting process.
+after_batch(): + Update the epoch and iteration counts after each batch during + training.
+begin_epoch(): + Set the current epoch, switch the model to training mode, and + indicate that the model is in training.
+begin_validate(): + Switch the model to evaluation mode and indicate that the model + is in validation.
+ + + + + + +openml_pytorch/callbacks.py
222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 |
|
annealer(f)
+
+¶A decorator function for creating a partially applied function with predefined start and end arguments.
+The inner function _inner
captures the start
and end
parameters and returns a partial
object that fixes these parameters for the decorated function f
.
openml_pytorch/callbacks.py
36 +37 +38 +39 +40 +41 +42 +43 +44 |
|
camel2snake(name)
+
+¶Convert name
from camel case to snake case.
openml_pytorch/callbacks.py
96 + 97 + 98 + 99 +100 +101 |
|
combine_scheds(pcts, scheds)
+
+¶Combine multiple scheduling functions.
+ +openml_pytorch/callbacks.py
79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 |
|
listify(o=None)
+
+¶Convert o
to list. If o
is None, return empty list.
openml_pytorch/callbacks.py
21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 |
|
sched_cos(start, end, pos)
+
+¶A cosine schedule function.
+ +openml_pytorch/callbacks.py
55 +56 +57 +58 +59 +60 |
|
sched_exp(start, end, pos)
+
+¶Exponential schedule function.
+ +openml_pytorch/callbacks.py
71 +72 +73 +74 +75 +76 |
|
sched_lin(start, end, pos)
+
+¶A linear schedule function.
+ +openml_pytorch/callbacks.py
47 +48 +49 +50 +51 +52 |
|
sched_no(start, end, pos)
+
+¶Disabled scheduling.
+ +openml_pytorch/callbacks.py
63 +64 +65 +66 +67 +68 |
|
This module contains custom dataset classes for handling image and tabular data from OpenML in PyTorch. To add support for new data types, new classes can be added to this module.
+ + + + + + + + +OpenMLImageDataset
+
+
+¶
+ Bases: Dataset
Class representing an image dataset from OpenML for use in PyTorch.
+Methods:
+__init__(self, X, y, image_size, image_dir, transform_x=None, transform_y=None)
+ Initializes the dataset with given data, image size, directory, and optional transformations.
+
+__getitem__(self, idx)
+ Retrieves an image and its corresponding label (if available) from the dataset at the specified index. Applies transformations if provided.
+
+__len__(self)
+ Returns the total number of images in the dataset.
+
openml_pytorch/custom_datasets.py
14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 |
|
OpenMLTabularDataset
+
+
+¶
+ Bases: Dataset
OpenMLTabularDataset
+A custom dataset class to handle tabular data from OpenML (or any similar tabular dataset). +It encodes categorical features and the target column using LabelEncoder from sklearn.
+ + +Methods:
+Name | +Description | +
---|---|
__init__ |
+
+
+
+ Initializes the dataset with the data and the target column. + Encodes the categorical features and target if provided. + |
+
__getitem__ |
+
+
+
+ Retrieves the input data and target value at the specified index. + Converts the data to tensors and returns them. + |
+
__len__ |
+
+
+
+ Returns the length of the dataset. + |
+
openml_pytorch/custom_datasets.py
57 + 58 + 59 + 60 + 61 + 62 + 63 + 64 + 65 + 66 + 67 + 68 + 69 + 70 + 71 + 72 + 73 + 74 + 75 + 76 + 77 + 78 + 79 + 80 + 81 + 82 + 83 + 84 + 85 + 86 + 87 + 88 + 89 + 90 + 91 + 92 + 93 + 94 + 95 + 96 + 97 + 98 + 99 +100 +101 +102 +103 +104 +105 +106 |
|
This module provides utility functions for evaluating model performance and activation functions. +It includes functions to compute the accuracy, top-k accuracy of model predictions, and the sigmoid function.
+ + + + + + + + +accuracy(out, yb)
+
+¶Computes the accuracy of model predictions.
+Parameters: +out (Tensor): The output tensor from the model, containing predicted class scores. +yb (Tensor): The ground truth labels tensor.
+Returns: +Tensor: The mean accuracy of the predictions, computed as a float tensor.
+ +openml_pytorch/metrics.py
9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 |
|
accuracy_topk(out, yb, k=5)
+
+¶Computes the top-k accuracy of the given model outputs.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ out
+ |
+
+ Tensor
+ |
+
+
+
+ The output predictions of the model, of shape (batch_size, num_classes). + |
+ + required + | +
+ yb
+ |
+
+ Tensor
+ |
+
+
+
+ The ground truth labels, of shape (batch_size,). + |
+ + required + | +
+ k
+ |
+
+ int
+ |
+
+
+
+ The number of top predictions to consider. Default is 5. + |
+
+ 5
+ |
+
Returns:
+Name | Type | +Description | +
---|---|---|
float | + | +
+
+
+ The top-k accuracy as a float value. + |
+
The function calculates how often the true label is among the top-k predicted labels.
+ +openml_pytorch/metrics.py
23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 |
|
sigmoid(x)
+
+¶Computes the sigmoid function
+The sigmoid function is defined as 1 / (1 + exp(-x)). This function is used +to map any real-valued number into the range (0, 1). It is widely used in +machine learning, especially in logistic regression and neural networks.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ x
+ |
+
+ ndarray or float
+ |
+
+
+
+ The input value or array over which the + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ | +
+
+
+ numpy.ndarray or float: The sigmoid of the input value or array. + |
+
openml_pytorch/metrics.py
40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 |
|
This module defines the Pytorch extension for OpenML-python.
+ + + + + + + + +PytorchExtension
+
+
+¶
+ Bases: Extension
Connect Pytorch to OpenML-Python.
+ + + + + + +openml_pytorch/extension.py
74 + 75 + 76 + 77 + 78 + 79 + 80 + 81 + 82 + 83 + 84 + 85 + 86 + 87 + 88 + 89 + 90 + 91 + 92 + 93 + 94 + 95 + 96 + 97 + 98 + 99 + 100 + 101 + 102 + 103 + 104 + 105 + 106 + 107 + 108 + 109 + 110 + 111 + 112 + 113 + 114 + 115 + 116 + 117 + 118 + 119 + 120 + 121 + 122 + 123 + 124 + 125 + 126 + 127 + 128 + 129 + 130 + 131 + 132 + 133 + 134 + 135 + 136 + 137 + 138 + 139 + 140 + 141 + 142 + 143 + 144 + 145 + 146 + 147 + 148 + 149 + 150 + 151 + 152 + 153 + 154 + 155 + 156 + 157 + 158 + 159 + 160 + 161 + 162 + 163 + 164 + 165 + 166 + 167 + 168 + 169 + 170 + 171 + 172 + 173 + 174 + 175 + 176 + 177 + 178 + 179 + 180 + 181 + 182 + 183 + 184 + 185 + 186 + 187 + 188 + 189 + 190 + 191 + 192 + 193 + 194 + 195 + 196 + 197 + 198 + 199 + 200 + 201 + 202 + 203 + 204 + 205 + 206 + 207 + 208 + 209 + 210 + 211 + 212 + 213 + 214 + 215 + 216 + 217 + 218 + 219 + 220 + 221 + 222 + 223 + 224 + 225 + 226 + 227 + 228 + 229 + 230 + 231 + 232 + 233 + 234 + 235 + 236 + 237 + 238 + 239 + 240 + 241 + 242 + 243 + 244 + 245 + 246 + 247 + 248 + 249 + 250 + 251 + 252 + 253 + 254 + 255 + 256 + 257 + 258 + 259 + 260 + 261 + 262 + 263 + 264 + 265 + 266 + 267 + 268 + 269 + 270 + 271 + 272 + 273 + 274 + 275 + 276 + 277 + 278 + 279 + 280 + 281 + 282 + 283 + 284 + 285 + 286 + 287 + 288 + 289 + 290 + 291 + 292 + 293 + 294 + 295 + 296 + 297 + 298 + 299 + 300 + 301 + 302 + 303 + 304 + 305 + 306 + 307 + 308 + 309 + 310 + 311 + 312 + 313 + 314 + 315 + 316 + 317 + 318 + 319 + 320 + 321 + 322 + 323 + 324 + 325 + 326 + 327 + 328 + 329 + 330 + 331 + 332 + 333 + 334 + 335 + 336 + 337 + 338 + 339 + 340 + 341 + 342 + 343 + 344 + 345 + 346 + 347 + 348 + 349 + 350 + 351 + 352 + 353 + 354 + 355 + 356 + 357 + 358 + 359 + 360 + 361 + 362 + 363 + 364 + 365 + 366 + 367 + 368 + 369 + 370 + 371 + 372 + 373 + 374 + 375 + 376 + 377 + 378 + 379 + 380 + 381 + 382 + 383 + 384 + 385 + 386 + 387 + 388 + 389 + 390 + 391 + 392 + 393 + 394 + 395 + 396 + 397 + 398 + 399 + 400 + 401 + 402 + 403 + 404 + 405 + 406 + 407 + 408 + 409 + 410 + 411 
+ 412 + 413 + 414 + 415 + 416 + 417 + 418 + 419 + 420 + 421 + 422 + 423 + 424 + 425 + 426 + 427 + 428 + 429 + 430 + 431 + 432 + 433 + 434 + 435 + 436 + 437 + 438 + 439 + 440 + 441 + 442 + 443 + 444 + 445 + 446 + 447 + 448 + 449 + 450 + 451 + 452 + 453 + 454 + 455 + 456 + 457 + 458 + 459 + 460 + 461 + 462 + 463 + 464 + 465 + 466 + 467 + 468 + 469 + 470 + 471 + 472 + 473 + 474 + 475 + 476 + 477 + 478 + 479 + 480 + 481 + 482 + 483 + 484 + 485 + 486 + 487 + 488 + 489 + 490 + 491 + 492 + 493 + 494 + 495 + 496 + 497 + 498 + 499 + 500 + 501 + 502 + 503 + 504 + 505 + 506 + 507 + 508 + 509 + 510 + 511 + 512 + 513 + 514 + 515 + 516 + 517 + 518 + 519 + 520 + 521 + 522 + 523 + 524 + 525 + 526 + 527 + 528 + 529 + 530 + 531 + 532 + 533 + 534 + 535 + 536 + 537 + 538 + 539 + 540 + 541 + 542 + 543 + 544 + 545 + 546 + 547 + 548 + 549 + 550 + 551 + 552 + 553 + 554 + 555 + 556 + 557 + 558 + 559 + 560 + 561 + 562 + 563 + 564 + 565 + 566 + 567 + 568 + 569 + 570 + 571 + 572 + 573 + 574 + 575 + 576 + 577 + 578 + 579 + 580 + 581 + 582 + 583 + 584 + 585 + 586 + 587 + 588 + 589 + 590 + 591 + 592 + 593 + 594 + 595 + 596 + 597 + 598 + 599 + 600 + 601 + 602 + 603 + 604 + 605 + 606 + 607 + 608 + 609 + 610 + 611 + 612 + 613 + 614 + 615 + 616 + 617 + 618 + 619 + 620 + 621 + 622 + 623 + 624 + 625 + 626 + 627 + 628 + 629 + 630 + 631 + 632 + 633 + 634 + 635 + 636 + 637 + 638 + 639 + 640 + 641 + 642 + 643 + 644 + 645 + 646 + 647 + 648 + 649 + 650 + 651 + 652 + 653 + 654 + 655 + 656 + 657 + 658 + 659 + 660 + 661 + 662 + 663 + 664 + 665 + 666 + 667 + 668 + 669 + 670 + 671 + 672 + 673 + 674 + 675 + 676 + 677 + 678 + 679 + 680 + 681 + 682 + 683 + 684 + 685 + 686 + 687 + 688 + 689 + 690 + 691 + 692 + 693 + 694 + 695 + 696 + 697 + 698 + 699 + 700 + 701 + 702 + 703 + 704 + 705 + 706 + 707 + 708 + 709 + 710 + 711 + 712 + 713 + 714 + 715 + 716 + 717 + 718 + 719 + 720 + 721 + 722 + 723 + 724 + 725 + 726 + 727 + 728 + 729 + 730 + 731 + 732 + 733 + 734 + 735 + 736 + 737 + 738 + 739 + 740 + 741 + 742 + 743 + 744 + 
745 + 746 + 747 + 748 + 749 + 750 + 751 + 752 + 753 + 754 + 755 + 756 + 757 + 758 + 759 + 760 + 761 + 762 + 763 + 764 + 765 + 766 + 767 + 768 + 769 + 770 + 771 + 772 + 773 + 774 + 775 + 776 + 777 + 778 + 779 + 780 + 781 + 782 + 783 + 784 + 785 + 786 + 787 + 788 + 789 + 790 + 791 + 792 + 793 + 794 + 795 + 796 + 797 + 798 + 799 + 800 + 801 + 802 + 803 + 804 + 805 + 806 + 807 + 808 + 809 + 810 + 811 + 812 + 813 + 814 + 815 + 816 + 817 + 818 + 819 + 820 + 821 + 822 + 823 + 824 + 825 + 826 + 827 + 828 + 829 + 830 + 831 + 832 + 833 + 834 + 835 + 836 + 837 + 838 + 839 + 840 + 841 + 842 + 843 + 844 + 845 + 846 + 847 + 848 + 849 + 850 + 851 + 852 + 853 + 854 + 855 + 856 + 857 + 858 + 859 + 860 + 861 + 862 + 863 + 864 + 865 + 866 + 867 + 868 + 869 + 870 + 871 + 872 + 873 + 874 + 875 + 876 + 877 + 878 + 879 + 880 + 881 + 882 + 883 + 884 + 885 + 886 + 887 + 888 + 889 + 890 + 891 + 892 + 893 + 894 + 895 + 896 + 897 + 898 + 899 + 900 + 901 + 902 + 903 + 904 + 905 + 906 + 907 + 908 + 909 + 910 + 911 + 912 + 913 + 914 + 915 + 916 + 917 + 918 + 919 + 920 + 921 + 922 + 923 + 924 + 925 + 926 + 927 + 928 + 929 + 930 + 931 + 932 + 933 + 934 + 935 + 936 + 937 + 938 + 939 + 940 + 941 + 942 + 943 + 944 + 945 + 946 + 947 + 948 + 949 + 950 + 951 + 952 + 953 + 954 + 955 + 956 + 957 + 958 + 959 + 960 + 961 + 962 + 963 + 964 + 965 + 966 + 967 + 968 + 969 + 970 + 971 + 972 + 973 + 974 + 975 + 976 + 977 + 978 + 979 + 980 + 981 + 982 + 983 + 984 + 985 + 986 + 987 + 988 + 989 + 990 + 991 + 992 + 993 + 994 + 995 + 996 + 997 + 998 + 999 +1000 +1001 +1002 +1003 +1004 +1005 +1006 +1007 +1008 +1009 +1010 +1011 +1012 +1013 +1014 +1015 +1016 +1017 +1018 +1019 +1020 +1021 +1022 +1023 +1024 +1025 +1026 +1027 +1028 +1029 +1030 +1031 +1032 +1033 +1034 +1035 +1036 +1037 +1038 +1039 +1040 +1041 +1042 +1043 +1044 +1045 +1046 +1047 +1048 +1049 +1050 +1051 +1052 +1053 +1054 +1055 +1056 +1057 +1058 +1059 +1060 +1061 +1062 +1063 +1064 +1065 +1066 +1067 +1068 +1069 +1070 +1071 +1072 +1073 +1074 +1075 +1076 +1077 
+1078 +1079 +1080 +1081 +1082 +1083 +1084 +1085 +1086 +1087 +1088 +1089 +1090 +1091 +1092 +1093 +1094 +1095 +1096 +1097 +1098 +1099 +1100 +1101 +1102 +1103 +1104 +1105 +1106 +1107 +1108 +1109 +1110 +1111 +1112 +1113 +1114 +1115 +1116 +1117 +1118 +1119 +1120 +1121 +1122 +1123 +1124 +1125 +1126 +1127 +1128 +1129 +1130 +1131 +1132 +1133 +1134 +1135 +1136 +1137 +1138 +1139 +1140 +1141 +1142 +1143 +1144 +1145 +1146 +1147 +1148 +1149 +1150 +1151 +1152 +1153 +1154 +1155 +1156 +1157 +1158 +1159 +1160 +1161 +1162 +1163 +1164 +1165 +1166 +1167 +1168 +1169 +1170 +1171 +1172 +1173 +1174 +1175 +1176 +1177 +1178 +1179 +1180 +1181 +1182 +1183 +1184 +1185 +1186 +1187 +1188 +1189 +1190 +1191 +1192 +1193 +1194 +1195 +1196 +1197 +1198 +1199 +1200 +1201 +1202 +1203 +1204 +1205 +1206 +1207 +1208 +1209 +1210 +1211 +1212 +1213 +1214 +1215 +1216 +1217 +1218 +1219 +1220 +1221 +1222 +1223 +1224 +1225 +1226 +1227 +1228 +1229 +1230 +1231 +1232 +1233 +1234 +1235 +1236 +1237 +1238 +1239 +1240 +1241 +1242 +1243 +1244 +1245 +1246 +1247 +1248 +1249 +1250 +1251 +1252 +1253 +1254 +1255 +1256 +1257 +1258 +1259 +1260 +1261 +1262 +1263 +1264 +1265 +1266 +1267 +1268 +1269 +1270 +1271 +1272 +1273 +1274 +1275 +1276 +1277 +1278 +1279 +1280 +1281 |
|
can_handle_flow(flow)
+
+
+ classmethod
+
+
+¶Check whether a given describes a Pytorch estimator.
+This is done by parsing the external_version
field.
flow : OpenMLFlow
+bool
+ +openml_pytorch/extension.py
80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 |
|
can_handle_model(model)
+
+
+ classmethod
+
+
+¶Check whether a model is an instance of torch.nn.Module
.
model : Any
+bool
+ +openml_pytorch/extension.py
96 + 97 + 98 + 99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 |
|
check_if_model_fitted(model)
+
+¶Returns True/False denoting if the model has already been fitted/trained +Parameters
+model : Any +Returns
+bool
+ +openml_pytorch/extension.py
1273 +1274 +1275 +1276 +1277 +1278 +1279 +1280 +1281 |
|
compile_additional_information(task, additional_information)
+
+¶Compiles additional information provided by the extension during the runs into a final +set of files.
+task : OpenMLTask + The task the model was run on. +additional_information: List[Tuple[int, int, Any]] + A list of (fold, repetition, additional information) tuples obtained during training.
+files : Dict[str, Tuple[str, str]] + A dictionary of files with their file name and contents.
+ +openml_pytorch/extension.py
1054 +1055 +1056 +1057 +1058 +1059 +1060 +1061 +1062 +1063 +1064 +1065 +1066 +1067 +1068 +1069 +1070 +1071 +1072 +1073 +1074 |
|
create_setup_string(model)
+
+¶Create a string which can be used to reinstantiate the given model.
+model : Any
+str
+ +openml_pytorch/extension.py
351 +352 +353 +354 +355 +356 +357 +358 +359 +360 +361 +362 +363 |
|
flow_to_model(flow, initialize_with_defaults=False)
+
+¶Initializes a Pytorch model based on a flow.
+flow : mixed + the object to deserialize (can be flow object, or any serialized + parameter value that is accepted by)
+ + +If this flag is set, the hyperparameter values of flows will be +ignored and a flow with its defaults is returned.
+mixed
+ +openml_pytorch/extension.py
119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 |
|
get_version_information()
+
+¶List versions of libraries required by the flow.
+Libraries listed are Python
, pytorch
, numpy
and scipy
.
List
+ +openml_pytorch/extension.py
327 +328 +329 +330 +331 +332 +333 +334 +335 +336 +337 +338 +339 +340 +341 +342 +343 +344 +345 +346 +347 +348 +349 |
|
instantiate_model_from_hpo_class(model, trace_iteration)
+
+¶Instantiate a base_estimator
which can be searched over by the hyperparameter
+optimization model (UNUSED)
model : Any + A hyperparameter optimization model which defines the model to be instantiated. +trace_iteration : OpenMLTraceIteration + Describing the hyperparameter settings to instantiate.
+Any
+ +openml_pytorch/extension.py
1250 +1251 +1252 +1253 +1254 +1255 +1256 +1257 +1258 +1259 +1260 +1261 +1262 +1263 +1264 +1265 +1266 +1267 +1268 +1269 +1270 |
|
is_estimator(model)
+
+¶Check whether the given model is a pytorch estimator.
+This function is only required for backwards compatibility and will be removed in the +near future.
+model : Any
+bool
+ +openml_pytorch/extension.py
944 +945 +946 +947 +948 +949 +950 +951 +952 +953 +954 +955 +956 +957 +958 |
|
model_to_flow(model, custom_name=None)
+
+¶Transform a Pytorch model to a flow for uploading it to OpenML.
+model : Any
+OpenMLFlow
+ +openml_pytorch/extension.py
272 +273 +274 +275 +276 +277 +278 +279 +280 +281 +282 +283 +284 |
|
obtain_parameter_values(flow, model=None)
+
+¶Extracts all parameter settings required for the flow from the model.
+If no explicit model is provided, the parameters will be extracted from flow.model
+instead.
flow : OpenMLFlow + OpenMLFlow object (containing flow ids, i.e., it has to be downloaded from the server)
+ + +The model from which to obtain the parameter values. Must match the flow signature.
+If None, use the model specified in OpenMLFlow.model
.
list
+ A list of dicts, where each dict has the following entries:
+ - oml:name
: str: The OpenML parameter name
+ - oml:value
: mixed: A representation of the parameter value
+ - oml:component
: int: flow id to which the parameter belongs
openml_pytorch/extension.py
1076 +1077 +1078 +1079 +1080 +1081 +1082 +1083 +1084 +1085 +1086 +1087 +1088 +1089 +1090 +1091 +1092 +1093 +1094 +1095 +1096 +1097 +1098 +1099 +1100 +1101 +1102 +1103 +1104 +1105 +1106 +1107 +1108 +1109 +1110 +1111 +1112 +1113 +1114 +1115 +1116 +1117 +1118 +1119 +1120 +1121 +1122 +1123 +1124 +1125 +1126 +1127 +1128 +1129 +1130 +1131 +1132 +1133 +1134 +1135 +1136 +1137 +1138 +1139 +1140 +1141 +1142 +1143 +1144 +1145 +1146 +1147 +1148 +1149 +1150 +1151 +1152 +1153 +1154 +1155 +1156 +1157 +1158 +1159 +1160 +1161 +1162 +1163 +1164 +1165 +1166 +1167 +1168 +1169 +1170 +1171 +1172 +1173 +1174 +1175 +1176 +1177 +1178 +1179 +1180 +1181 +1182 +1183 +1184 +1185 +1186 +1187 +1188 +1189 +1190 +1191 +1192 +1193 +1194 +1195 +1196 +1197 +1198 +1199 +1200 +1201 +1202 +1203 +1204 +1205 +1206 +1207 +1208 +1209 +1210 +1211 +1212 +1213 |
|
seed_model(model, seed=None)
+
+¶Set the random state of all the unseeded components of a model and return the seeded +model.
+Required so that all seed information can be uploaded to OpenML for reproducible results.
+Models that are already seeded will maintain the seed. In this case, +only integer seeds are allowed (An exception is raised when a RandomState was used as +seed).
+model : pytorch model + The model to be seeded +seed : int + The seed to initialize the RandomState with. Unseeded subcomponents + will be seeded with a random number from the RandomState.
+Any
+ +openml_pytorch/extension.py
960 +961 +962 +963 +964 +965 +966 +967 +968 +969 +970 +971 +972 +973 +974 +975 +976 +977 +978 +979 +980 +981 +982 +983 |
|
This module provides classes and methods to facilitate the configuration, data handling, training, and evaluation of machine learning models using PyTorch and OpenML datasets. The functionalities include: +- Generation of default configurations for models. +- Handling of image and tabular data. +- Training and evaluating machine learning models. +- Exporting trained models to ONNX format. +- Managing data transformations and loaders.
+ + +This module provides classes and methods to facilitate the configuration, data handling, training, and evaluation of machine learning models using PyTorch and OpenML datasets. The functionalities include: +- Generation of default configurations for models. +- Handling of image and tabular data. +- Training and evaluating machine learning models. +- Exporting trained models to ONNX format. +- Managing data transformations and loaders.
+ + + + + + + + +BaseDataHandler
+
+
+¶BaseDataHandler class is an abstract base class for data handling operations.
+ + + + + + +openml_pytorch/trainer.py
213 +214 +215 +216 +217 +218 +219 +220 +221 |
|
DataContainer
+
+
+¶A class to contain the training, validation, and test data loaders. This just makes it easier to access them when required.
+Attributes: +train_dl: DataLoader object for the training data. +valid_dl: DataLoader object for the validation data. +test_dl: Optional DataLoader object for the test data.
+openml_pytorch/trainer.py
278 +279 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +290 +291 +292 +293 +294 +295 +296 +297 +298 +299 +300 +301 +302 |
|
DefaultConfigGenerator
+
+
+¶DefaultConfigGenerator class provides various methods to generate default configurations.
+ + + + + + +openml_pytorch/trainer.py
60 + 61 + 62 + 63 + 64 + 65 + 66 + 67 + 68 + 69 + 70 + 71 + 72 + 73 + 74 + 75 + 76 + 77 + 78 + 79 + 80 + 81 + 82 + 83 + 84 + 85 + 86 + 87 + 88 + 89 + 90 + 91 + 92 + 93 + 94 + 95 + 96 + 97 + 98 + 99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 |
|
get_device()
+
+¶Checks if a GPU is available and returns the device to be used for training (cuda, mps or cpu)
+ +openml_pytorch/trainer.py
141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 |
|
return_data_config()
+
+¶Returns a configuration object for the data
+ +openml_pytorch/trainer.py
189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 |
|
return_model_config()
+
+¶Returns a configuration object for the model
+ +openml_pytorch/trainer.py
166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 |
|
OpenMLImageHandler
+
+
+¶
+ Bases: BaseDataHandler
OpenMLImageHandler is a class that extends BaseDataHandler to handle image data from OpenML datasets.
+ + + + + + +openml_pytorch/trainer.py
224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 |
|
OpenMLTabularHandler
+
+
+¶
+ Bases: BaseDataHandler
OpenMLTabularHandler is a class that extends BaseDataHandler to handle tabular data from OpenML datasets.
+ + + + + + +openml_pytorch/trainer.py
256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 |
|
OpenMLTrainerModule
+
+
+¶openml_pytorch/trainer.py
493 +494 +495 +496 +497 +498 +499 +500 +501 +502 +503 +504 +505 +506 +507 +508 +509 +510 +511 +512 +513 +514 +515 +516 +517 +518 +519 +520 +521 +522 +523 +524 +525 +526 +527 +528 +529 +530 +531 +532 +533 +534 +535 +536 +537 +538 +539 +540 +541 +542 +543 +544 +545 +546 +547 +548 +549 +550 +551 +552 +553 +554 +555 +556 +557 +558 +559 +560 +561 +562 +563 +564 +565 +566 +567 +568 +569 +570 +571 +572 +573 +574 +575 +576 +577 +578 +579 +580 +581 +582 +583 +584 +585 +586 +587 +588 +589 +590 +591 +592 +593 +594 +595 +596 +597 +598 +599 +600 +601 +602 +603 +604 +605 +606 +607 +608 +609 +610 +611 +612 +613 +614 +615 +616 +617 +618 +619 +620 +621 +622 +623 +624 +625 +626 +627 +628 +629 +630 +631 +632 +633 +634 +635 +636 +637 +638 +639 +640 +641 +642 +643 +644 +645 +646 +647 +648 +649 +650 +651 +652 +653 +654 +655 +656 +657 +658 +659 +660 +661 +662 +663 +664 +665 +666 +667 +668 +669 +670 +671 +672 +673 +674 +675 +676 +677 +678 +679 +680 +681 +682 +683 +684 +685 +686 +687 +688 +689 +690 +691 +692 +693 +694 +695 +696 +697 +698 +699 +700 +701 +702 +703 +704 +705 +706 +707 +708 +709 +710 +711 +712 +713 +714 +715 +716 +717 +718 +719 +720 +721 +722 +723 +724 +725 +726 +727 +728 +729 +730 +731 +732 +733 +734 +735 +736 +737 |
|
convert_to_rgb(image)
+
+¶Converts an image to RGB mode if it is not already in that mode.
+Parameters: +image (PIL.Image): The image to be converted.
+Returns: +PIL.Image: The converted image in RGB mode.
+ +openml_pytorch/trainer.py
46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 |
|
import openml
+
+import numpy as np
+import pandas as pd
+import sklearn.datasets
+
+import openml
+from openml.datasets.functions import create_dataset
+import os
+import requests
+import zipfile
+import glob
+
def create_tiny_imagenet():
    """Build and publish the 'tiny-imagenet-200' dataset on OpenML.

    Downloads the Tiny ImageNet archive (if not already cached under
    ./datasets), extracts it, builds a dataframe of (image_path, label)
    pairs for the training split, and publishes it as an OpenML dataset.

    Side effects: network download, disk writes under ./datasets, and a
    dataset upload to the OpenML server (requires a configured API key).
    """
    dir_name = "datasets"
    os.makedirs(dir_name, exist_ok=True)

    url = "http://cs231n.stanford.edu/tiny-imagenet-200.zip"
    zip_path = f"{dir_name}/tiny-imagenet-200.zip"

    # Download only when the archive is missing, and stream it to disk in
    # chunks. The original issued the GET unconditionally and buffered the
    # entire response in memory via r.content even when the file existed.
    if not os.path.exists(zip_path):
        with requests.get(url, stream=True) as r:
            r.raise_for_status()
            with open(zip_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=1 << 20):
                    f.write(chunk)

    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(f"{dir_name}/")

    ## recursively find all the images
    image_paths = glob.glob(f"{dir_name}/tiny-imagenet-200/train/*/*/*.JPEG")
    ## remove the first part of the path ("datasets/")
    image_paths = [path.split("/", 1)[-1] for path in image_paths]
    ## create a dataframe with the image path and the label
    # Paths look like "tiny-imagenet-200/train/<class>/images/<file>.JPEG",
    # so the class label is the third path component.
    label_func = lambda x: x.split("/")[2]
    df = pd.DataFrame(image_paths, columns=["image_path"])
    df["label"] = df["image_path"].apply(label_func)

    ## encode types
    df["image_path"] = df["image_path"].astype("string")
    df["label"] = df["label"].astype("string")

    name = "tiny-imagenet-200"
    description = "Tiny ImageNet contains 100000 images of 200 classes (500 for each class) downsized to 64 x 64 colored images. Each class has 500 training images, 50 validation images, and 50 test images. The dataset here just contains links to the images and the labels. The dataset can be downloaded from the official website ![here](http://cs231n.stanford.edu/tiny-imagenet-200.zip). /n Link to the paper - [Tiny ImageNet Classification with CNN](https://cs231n.stanford.edu/reports/2017/pdfs/930.pdf)"
    citation = ("Wu, J., Zhang, Q., & Xu, G. (2017). Tiny imagenet challenge. Technical report.")

    tinyim = create_dataset(
        name=name,
        description=description,
        creator="Jiayu Wu, Qixiang Zhang, Guoxi Xu",
        contributor="Jiayu Wu, Qixiang Zhang, Guoxi Xu",
        collection_date="2017",
        language="English",
        licence="DbCL v1.0",
        default_target_attribute="label",
        attributes="auto",
        data=df,
        citation=citation,
        ignore_attribute=None,
    )
    openml.config.apikey = ''  # set your OpenML API key before publishing
    tinyim.publish()
    print(f"URL for dataset: {tinyim.openml_url}")
+
create_tiny_imagenet()
+# https://www.openml.org/d/46346
+
def create_tiniest_imagenet():
    """Build and publish the 'tiniest-imagenet-200' dataset on OpenML.

    Same pipeline as create_tiny_imagenet(), but keeps only the first 20
    images per class — a small variant intended purely for quickly testing
    a framework.

    Side effects: network download, disk writes under ./datasets, and a
    dataset upload to the OpenML server (requires a configured API key).
    """
    dir_name = "datasets"
    os.makedirs(dir_name, exist_ok=True)

    url = "http://cs231n.stanford.edu/tiny-imagenet-200.zip"
    zip_path = f"{dir_name}/tiny-imagenet-200.zip"

    # Download only when the archive is missing, and stream it to disk in
    # chunks. The original issued the GET unconditionally and buffered the
    # entire response in memory via r.content even when the file existed.
    if not os.path.exists(zip_path):
        with requests.get(url, stream=True) as r:
            r.raise_for_status()
            with open(zip_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=1 << 20):
                    f.write(chunk)

    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(f"{dir_name}/")

    ## recursively find all the images
    image_paths = glob.glob(f"{dir_name}/tiny-imagenet-200/train/*/*/*.JPEG")
    ## remove the first part of the path ("datasets/")
    image_paths = [path.split("/", 1)[-1] for path in image_paths]
    ## create a dataframe with the image path and the label
    # Paths look like "tiny-imagenet-200/train/<class>/images/<file>.JPEG",
    # so the class label is the third path component.
    label_func = lambda x: x.split("/")[2]
    df = pd.DataFrame(image_paths, columns=["image_path"])
    df["label"] = df["image_path"].apply(label_func)

    ## encode types
    df["image_path"] = df["image_path"].astype("string")
    df["label"] = df["label"].astype("string")

    # keep only first 20 images for each label
    df = df.groupby("label").head(20)

    name = "tiniest-imagenet-200"
    description = "Tiny ImageNet contains 100000 images of 200 classes (500 for each class) downsized to 64 x 64 colored images. !!! This dataset only links to 20 images per class (instead of the usual 500) and is ONLY for quickly testing a framework. !!! Each class has 500 training images, 50 validation images, and 50 test images. The dataset here just contains links to the images and the labels. The dataset can be downloaded from the official website ![here](http://cs231n.stanford.edu/tiny-imagenet-200.zip). /n Link to the paper - [Tiny ImageNet Classification with CNN](https://cs231n.stanford.edu/reports/2017/pdfs/930.pdf)"
    citation = ("Wu, J., Zhang, Q., & Xu, G. (2017). Tiny imagenet challenge. Technical report.")

    tinyim = create_dataset(
        name=name,
        description=description,
        creator="Jiayu Wu, Qixiang Zhang, Guoxi Xu",
        contributor="Jiayu Wu, Qixiang Zhang, Guoxi Xu",
        collection_date="2017",
        language="English",
        licence="DbCL v1.0",
        default_target_attribute="label",
        attributes="auto",
        data=df,
        citation=citation,
        ignore_attribute=None,
    )
    openml.config.apikey = ''  # set your OpenML API key before publishing
    tinyim.publish()
    print(f"URL for dataset: {tinyim.openml_url}")
+
create_tiniest_imagenet()
+# https://www.openml.org/d/46347
+
URL for dataset: https://www.openml.org/d/46347 ++
def create_task():
    """Create and publish an OpenML supervised-classification task for the
    'tiniest-imagenet-200' dataset (dataset id 46347).

    Side effects: reads a local CSV for the class labels and publishes a
    new task to the OpenML server (requires a configured API key).
    """
    # Define task parameters
    task_type = openml.tasks.TaskType.SUPERVISED_CLASSIFICATION
    dataset_id = 46347  # Obtained from the dataset creation step
    evaluation_measure = 'predictive_accuracy'
    target_name = 'label'
    # NOTE(review): this CSV is not written by the dataset-creation code
    # above — confirm datasets/tiniest_imagenet.csv exists before running.
    class_labels = list(pd.read_csv("datasets/tiniest_imagenet.csv")["label"].unique())
    cost_matrix = None

    # Create the task (estimation_procedure_id=1 selects the server-side
    # default estimation procedure for this task type)
    new_task = openml.tasks.create_task(
        task_type=task_type,
        dataset_id=dataset_id,
        estimation_procedure_id = 1,
        evaluation_measure=evaluation_measure,
        target_name=target_name,
        class_labels=class_labels,
        cost_matrix=cost_matrix
    )
    openml.config.apikey = ''  # set your OpenML API key before publishing
    new_task.publish()
    print(f"URL for task: {new_task.openml_url}")
+
create_task()
+# https://www.openml.org/t/362128
+
URL for task: https://www.openml.org/t/362128 ++
import torch.nn
+import torch.optim
+
+import openml_pytorch.config
+import openml
+import logging
+import warnings
+
+# Suppress FutureWarning messages
+warnings.simplefilter(action='ignore')
+
+############################################################################
+# Enable logging in order to observe the progress while running the example.
+openml.config.logger.setLevel(logging.DEBUG)
+openml_pytorch.config.logger.setLevel(logging.DEBUG)
+############################################################################
+
+############################################################################
+from openml_pytorch.trainer import OpenMLTrainerModule
+from openml_pytorch.trainer import OpenMLDataModule
+from torchvision.transforms import Compose, Resize, ToPILImage, ToTensor, Lambda
+import torchvision
+
+from openml_pytorch.trainer import convert_to_rgb
+
model = torchvision.models.efficientnet_b0(num_classes=200)
+
file_dir
directory, and the filename_col
is correctly set along with this column correctly pointing to where your data is stored.transform = Compose(
+ [
+ ToPILImage(), # Convert tensor to PIL Image to ensure PIL Image operations can be applied.
+ Lambda(
+ convert_to_rgb
+ ), # Convert PIL Image to RGB if it's not already.
+ Resize(
+ (64, 64)
+ ), # Resize the image.
+ ToTensor(), # Convert the PIL Image back to a tensor.
+ ]
+)
+data_module = OpenMLDataModule(
+ type_of_data="image",
+ file_dir="datasets",
+ filename_col="image_path",
+ target_mode="categorical",
+ target_column="label",
+ batch_size = 64,
+ transform=transform
+)
+
trainer = OpenMLTrainerModule(
+ data_module=data_module,
+ verbose = True,
+ epoch_count = 1,
+ callbacks=[],
+)
+openml_pytorch.config.trainer = trainer
+
# Download the OpenML task for tiniest imagenet
+task = openml.tasks.get_task(362128)
+
run = openml.runs.run_model_on_task(model, task, avoid_duplicate_runs=False)
+
run.publish()
+
trainer.runner.cbs[1].plot_loss()
+
trainer.runner.cbs[1].plot_lr()
+
trainer.learn.model_classes
+
run.publish()
+
import torch.nn
+import torch.optim
+
+import openml
+import openml_pytorch
+import openml_pytorch.layers
+import openml_pytorch.config
+from openml import OpenMLTask
+import logging
+import warnings
+from torchvision.transforms import Compose, Resize, ToPILImage, ToTensor, Lambda
+from openml_pytorch.trainer import convert_to_rgb
+# Suppress FutureWarning messages
+warnings.simplefilter(action='ignore')
+
+############################################################################
+# Enable logging in order to observe the progress while running the example.
+openml.config.logger.setLevel(logging.DEBUG)
+openml_pytorch.config.logger.setLevel(logging.DEBUG)
+############################################################################
+
+############################################################################
+import torch.nn as nn
+import torch.nn.functional as F
+
# openml.config.apikey = 'key'
+from openml_pytorch.trainer import OpenMLTrainerModule
+from openml_pytorch.trainer import OpenMLDataModule
+from openml_pytorch.trainer import Callback
+
# Example model. You can do better :)
+import torchvision.models as models
+
+# Load the pre-trained ResNet model
+model = models.efficientnet_b0(pretrained=True)
+
+# Modify the last fully connected layer to the required number of classes
+num_classes = 200
+in_features = model.classifier[-1].in_features
+# model.fc = nn.Linear(in_features, num_classes)
+model.classifier = nn.Sequential(
+ nn.Dropout(p=0.2, inplace=True),
+ nn.Linear(in_features, num_classes),
+)
+
+# Optional: If you're fine-tuning, you may want to freeze the pre-trained layers
+# for param in model.parameters():
+# param.requires_grad = False
+
+# # If you want to train the last layer only (the newly added layer)
+# for param in model.fc.parameters():
+# param.requires_grad = True
+
file_dir
directory, and the filename_col
is correctly set along with this column correctly pointing to where your data is stored.transform = Compose(
+ [
+ ToPILImage(), # Convert tensor to PIL Image to ensure PIL Image operations can be applied.
+ Lambda(
+ convert_to_rgb
+ ), # Convert PIL Image to RGB if it's not already.
+ Resize(
+ (64, 64)
+ ), # Resize the image.
+ ToTensor(), # Convert the PIL Image back to a tensor.
+ ]
+)
+data_module = OpenMLDataModule(
+ type_of_data="image",
+ file_dir="datasets",
+ filename_col="image_path",
+ target_mode="categorical",
+ target_column="label",
+ batch_size = 64,
+ transform=transform
+)
+
def custom_optimizer_gen(model: torch.nn.Module, task: "OpenMLTask") -> torch.optim.Optimizer:
    """Build the optimizer the trainer uses for this run.

    Parameters
    ----------
    model : torch.nn.Module
        The network being trained. Must expose its classification head as
        ``model.classifier``, as the efficientnet defined above does.
    task : OpenMLTask
        The OpenML task being run (unused here, but part of the
        optimizer-factory signature expected by the trainer).

    Returns
    -------
    torch.optim.Optimizer
        An Adam optimizer over the classification head's parameters only.
    """
    # Bug fix: the efficientnet built above stores its head in
    # `model.classifier` (the `model.fc = ...` line is commented out), so
    # the original `model.fc.parameters()` raised AttributeError at runtime.
    return torch.optim.Adam(model.classifier.parameters())
+
+trainer = OpenMLTrainerModule(
+ data_module=data_module,
+ verbose = True,
+ epoch_count = 1,
+ optimizer = custom_optimizer_gen,
+ callbacks=[],
+)
+openml_pytorch.config.trainer = trainer
+
# Download the OpenML task for tiniest imagenet
+task = openml.tasks.get_task(362128)
+
#
+# Run the model on the task (requires an API key).
+run = openml.runs.run_model_on_task(model, task, avoid_duplicate_runs=False)
+
trainer.runner.cbs[1].plot_loss()
+
trainer.runner.cbs[1].plot_lr()
+
run.publish()
+
import torch.nn
+import torch.optim
+
+import openml_pytorch.config
+import openml
+import logging
+import warnings
+
+# Suppress FutureWarning messages
+warnings.simplefilter(action='ignore')
+
+############################################################################
+# Enable logging in order to observe the progress while running the example.
+openml.config.logger.setLevel(logging.DEBUG)
+openml_pytorch.config.logger.setLevel(logging.DEBUG)
+############################################################################
+
from openml_pytorch.trainer import OpenMLTrainerModule
+from openml_pytorch.trainer import OpenMLDataModule
+
############################################################################
# Define a sequential network that does the initial image reshaping
# and normalization. Input rows arrive flat (784 features, MNIST style);
# reshape to (N, 1, 28, 28) and batch-normalize the single channel.
processing_net = torch.nn.Sequential(
    openml_pytorch.layers.Functional(function=torch.Tensor.reshape,
                                     shape=(-1, 1, 28, 28)),
    torch.nn.BatchNorm2d(num_features=1)
)
############################################################################

############################################################################
# Define a sequential network that extracts the features from the image.
# Two conv(k=5) + max-pool(2) stages: spatially 28 -> 24 -> 12 -> 8 -> 4,
# ending with 64 channels.
features_net = torch.nn.Sequential(
    torch.nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5),
    torch.nn.LeakyReLU(),
    torch.nn.MaxPool2d(kernel_size=2),
    torch.nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5),
    torch.nn.LeakyReLU(),
    torch.nn.MaxPool2d(kernel_size=2),
)
############################################################################

############################################################################
# Define a sequential network that flattens the features and compiles the
# results into scores for each digit. The flatten size 4 * 4 * 64 matches
# the (4, 4, 64) output of features_net above.
results_net = torch.nn.Sequential(
    openml_pytorch.layers.Functional(function=torch.Tensor.reshape,
                                     shape=(-1, 4 * 4 * 64)),
    torch.nn.Linear(in_features=4 * 4 * 64, out_features=256),
    torch.nn.LeakyReLU(),
    torch.nn.Dropout(),
    torch.nn.Linear(in_features=256, out_features=10),
)
############################################################################
# openml.config.apikey = 'key'

############################################################################
# The main network, composed of the above specified networks.
model = torch.nn.Sequential(
    processing_net,
    features_net,
    results_net
)
############################################################################
+
target_col
is correctly set.data_module = OpenMLDataModule(
+ type_of_data="dataframe",
+ filename_col="class",
+ target_mode="categorical",
+)
+
trainer = OpenMLTrainerModule(
+ data_module=data_module,
+ verbose = True,
+ epoch_count = 1,
+ callbacks=[],
+)
+openml_pytorch.config.trainer = trainer
+
# Download the OpenML task for the mnist 784 dataset.
+task = openml.tasks.get_task(3573)
+
run = openml.runs.run_model_on_task(model, task, avoid_duplicate_runs=False)
+
trainer.runner.cbs[1].plot_loss()
+
trainer.runner.cbs[1].plot_lr()
+
run.publish()
+
import torch.nn
+import torch.optim
+
+import openml
+import openml_pytorch
+import openml_pytorch.layers
+import openml_pytorch.config
+import logging
+
+
+############################################################################
+# Enable logging in order to observe the progress while running the example.
+openml.config.logger.setLevel(logging.DEBUG)
+openml_pytorch.config.logger.setLevel(logging.DEBUG)
+############################################################################
+
from openml_pytorch.trainer import OpenMLTrainerModule
+from openml_pytorch.trainer import OpenMLDataModule
+from openml_pytorch.trainer import Callback
+
class TabularClassificationmodel(torch.nn.Module):
    """A small fully-connected classifier for tabular data.

    Three linear layers (input_size -> 128 -> 64 -> output_size) with ReLU
    activations between them and a softmax over the class dimension, so the
    forward pass yields per-row class probabilities.
    """

    def __init__(self, input_size, output_size):
        super(TabularClassificationmodel, self).__init__()
        # Layer names kept identical to the original so state dicts match.
        self.fc1 = torch.nn.Linear(input_size, 128)
        self.fc2 = torch.nn.Linear(128, 64)
        self.fc3 = torch.nn.Linear(64, output_size)
        self.relu = torch.nn.ReLU()
        self.softmax = torch.nn.Softmax(dim=1)

    def forward(self, x):
        """Map a (batch, input_size) tensor to (batch, output_size) probabilities."""
        hidden = self.relu(self.fc1(x))
        hidden = self.relu(self.fc2(hidden))
        return self.softmax(self.fc3(hidden))
+
model = TabularClassificationmodel(20, 2)
+
# supervised credit-g classification
+task = openml.tasks.get_task(31)
+
target_col
is correctly set.data_module = OpenMLDataModule(
+ type_of_data="dataframe",
+ target_column="class",
+ target_mode="categorical",
+)
+
trainer = OpenMLTrainerModule(
+ data_module=data_module,
+ verbose = True,
+ epoch_count = 5,
+)
+openml_pytorch.config.trainer = trainer
+
run = openml.runs.run_model_on_task(model, task, avoid_duplicate_runs=False)
+
run.publish()
+
# openml.config.apikey = ''
+
trainer.runner.cbs[1].plot_loss()
+
trainer.runner.cbs[1].plot_lr()
+
This folder contains examples of how to use the openml-pytorch
extension for different types of data.
Along with this PyTorch API, OpenML is also integrated in PyTorch through the following modules.
+1 +2 +3 |
|
1 +2 +3 |
|
This API is designed to make it easier to use PyTorch with OpenML and has been heavily inspired by the current state of the art Deep Learning frameworks like FastAI and PyTorch Lightning.
+To make the library as modular as possible, callbacks are used throughout the training loop. This allows for easy customization of the training loop without having to modify the core code.
+Here, we focus on the data, model and training as separate blocks that can be strung together in a pipeline. This makes it easier to experiment with different models, data and training strategies.
+That being the case, the OpenMLDataModule and OpenMLTrainerModule are designed to handle the data and training respectively. This might seem a bit verbose at first, but it makes it easier to understand what is happening at each step of the process and allows for easier customization.
+ + + + + + + + + + + + + +