
Commit 4e16003

Author: prithagupta
Commit message: Added utility functions on docs
Parent: 26e87c7

File tree: 5 files changed, +158 −23 lines


docs/source/index.rst

Lines changed: 1 addition & 0 deletions
@@ -22,6 +22,7 @@ Contents
 
    notebooks/comparing_mi_estimators
    notebooks/automated_information_leakage_detection
+   notebooks/utils
 
 .. toctree::
    :maxdepth: 2

docs/source/notebooks/automated_information_leakage_detection.ipynb

Lines changed: 10 additions & 9 deletions
@@ -17,10 +17,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
    "id": "63a2d792-bd63-4f48-b64c-5bdcf0d9af40",
-   "metadata": {},
-   "outputs": [],
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2024-08-16T19:46:29.050721Z",
+     "start_time": "2024-08-16T19:46:29.033296Z"
+    }
+   },
    "source": [
     "import logging\n",
     "import warnings\n",
@@ -31,7 +34,9 @@
     "logging.getLogger(\"pytorch\").setLevel(logging.ERROR)\n",
     "logging.getLogger(\"torch\").setLevel(logging.ERROR)\n",
     "logging.getLogger(\"urllib3.connectionpool\").setLevel(logging.ERROR)"
-   ]
+   ],
+   "outputs": [],
+   "execution_count": 1
   },
   {
    "cell_type": "code",
@@ -52,13 +57,9 @@
    }
   ],
   "source": [
-   "import numpy as np\n",
-   "import matplotlib.pyplot as plt\n",
-   "from sklearn.model_selection import train_test_split\n",
    "from autoqild.detectors import TabPFNLeakageDetector, RandomForestLeakageDetector\n",
    "from autoqild.dataset_readers import SyntheticDatasetGeneratorDistance\n",
-   "from utils import setup_logging, setup_random_seed, create_search_space\n",
-   "import pandoc"
+   "from utils import setup_logging, setup_random_seed, create_search_space"
   ]
  },
  {

docs/source/notebooks/comparing_mi_estimators.ipynb

Lines changed: 2 additions & 5 deletions
@@ -39,15 +39,12 @@
   "outputs": [],
   "source": [
    "# Cell 1: Setup and Imports\n",
-   "import matplotlib.pyplot as plt\n",
-   "import numpy as np\n",
    "from autoqild.dataset_readers import SyntheticDatasetGenerator\n",
    "from autoqild.mi_estimators.mi_estimator_classification import ClassficationMIEstimator\n",
    "from autoqild.mi_estimators import MineMIEstimatorMSE, GMMMIEstimator, TabPFNMIEstimator\n",
    "from autoqild.utilities import print_dictionary\n",
    "from utils import setup_logging, setup_random_seed\n",
-   "from sklearn.model_selection import train_test_split\n",
-   "import pandoc"
+   "from sklearn.model_selection import train_test_split"
   ]
  },
  {
@@ -57,7 +54,7 @@
   "source": [
    "**Setting Up Experiment Logging and Random Seed:**\n",
    "\n",
-   "Initialize logging for the experiment, recording all key events in info_leakage_detection.log for tracking and debugging."
+   "Initialize logging for the experiment, recording all key events in info_leakage_detection.log for tracking and debugging. Using the utils.py file."
  ]
 },
 {

docs/source/notebooks/utils.py

Lines changed: 0 additions & 9 deletions
@@ -12,8 +12,6 @@
 from sklearn.utils import check_random_state
 from skopt.space import Real, Categorical, Integer
 
-from autoqild import *
-
 
 def create_search_space(hp_ranges, logger):
     def isint(v):
@@ -45,13 +43,6 @@ def isstr(v):
     return search_space
 
 
-def convert_learner_params(params):
-    for key, value in params.items():
-        if value == "None":
-            params[key] = None
-    return params
-
-
 def setup_logging(log_path=None, level=logging.INFO):
     """Function setup as many logging for the experiments."""
     if log_path is None:

docs/source/notebooks/utils.rst

Lines changed: 145 additions & 0 deletions
@@ -0,0 +1,145 @@
+Utilities Module
+================
+
+This module contains utility functions for logging setup, search space creation, and random seed setup, designed for compatibility with TensorFlow, PyTorch, and scikit-learn.
+
+Functions
+---------
+
+create_search_space
+-------------------
+
+This function creates a hyperparameter search space based on provided ranges, supporting integers, floats, booleans, and strings.
+
+**Code:**
+
+.. code-block:: python
+
+    import inspect
+    import logging
+    import os
+    import random
+
+    import numpy as np
+    import sklearn
+    import tensorflow as tf
+    import torch
+    from packaging import version
+    from skopt.space import Real, Categorical, Integer
+
+    def create_search_space(hp_ranges, logger):
+        def isint(v):
+            return type(v) is int
+
+        def isfloat(v):
+            return type(v) is float
+
+        def isbool(v):
+            return type(v) is bool
+
+        def isstr(v):
+            return type(v) is str
+
+        search_space = {}
+        for key, value in hp_ranges.items():
+            logger.info(f"Before key {key} value {value}")
+            if version.parse(sklearn.__version__) < version.parse("0.25.0"):
+                if key == "criterion" and "squared_error" in value:
+                    value = ["friedman_mse", "mse"]
+            if isint(value[0]) and isint(value[1]):
+                search_space[key] = Integer(value[0], value[1])
+            if isfloat(value[0]) and isfloat(value[1]):
+                if len(value) == 3:
+                    search_space[key] = Real(value[0], value[1], prior=value[2])
+            if (isbool(value[0]) and isbool(value[1])) or (isstr(value[0]) and isstr(value[1])):
+                search_space[key] = Categorical(value)
+            logger.info(f"key {key} value {value}")
+        return search_space
+
+
+setup_logging
+-------------
+
+Sets up logging for experiments, allowing control over log file location and verbosity.
+
+**Code:**
+
+.. code-block:: python
+
+    def setup_logging(log_path=None, level=logging.INFO):
+        """Function setup as many logging for the experiments."""
+        if log_path is None:
+            dirname = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
+            dirname = os.path.dirname(dirname)
+            log_path = os.path.join(dirname, "logs", "logs.log")
+
+        logging.basicConfig(
+            filename=log_path,
+            level=level,
+            format="%(asctime)s %(name)s %(levelname)-8s %(message)s",
+            datefmt="%Y-%m-%d %H:%M:%S",
+            force=True,
+        )
+        logger = logging.getLogger("SetupLogging")  # root logger
+        logger.info("log file path: {}".format(log_path))
+        os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"  # Suppresses INFO, WARNING, and ERROR logs
+        # Additional TensorFlow setting to disable GPU usage explicitly
+        tf.config.set_visible_devices([], "GPU")
+        logging.captureWarnings(False)
+        import warnings
+
+        warnings.filterwarnings("ignore")
+        warnings.filterwarnings("ignore", category=DeprecationWarning)
+        logging.getLogger("matplotlib").setLevel(logging.ERROR)
+        logging.getLogger("tensorflow").setLevel(logging.ERROR)
+        logging.getLogger("pytorch").setLevel(logging.ERROR)
+        logging.getLogger("torch").setLevel(logging.ERROR)
+        logging.getLogger("urllib3.connectionpool").setLevel(logging.ERROR)
+
+
+setup_random_seed
+-----------------
+
+Sets up a random seed across TensorFlow, PyTorch, NumPy, and Python’s `random` module, while also configuring CPU and GPU usage.
+
+**Code:**
+
+.. code-block:: python
+
+    def setup_random_seed(random_state=1234):
+        logger = logging.getLogger("Setup Logging")
+        random_state = check_random_state(random_state)
+
+        seed = random_state.randint(2**31, dtype="uint32")
+        torch.manual_seed(seed)
+        logger.info(f"Total number of torch threads {torch.get_num_threads()}")
+        if torch.get_num_threads() <= 2:
+            n_cpus = 1
+        else:
+            n_cpus = torch.get_num_threads() - 2
+        if "pc2" in os.environ["HOME"]:
+            n_cpus = 4
+        logger.info(f"Torch threads set {n_cpus}")
+
+        torch.set_num_threads(n_cpus)
+        tf.random.set_seed(seed)
+
+        seed = random_state.randint(2**31, dtype="uint32")
+        np.random.seed(seed)
+        random.seed(seed)
+        os.environ["KERAS_BACKEND"] = "tensorflow"
+        devices = tf.config.list_physical_devices("GPU")
+        logger.info("Keras Devices {}".format(devices))
+        n_gpus = len(devices)
+        logger.info("Keras GPU {}".format(n_gpus))
+        if n_gpus == 0:
+            cpu_count = multiprocessing.cpu_count()
+            tf.config.threading.set_inter_op_parallelism_threads(1)
+            tf.config.threading.set_intra_op_parallelism_threads(1)
+            if cpu_count > 2:
+                pass
+        else:
+            for gpu in tf.config.list_physical_devices("GPU"):
+                tf.config.experimental.set_memory_growth(gpu, True)
+        torch_gpu = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        logger.info("Torch GPU device {}".format(torch_gpu))
