Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Upgrade base pandas version #740

Merged
merged 24 commits into from
Nov 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
01daf06
changed code for pandas to pandas>=2.0.0
Ainesh06102004 Nov 7, 2023
62a4714
made change
Ainesh06102004 Nov 8, 2023
e9969b6
Merge branch 'master' of https://github.com/mlcommons/GaNDLF into por…
Ainesh06102004 Nov 8, 2023
ddc3d36
changed code from dataframe.append to dataframe._append
Ainesh06102004 Nov 8, 2023
9b8aea1
works fine
Ainesh06102004 Nov 11, 2023
5e3396d
test cases 12,13 and 14 failed with weird errors
Ainesh06102004 Nov 13, 2023
b58d580
few errors, have commented them, proceeding with rest for now
Ainesh06102004 Nov 13, 2023
13000c0
Merge branch 'master' of https://github.com/mlcommons/GaNDLF into por…
Ainesh06102004 Nov 13, 2023
5ac161e
numpy error raised
Ainesh06102004 Nov 13, 2023
2e7abaa
Merge branch 'feature/python_3_10_numpy_1_25' of https://github.com/s…
Ainesh06102004 Nov 14, 2023
fb2a2fe
some functions have given contextual version error with pandas
Ainesh06102004 Nov 14, 2023
4d9b8f1
some functions are giving contextual version error and some are givin…
Ainesh06102004 Nov 14, 2023
8f00140
Merge branch 'master' of https://github.com/mlcommons/GaNDLF into por…
Ainesh06102004 Nov 14, 2023
9a13a24
upgraded all code related to pandas to its 2.0.0 version
Ainesh06102004 Nov 14, 2023
55d0841
Merge branch 'master' into port_to_pandas_2.0
sarthakpati Nov 17, 2023
c00906d
resolved all issues hopefully
Ainesh06102004 Nov 20, 2023
aed9c06
Merge branch 'master' of https://github.com/mlcommons/GaNDLF into por…
Ainesh06102004 Nov 20, 2023
a4fddf7
test commit
Ainesh06102004 Nov 20, 2023
12d9f3d
revert
Ainesh06102004 Nov 20, 2023
cf767f2
test syntax fix
Ainesh06102004 Nov 20, 2023
4bee0a9
Merge branch 'port_to_pandas_2.0_2' of https://github.com/Ainesh06102…
Ainesh06102004 Nov 20, 2023
428726e
removed whitespace
Ainesh06102004 Nov 20, 2023
5a3137c
blacked files
Ainesh06102004 Nov 20, 2023
6c5d5c6
changed concat to append
Ainesh06102004 Nov 21, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 42 additions & 11 deletions GANDLF/cli/generate_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def generate_metrics_dict(input_csv: str, config: str, outputfile: str = None) -
# check required headers in a case insensitive manner
headers = {}
required_columns = ["subjectid", "prediction", "target"]
for col, _ in input_df.iteritems():
for col, _ in input_df.items():
col_lower = col.lower()
for column_to_check in required_columns:
if column_to_check == col_lower:
Expand Down Expand Up @@ -194,7 +194,13 @@ def __fix_2d_tensor(input_tensor):
else:
return input_tensor

def __percentile_clip(input_tensor, reference_tensor=None, p_min=0.5, p_max=99.5, strictlyPositive=True):
def __percentile_clip(
input_tensor,
reference_tensor=None,
p_min=0.5,
p_max=99.5,
strictlyPositive=True,
):
"""Normalizes a tensor based on percentiles. Clips values below and above the percentile.
Percentiles for normalization can come from another tensor.

Expand All @@ -209,13 +215,21 @@ def __percentile_clip(input_tensor, reference_tensor=None, p_min=0.5, p_max=99.5
Returns:
torch.Tensor: The input_tensor normalized based on the percentiles of the reference tensor.
"""
reference_tensor = input_tensor if reference_tensor is None else reference_tensor
v_min, v_max = np.percentile(reference_tensor, [p_min,p_max]) #get p_min percentile and p_max percentile
reference_tensor = (
input_tensor if reference_tensor is None else reference_tensor
)
v_min, v_max = np.percentile(
reference_tensor, [p_min, p_max]
) # get p_min percentile and p_max percentile

# set lower bound to be 0 if strictlyPositive is enabled
v_min = max(v_min, 0.0) if strictlyPositive else v_min
output_tensor = np.clip(input_tensor,v_min,v_max) #clip values to percentiles from reference_tensor
output_tensor = (output_tensor - v_min)/(v_max-v_min) #normalizes values to [0;1]
output_tensor = np.clip(
input_tensor, v_min, v_max
) # clip values to percentiles from reference_tensor
output_tensor = (output_tensor - v_min) / (
v_max - v_min
) # normalizes values to [0;1]
return output_tensor

for _, row in tqdm(input_df.iterrows(), total=input_df.shape[0]):
Expand Down Expand Up @@ -244,9 +258,23 @@ def __percentile_clip(input_tensor, reference_tensor=None, p_min=0.5, p_max=99.5
# Normalize to [0;1] based on GT (otherwise MSE will depend on the image intensity range)
normalize = parameters.get("normalize", True)
if normalize:
reference_tensor = target_image * ~mask #use all the tissue that is not masked for normalization
gt_image_infill = __percentile_clip(gt_image_infill, reference_tensor=reference_tensor, p_min=0.5, p_max=99.5, strictlyPositive=True)
output_infill = __percentile_clip(output_infill, reference_tensor=reference_tensor, p_min=0.5, p_max=99.5, strictlyPositive=True)
reference_tensor = (
target_image * ~mask
) # use all the tissue that is not masked for normalization
gt_image_infill = __percentile_clip(
gt_image_infill,
reference_tensor=reference_tensor,
p_min=0.5,
p_max=99.5,
strictlyPositive=True,
)
output_infill = __percentile_clip(
output_infill,
reference_tensor=reference_tensor,
p_min=0.5,
p_max=99.5,
strictlyPositive=True,
)

overall_stats_dict[current_subject_id][
"ssim"
Expand Down Expand Up @@ -303,14 +331,17 @@ def __percentile_clip(input_tensor, reference_tensor=None, p_min=0.5, p_max=99.5
overall_stats_dict[current_subject_id][
"psnr_01"
] = peak_signal_noise_ratio(
gt_image_infill, output_infill, data_range=(0,1)
gt_image_infill, output_infill, data_range=(0, 1)
).item()

# same as above but with epsilon for robustness
overall_stats_dict[current_subject_id][
"psnr_01_eps"
] = peak_signal_noise_ratio(
gt_image_infill, output_infill, data_range=(0,1), epsilon=sys.float_info.epsilon
gt_image_infill,
output_infill,
data_range=(0, 1),
epsilon=sys.float_info.epsilon,
).item()

pprint(overall_stats_dict)
Expand Down
2 changes: 1 addition & 1 deletion GANDLF/cli/patch_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def patch_extraction(input_path, output_path, config=None):
manager.set_valid_mask(mask, scale)
# Reject patch if any pixels are transparent
manager.add_patch_criteria(alpha_rgb_2d_channel_check)
#manager.add_patch_criteria(pen_marking_check) ### will be added to main code after rigourous experimentation
# manager.add_patch_criteria(pen_marking_check) ### will be added to main code after rigourous experimentation
manager.add_patch_criteria(patch_artifact_check)
# Reject patch if image dimensions are not equal to PATCH_SIZE
patch_dims_check = partial(
Expand Down
4 changes: 3 additions & 1 deletion GANDLF/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,9 @@ def write_header(self, mode="train"):
if os.stat(self.filename).st_size == 0:
mode_lower = mode.lower()
row = "epoch_no," + mode_lower + "_loss,"
row += ",".join([mode_lower + "_" + metric for metric in self.metrics]) + ","
row += (
",".join([mode_lower + "_" + metric for metric in self.metrics]) + ","
)
row = row[:-1]
row += "\n"
self.csv.write(row)
Expand Down
20 changes: 14 additions & 6 deletions GANDLF/metrics/synthesis.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,9 @@ def mean_squared_error(target, prediction) -> torch.Tensor:
return mse(preds=prediction, target=target)


def peak_signal_noise_ratio(target, prediction, data_range=None, epsilon=None) -> torch.Tensor:
def peak_signal_noise_ratio(
target, prediction, data_range=None, epsilon=None
) -> torch.Tensor:
"""
Computes the peak signal to noise ratio between the target and prediction.

Expand All @@ -60,16 +62,22 @@ def peak_signal_noise_ratio(target, prediction, data_range=None, epsilon=None) -
"""

if epsilon == None:
psnr = PeakSignalNoiseRatio() if data_range == None else PeakSignalNoiseRatio(data_range=data_range[1]-data_range[0])
psnr = (
PeakSignalNoiseRatio()
if data_range == None
else PeakSignalNoiseRatio(data_range=data_range[1] - data_range[0])
)
return psnr(preds=prediction, target=target)
else: # implementation of PSNR that does not give 'inf'/'nan' when 'mse==0'
else: # implementation of PSNR that does not give 'inf'/'nan' when 'mse==0'
mse = mean_squared_error(target, prediction)
if data_range == None: #compute data_range like torchmetrics if not given
min_v = 0 if torch.min(target) > 0 else torch.min(target) #look at this line
if data_range == None: # compute data_range like torchmetrics if not given
min_v = (
0 if torch.min(target) > 0 else torch.min(target)
) # look at this line
max_v = torch.max(target)
else:
min_v, max_v = data_range
return 10.0 * torch.log10(((max_v-min_v) ** 2) / (mse + epsilon))
return 10.0 * torch.log10(((max_v - min_v) ** 2) / (mse + epsilon))


def mean_squared_log_error(target, prediction) -> torch.Tensor:
Expand Down
17 changes: 13 additions & 4 deletions GANDLF/models/brain_age.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import torchvision
import traceback


def brainage(parameters):
"""
This function creates a VGG16-based neural network model for brain age prediction.
Expand All @@ -18,7 +19,9 @@ def brainage(parameters):
"""

# Check that the input data is 2D
assert parameters["model"]["dimension"] == 2, "Brain Age predictions only work on 2D data"
assert (
parameters["model"]["dimension"] == 2
), "Brain Age predictions only work on 2D data"

try:
# Load the pretrained VGG16 model
Expand All @@ -38,13 +41,19 @@ def brainage(parameters):
features = list(model.classifier.children())[:-1] # Remove the last layer
features.extend(
[
nn.Linear(num_features, 1024), # Add a linear layer with 1024 output features
nn.Linear(
num_features, 1024
), # Add a linear layer with 1024 output features
nn.ReLU(True), # Add a ReLU activation function
nn.Dropout2d(0.8), # Add a 2D dropout layer with a probability of 0.8
nn.Linear(1024, 1), # Add a linear layer with 1 output feature (for brain age prediction)
nn.Linear(
1024, 1
), # Add a linear layer with 1 output feature (for brain age prediction)
]
)
model.classifier = nn.Sequential(*features) # Replace the model classifier with the modified one
model.classifier = nn.Sequential(
*features
) # Replace the model classifier with the modified one

# Set the "amp" parameter to False (not yet implemented for VGG)
parameters["model"]["amp"] = False
Expand Down
8 changes: 4 additions & 4 deletions GANDLF/training_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def TrainingManager(dataframe, outputDir, parameters, device, resume, reset):
else:
# loop over all trainAndVal_index and construct new dataframe
for subject_idx in trainAndVal_index:
trainingAndValidationData = trainingAndValidationData.append(
trainingAndValidationData = trainingAndValidationData._append(
trainingData_full[
trainingData_full[
trainingData_full.columns[
Expand All @@ -106,7 +106,7 @@ def TrainingManager(dataframe, outputDir, parameters, device, resume, reset):

# loop over all testing_index and construct new dataframe
for subject_idx in testing_index:
testingData = testingData.append(
testingData = testingData._append(
trainingData_full[
trainingData_full[
trainingData_full.columns[
Expand Down Expand Up @@ -199,7 +199,7 @@ def TrainingManager(dataframe, outputDir, parameters, device, resume, reset):

# loop over all train_index and construct new dataframe
for subject_idx in train_index:
trainingData = trainingData.append(
trainingData = trainingData._append(
trainingData_full[
trainingData_full[
trainingData_full.columns[
Expand All @@ -212,7 +212,7 @@ def TrainingManager(dataframe, outputDir, parameters, device, resume, reset):

# loop over all val_index and construct new dataframe
for subject_idx in val_index:
validationData = validationData.append(
validationData = validationData._append(
trainingData_full[
trainingData_full[
trainingData_full.columns[
Expand Down
4 changes: 3 additions & 1 deletion GANDLF/utils/write_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@ def writeTrainingCSV(
channelsID_list = channelsID.split(",") # split into list

outputToWrite = "SubjectID,"
outputToWrite += ",".join(["Channel_" + str(i) for i, n in enumerate(channelsID_list)]) + ","
outputToWrite += (
",".join(["Channel_" + str(i) for i, n in enumerate(channelsID_list)]) + ","
)
if labelID is not None:
outputToWrite += "Label"
outputToWrite += "\n"
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def run(self):
"torchvision",
"tqdm",
"torchio==0.18.75",
"pandas<2.0.0",
"pandas>=2.0.0",
"scikit-learn>=0.23.2",
"scikit-image>=0.19.1",
"setuptools",
Expand Down Expand Up @@ -119,7 +119,7 @@ def run(self):
version=__version__,
author="MLCommons",
author_email="gandlf@mlcommons.org",
python_requires=">=3.9, <=3.10",
python_requires=">=3.9, <3.11",
packages=find_packages(
where=os.path.dirname(os.path.abspath(__file__)),
exclude=toplevel_package_excludes,
Expand Down
Loading
Loading