From b05b80641ec65938120b225ccf5050878e48f59a Mon Sep 17 00:00:00 2001 From: dPys Date: Tue, 25 Aug 2020 13:25:58 -0500 Subject: [PATCH 1/5] [ENH] Simplify roi masking for node generation --- pynets/cli/pynets_run.py | 4 +- pynets/core/nodemaker.py | 122 ++++++++++++++++++------------- pynets/registration/reg_utils.py | 2 +- 3 files changed, 76 insertions(+), 52 deletions(-) diff --git a/pynets/cli/pynets_run.py b/pynets/cli/pynets_run.py index e2cfccb6..6a5022c0 100755 --- a/pynets/cli/pynets_run.py +++ b/pynets/cli/pynets_run.py @@ -761,7 +761,7 @@ def build_workflow(args, retval): int(list(psutil.virtual_memory())[4]/1000000000) - 2] else: procmem = list(eval(str(resources))) - procmem[1] = procmem[1] - 2 + procmem[1] = procmem[1] - 1 if args.thr is None: thr = float(1.0) else: @@ -832,6 +832,8 @@ def build_workflow(args, retval): else: extract_strategy_list = None roi = args.roi + if isinstance(roi, list): + roi = roi[0] conn_model = args.mod if conn_model: if (isinstance(conn_model, list)) and (len(conn_model) > 1): diff --git a/pynets/core/nodemaker.py b/pynets/core/nodemaker.py index 4f2d2ccc..d3bd310c 100644 --- a/pynets/core/nodemaker.py +++ b/pynets/core/nodemaker.py @@ -544,11 +544,14 @@ def get_node_membership( (parcel_vol.astype("uint16") == 1)))) # Calculate % overlap - try: + if overlap_count > 0: overlap = float(overlap_count / total_count) - except BaseException: - print(f"No overlap with roi mask. Dropping parcel {i}...") + else: + print( + f"No overlap of parcel {i} with rsn mask...") overlap = float(0) + i += 1 + continue if overlap >= perc_overlap: print( @@ -558,7 +561,7 @@ def get_node_membership( RSN_parcels.append(parcel) coords_with_parc.append(coords[i]) net_labels.append(labels[i]) - i = i + 1 + i += 1 coords_mm = list(set(list(tuple(x) for x in coords_with_parc))) rsn_img.uncache() @@ -585,7 +588,8 @@ def parcel_masker( labels, dir_path, ID, - perc_overlap): + perc_overlap, + vox_size='2mm'): """ Evaluate the affinity of any arbitrary list of parcel nodes for a user-specified ROI mask. @@ -625,37 +629,64 @@ def parcel_masker( corresponding to ROI masks with a spatial affinity to the specified ROI mask. """ - from pynets.core import nodemaker - from nilearn.image import resample_img - from nilearn import masking - import os.path as op + from nilearn.image import resample_to_img + from nilearn.image import math_img + from nilearn.image import index_img import types + import yaml + import pkg_resources + import sys - mask_img = nib.load(roi) - mask_aff = mask_img.affine - mask_data, _ = masking._load_mask_img(roi) - mask_img.uncache() + mask_img = math_img("img > 0", img=nib.load(roi)) + + with open( + pkg_resources.resource_filename("pynets", "runconfig.yaml"), "r" + ) as stream: + hardcoded_params = yaml.load(stream) + try: + template_name = hardcoded_params["template"][0] + except KeyError: + print( + "No template specified in runconfig.yaml" + ) + sys.exit(0) + stream.close() + + template_brain = pkg_resources.resource_filename( + "pynets", f"templates/{template_name}_brain_{vox_size}.nii.gz" + ) + try: + template_img = nib.load(template_brain) + except indexed_gzip.ZranError as e: + print(e, + f"\nCannot load MNI template. 
Do you have git-lfs " + f"installed?") + sys.exit(1) + + mask_img_res = resample_to_img( + mask_img, template_img, + ) + + mask_data = mask_img_res.get_fdata() if isinstance(parcel_list, types.GeneratorType): parcel_list = [i for i in parcel_list] + elif isinstance(parcel_list, str): + parcel_list_img = nib.load(parcel_list) + parcel_list = [index_img(parcel_list_img, i) for i in + range(parcel_list_img.shape[-1])] i = 0 indices = [] for parcel in parcel_list: - parcel_vol = np.zeros(mask_data.shape, dtype=bool) - parcel_data_reshaped = np.asarray( - resample_img( - parcel, target_affine=mask_aff, target_shape=mask_data.shape - ).dataobj - ) - parcel_vol[parcel_data_reshaped == 1] = 1 - + parcel = resample_to_img(parcel, template_img, interpolation='nearest') # Count number of unique voxels where overlap of parcel and mask occurs overlap_count = len( np.unique( np.where( (mask_data.astype("uint16") == 1) - & (parcel_vol.astype("uint16") == 1) + & (np.asarray(parcel.dataobj + ).astype('bool').astype("uint16") == 1) ) ) ) @@ -664,16 +695,22 @@ def parcel_masker( total_count = len( np.unique( np.where( - (parcel_vol.astype("uint16") == 1)))) + (np.asarray(parcel.dataobj + ).astype('bool').astype("uint16") == 1) + ) + ) + ) # Calculate % overlap - try: + if overlap_count > 0: overlap = float(overlap_count / total_count) - except BaseException: + else: print( - f"\nWarning: No overlap of parcel {labels[i]} with roi" - f" mask!\n") - overlap = float(0) + f"No overlap of parcel {labels[i]} with roi" + f" mask...") + indices.append(i) + i += 1 + continue if overlap >= perc_overlap: print( @@ -682,7 +719,7 @@ def parcel_masker( ) else: indices.append(i) - i = i + 1 + i += 1 labels_adj = list(labels) coords_adj = list(tuple(x) for x in coords) @@ -697,24 +734,6 @@ def parcel_masker( " brain mask/roi..." ) - if any(isinstance(sub, tuple) for sub in labels_adj): - label_intensities = [i[1] for i in labels_adj] - elif any(isinstance(sub, dict) for sub in labels_adj): - label_intensities = None - else: - label_intensities = labels_adj - - # Create a resampled 3D atlas that can be viewed alongside mask img for QA - resampled_parcels_nii_path = f"{dir_path}/{ID}_parcels_resampled2roimask" \ - f"_{op.basename(roi).split('.')[0]}.nii.gz" - resampled_parcels_map_nifti = resample_img( - nodemaker.create_parcel_atlas(parcel_list_adj, label_intensities)[0], - target_affine=mask_aff, - target_shape=mask_data.shape, - interpolation="nearest", - ) - nib.save(resampled_parcels_map_nifti, resampled_parcels_nii_path) - resampled_parcels_map_nifti.uncache() if not coords_adj: raise ValueError( "\nERROR: ROI mask was likely too restrictive and yielded < 2" @@ -755,10 +774,13 @@ def coords_masker(roi, coords, labels, error): Filtered list of string labels corresponding to ROI nodes with a spatial affinity for the specified ROI mask. 
""" - from nilearn import masking + import nibabel as nib + from nilearn.image import math_img from pynets.core.nodemaker import mmToVox - mask_data, mask_aff = masking._load_mask_img(roi) + mask_img = math_img("img > 0", img=nib.load(roi)) + mask_aff = mask_img.affine + mask_data = mask_img.get_fdata() x_vox = np.diagonal(mask_aff[:3, 0:3])[0] y_vox = np.diagonal(mask_aff[:3, 0:3])[1] z_vox = np.diagonal(mask_aff[:3, 0:3])[2] @@ -1417,7 +1439,7 @@ def node_gen_masking( parc, atlas, uatlas, - perc_overlap=0.75, + perc_overlap=0.10, error=2, ): """ diff --git a/pynets/registration/reg_utils.py b/pynets/registration/reg_utils.py index f6f545b6..1323eed6 100644 --- a/pynets/registration/reg_utils.py +++ b/pynets/registration/reg_utils.py @@ -297,7 +297,7 @@ def waymask2dwi_align( template_img = nib.load(template) waymask_img_res = resample_to_img( - waymask_img, template_img, interpolation="nearest" + waymask_img, template_img, ) waymask_res = f"{waymask.split('.nii')[0]}_res.nii.gz" nib.save(waymask_img_res, waymask_res) From 288b33b3a639399f607170ac575a71459a33f712 Mon Sep 17 00:00:00 2001 From: dPys Date: Tue, 25 Aug 2020 15:40:32 -0500 Subject: [PATCH 2/5] [ENH] Simplify roi masking for node generation --- pynets/core/nodemaker.py | 50 +++++++++++++++++++++---- pynets/core/workflows.py | 13 ++++++- pynets/stats/global_graph_measures.yaml | 2 +- pynets/stats/local_graph_measures.yaml | 2 +- pynets/stats/prediction.py | 49 +++++++++++++++--------- tests/test_nodemaker.py | 9 +++-- 6 files changed, 93 insertions(+), 32 deletions(-) diff --git a/pynets/core/nodemaker.py b/pynets/core/nodemaker.py index d3bd310c..f009b2f7 100644 --- a/pynets/core/nodemaker.py +++ b/pynets/core/nodemaker.py @@ -589,7 +589,7 @@ def parcel_masker( dir_path, ID, perc_overlap, - vox_size='2mm'): + vox_size): """ Evaluate the affinity of any arbitrary list of parcel nodes for a user-specified ROI mask. @@ -667,7 +667,7 @@ def parcel_masker( mask_img, template_img, ) - mask_data = mask_img_res.get_fdata() + mask_data = mask_img_res.get_fdata().astype('bool') if isinstance(parcel_list, types.GeneratorType): parcel_list = [i for i in parcel_list] @@ -745,7 +745,7 @@ def parcel_masker( return coords_adj, labels_adj, parcel_list_adj -def coords_masker(roi, coords, labels, error): +def coords_masker(roi, coords, labels, error, vox_size='2mm'): """ Evaluate the affinity of any arbitrary list of coordinate nodes for a user-specified ROI mask. @@ -777,10 +777,45 @@ def coords_masker(roi, coords, labels, error): import nibabel as nib from nilearn.image import math_img from pynets.core.nodemaker import mmToVox + import yaml + import pkg_resources + import sys + from nilearn.image import resample_to_img mask_img = math_img("img > 0", img=nib.load(roi)) - mask_aff = mask_img.affine - mask_data = mask_img.get_fdata() + + with open( + pkg_resources.resource_filename("pynets", "runconfig.yaml"), "r" + ) as stream: + hardcoded_params = yaml.load(stream) + try: + template_name = hardcoded_params["template"][0] + except KeyError: + print( + "No template specified in runconfig.yaml" + ) + sys.exit(0) + stream.close() + + template_brain = pkg_resources.resource_filename( + "pynets", f"templates/{template_name}_brain_{vox_size}.nii.gz" + ) + try: + template_img = nib.load(template_brain) + except indexed_gzip.ZranError as e: + print(e, + f"\nCannot load MNI template. 
Do you have git-lfs " + f"installed?") + sys.exit(1) + + mask_img_res = resample_to_img( + mask_img, template_img, + ) + + mask_data = mask_img_res.get_fdata().astype('bool') + + mask_aff = mask_img_res.affine + x_vox = np.diagonal(mask_aff[:3, 0:3])[0] y_vox = np.diagonal(mask_aff[:3, 0:3])[1] z_vox = np.diagonal(mask_aff[:3, 0:3])[2] @@ -1439,8 +1474,9 @@ def node_gen_masking( parc, atlas, uatlas, + vox_size, perc_overlap=0.10, - error=2, + error=2 ): """ In the case that masking was applied, this function generate nodes based @@ -1510,7 +1546,7 @@ def node_gen_masking( # For parcel masking, specify overlap thresh and error cushion in mm voxels [coords, labels, parcel_list_masked] = nodemaker.parcel_masker( - roi, coords, parcel_list, labels, dir_path, ID, perc_overlap + roi, coords, parcel_list, labels, dir_path, ID, perc_overlap, vox_size, ) if any(isinstance(sub, tuple) for sub in labels): diff --git a/pynets/core/workflows.py b/pynets/core/workflows.py index d9181e14..7fcfe5f6 100644 --- a/pynets/core/workflows.py +++ b/pynets/core/workflows.py @@ -1649,6 +1649,7 @@ def dmri_connectometry( "parc", "atlas", "uatlas", + "vox_size" ], output_names=[ "net_parcels_map_nifti", @@ -1663,6 +1664,11 @@ def dmri_connectometry( ), name="node_gen_node", ) + dmri_connectometry_wf.connect( + [ + (inputnode, node_gen_node, [("vox_size", "vox_size")]), + ] + ) else: # Non-masking case node_gen_node = pe.Node( @@ -4238,6 +4244,7 @@ def fmri_connectometry( "parc", "atlas", "uatlas", + "vox_size" ], output_names=[ "net_parcels_map_nifti", @@ -4252,7 +4259,11 @@ def fmri_connectometry( ), name="node_gen_node", ) - + fmri_connectometry_wf.connect( + [ + (inputnode, node_gen_node, [("vox_size", "vox_size")]), + ] + ) else: # Non-masking case node_gen_node = pe.Node( diff --git a/pynets/stats/global_graph_measures.yaml b/pynets/stats/global_graph_measures.yaml index 9121c76e..e6b48c31 100644 --- a/pynets/stats/global_graph_measures.yaml +++ b/pynets/stats/global_graph_measures.yaml @@ -5,5 +5,5 @@ metric_list_global: - 'average_clustering' - 'average_shortest_path_length' # - 'graph_number_of_cliques' -# - 'smallworldness' + - 'smallworldness' - 'transitivity' diff --git a/pynets/stats/local_graph_measures.yaml b/pynets/stats/local_graph_measures.yaml index 04a86ac7..f56a084d 100644 --- a/pynets/stats/local_graph_measures.yaml +++ b/pynets/stats/local_graph_measures.yaml @@ -1,5 +1,5 @@ metric_list_nodal: -# - 'participation_coefficient' + - 'participation_coefficient' # - 'diversity_coefficient' - 'local_efficiency' - 'local_clustering' diff --git a/pynets/stats/prediction.py b/pynets/stats/prediction.py index 32696856..23ec3df3 100644 --- a/pynets/stats/prediction.py +++ b/pynets/stats/prediction.py @@ -728,7 +728,8 @@ def bootstrapped_nested_cv(X, y, n_boots=10, var_thr=.8, k_folds=10, # Remove columns with > 20% missing values X = X.dropna(thresh=len(X) * .80, axis=1) if X.empty: - return grand_mean_best_estimator, grand_mean_best_Rsquared, grand_mean_best_MSE, {} + return grand_mean_best_estimator, grand_mean_best_Rsquared, \ + grand_mean_best_MSE, {} # Apply a simple imputer (note that this assumes extreme cases of # missingness have already been addressed) @@ -740,21 +741,24 @@ def bootstrapped_nested_cv(X, y, n_boots=10, var_thr=.8, k_folds=10, sel.fit(X) X = X[X.columns[sel.get_support(indices=True)]] if X.empty: - return grand_mean_best_estimator, grand_mean_best_Rsquared, grand_mean_best_MSE, {} + return grand_mean_best_estimator, grand_mean_best_Rsquared, \ + grand_mean_best_MSE, {} # Remove 
multicollinear columns if remove_multi is True: transformer = ReduceVIF() X = transformer.fit_transform(X, y) if X.empty: - return grand_mean_best_estimator, grand_mean_best_Rsquared, grand_mean_best_MSE, {} + return grand_mean_best_estimator, grand_mean_best_Rsquared, \ + grand_mean_best_MSE, {} # Remove outliers outlier_mask = (np.abs(stats.zscore(X)) < float(std_dev)).all(axis=1) X = X[outlier_mask] y = y[outlier_mask] if X.empty: - return grand_mean_best_estimator, grand_mean_best_Rsquared, grand_mean_best_MSE, {} + return grand_mean_best_estimator, grand_mean_best_Rsquared, \ + grand_mean_best_MSE, {} # Standardize X scaler = StandardScaler() @@ -764,8 +768,7 @@ def bootstrapped_nested_cv(X, y, n_boots=10, var_thr=.8, k_folds=10, # scaler = StandardScaler() # y = pd.DataFrame(scaler.fit_transform(y.reshape(-1,1))) - # Bootstrap train-test split - # Repeated test-train splits "simulates" the variability of incoming data, + # Bootstrap nested CV's "simulates" the variability of incoming data, # particularly when training on smaller datasets feature_imp_dicts = [] best_positions_list = [] @@ -816,10 +819,13 @@ def bootstrapped_nested_cv(X, y, n_boots=10, var_thr=.8, k_folds=10, best_positions = list(X.columns) else: best_positions = [column[0] for - column in zip(X.columns, - fitted.named_steps['feature_select'].get_support(indices=True)) if column[1]] + column in + zip(X.columns, fitted.named_steps[ + 'feature_select'].get_support( + indices=True)) if column[1]] - coefs = np.abs(fitted.named_steps[best_regressor.split('_')[0]].coef_) + coefs = np.abs(fitted.named_steps[ + best_regressor.split('_')[0]].coef_) feat_imp_dict = OrderedDict(sorted(dict(zip(best_positions, coefs)).items(), @@ -858,7 +864,8 @@ def bootstrapped_nested_cv(X, y, n_boots=10, var_thr=.8, k_folds=10, def make_subject_dict(modalities, base_dir, thr_type, mets, embedding_types, template, sessions): from joblib import Parallel, delayed - rsns = ['SalVentAttnA', 'DefaultA', 'ContB'] + #rsns = ['SalVentAttnA', 'DefaultA', 'ContB'] + rsns = ['triple'] hyperparams_func = ["rsn", "res", "model", 'hpass', 'extract', 'smooth'] hyperparams_dwi = ["rsn", "res", "model", 'directget', 'minlength', 'tol'] @@ -869,16 +876,19 @@ def make_subject_dict(modalities, base_dir, thr_type, mets, embedding_types, for alg in embedding_types: for ses_name in sessions: if alg == 'ASE' or alg == 'OMNI': - ids = [f"{os.path.basename(i)}_ses-{ses_name}" for i in glob.glob( + ids = [f"{os.path.basename(i)}_ses-{ses_name}" for i in + glob.glob( f"{base_dir}/embeddings_all_{modality}/*") if os.path.basename(i).startswith('sub')] else: - ids = [f"{os.path.basename(i)}_ses-{ses_name}" for i in glob.glob( + ids = [f"{os.path.basename(i)}_ses-{ses_name}" for i in + glob.glob( f"{base_dir}/pynets/*") if os.path.basename(i).startswith('sub')] if alg == 'ASE' or alg == 'OMNI': - ensembles = get_ensembles_embedding(modality, alg, base_dir) + ensembles = get_ensembles_embedding(modality, alg, + base_dir) df_top = None if ensembles is None: continue @@ -896,10 +906,11 @@ def make_subject_dict(modalities, base_dir, thr_type, mets, embedding_types, hyperparam_dict = {} grid = build_grid(modality, hyperparam_dict, - sorted(list(set(hyperparams))), ensembles)[1] + sorted(list(set(hyperparams))), + ensembles)[1] - grid_mod = list(set([tuple(x for x in i if x not in rsns) for i in - grid])) + grid_mod = list(set([tuple(x for x in i if x not in rsns) + for i in grid])) # Since we are using all of the 3 RSN connectomes (pDMN, coSN, and # fECN) in the feature-space, @@ 
-1088,7 +1099,8 @@ def filter_cols_from_targets(df_top, targets): else: print( f"\nMultiple structural embeddings found for {id} and" - f" recipe {comb_tuple}:\n{embeddings}\nTaking the most recent...") + f" recipe {comb_tuple}:\n{embeddings}\nTaking the most" + f" recent...") embedding = \ sorted(embeddings, key=os.path.getmtime)[0] if os.path.isfile(embedding): @@ -1343,7 +1355,8 @@ def _run_interface(self, runtime): X = pd.read_csv( self.inputs.X, chunksize=100000).read() [grand_mean_best_estimator, grand_mean_best_Rsquared, - grand_mean_best_MSE, mega_feat_imp_dict] = bootstrapped_nested_cv( + grand_mean_best_MSE, mega_feat_imp_dict] = \ + bootstrapped_nested_cv( X, self.inputs.y) if len(mega_feat_imp_dict) > 1: print(f"Target Outcome: {self.inputs.target_var}") diff --git a/tests/test_nodemaker.py b/tests/test_nodemaker.py index dfdd3110..d781b395 100755 --- a/tests/test_nodemaker.py +++ b/tests/test_nodemaker.py @@ -96,8 +96,8 @@ def test_nodemaker_tools_nilearn_coords_RSN(atlas): print("%s%s%s" % ('fetch_nilearn_atlas_coords --> finished: ', str(np.round(time.time() - start_time, 1)), 's')) start_time = time.time() - [net_coords, _, net_labels, network] = nodemaker.get_node_membership(network, template, coords, labels, parc, - parcel_list) + [net_coords, _, net_labels, network] = nodemaker.get_node_membership( + network, template, coords, labels, parc, parcel_list) print("%s%s%s" % ('get_node_membership --> finished: ', str(np.round(time.time() - start_time, 1)), 's')) assert coords is not None @@ -114,7 +114,8 @@ def test_nodemaker_tools_masking_parlistfile_RSN(): # Set example inputs import pkg_resources base_dir = str(Path(__file__).parent/"examples") - template = pkg_resources.resource_filename("pynets", f"templates/MNI152_T1_brain_2mm.nii.gz") + template = pkg_resources.resource_filename("pynets", + f"templates/MNI152_T1_brain_2mm.nii.gz") dir_path = f"{base_dir}/BIDS/sub-25659/ses-1/func" parlistfile = f"{base_dir}/miscellaneous/whole_brain_cluster_labels_PCA200.nii.gz" roi = f"{base_dir}/miscellaneous/pDMN_3_bin.nii.gz" @@ -289,7 +290,7 @@ def test_nodemaker_tools_masking_parlistfile_WB(): WB_parcel_list = nodemaker.gen_img_list(parlistfile) [WB_net_parcels_map_nifti_masked, WB_coords_masked, WB_labels_masked, _, _, _] = nodemaker.node_gen_masking(roi, WB_coords, WB_parcel_list, WB_labels, dir_path, ID, parc, atlas, - parlistfile) + parlistfile, vox_size='2mm') WB_parcel_list = nodemaker.gen_img_list(parlistfile) [_, _, WB_parcel_list_masked] = nodemaker.parcel_masker(roi, WB_coords, WB_parcel_list, WB_labels, From 084f0c8be35d94822a60f60720b40452e4bebee4 Mon Sep 17 00:00:00 2001 From: dPys Date: Tue, 25 Aug 2020 15:55:44 -0500 Subject: [PATCH 3/5] [ENH] Simplify roi masking for node generation --- README.md | 8 ++-- pynets/cli/pynets_run.py | 4 +- pynets/config/bids_config.json | 2 +- .../bids_config_anatomical_atlases.json | 2 +- .../bids_config_anatomical_atlases_test.json | 2 +- pynets/config/bids_config_bold.json | 3 +- pynets/config/bids_config_clustering.json | 2 +- pynets/config/bids_config_dwi.json | 2 +- pynets/config/bids_config_embed.json | 2 +- pynets/config/bids_config_rsns.json | 2 +- pynets/config/bids_config_test.json | 2 +- pynets/stats/prediction.py | 43 ++++++++++++------- 12 files changed, 43 insertions(+), 31 deletions(-) diff --git a/README.md b/README.md index 8334c03f..e4ffb393 100644 --- a/README.md +++ b/README.md @@ -79,10 +79,10 @@ where the `-config` flag specifies that path to a .json configuration spec that "es": "['mean', 'median']" # 
Indicates the method(s) of nodal time-series signal extraction. }, "dwi": { # dMRI options. If you only have structural (i.e. DWI) data, set each of the `func` options to "None" - "dg": "None", - "ml": "None", - "mod": "None", - "em": "None" + "dg": "det", # The directional assumptions of tractography (e.g. deterministic, probabilistic) + "ml": "40", # The minimum length criterion for streamlines in tractography + "mod": "csd", # The diffusion model type + "tol": "8" # The tolerance distance (in the units of the streamlines, usually mm). If any node in the streamline is within this distance from the center of any voxel in the ROI, then the connection is counted as an edge" }, "gen": { # These are general options that apply to all modalities "a": "['BrainnetomeAtlasFan2016', 'atlas_harvard_oxford', 'destrieux2009_rois']", # Anatomical atlases to define nodes. diff --git a/pynets/cli/pynets_run.py b/pynets/cli/pynets_run.py index 6a5022c0..c4b138a8 100755 --- a/pynets/cli/pynets_run.py +++ b/pynets/cli/pynets_run.py @@ -401,7 +401,7 @@ def get_parser(): "minimums, separate the list by space (e.g. 10 30 50).\n", ) parser.add_argument( - "-em", + "-tol", metavar="Error margin", default=8, nargs="+", @@ -987,7 +987,7 @@ def build_workflow(args, retval): min_length_list = None else: min_length_list = None - error_margin = args.em + error_margin = args.tol if error_margin: if (isinstance(error_margin, list)) and (len(error_margin) > 1): error_margin_list = error_margin diff --git a/pynets/config/bids_config.json b/pynets/config/bids_config.json index 6d4c0888..77b15552 100644 --- a/pynets/config/bids_config.json +++ b/pynets/config/bids_config.json @@ -3,7 +3,7 @@ "dg": "['prob', 'det']", "ml": "['10', '40']", "mod": "['csd', 'csa']", - "em": 8 + "tol": 8 }, "func": { "ct": "['rena', 'ward', 'kmeans']", diff --git a/pynets/config/bids_config_anatomical_atlases.json b/pynets/config/bids_config_anatomical_atlases.json index 1309d92d..df3b614a 100644 --- a/pynets/config/bids_config_anatomical_atlases.json +++ b/pynets/config/bids_config_anatomical_atlases.json @@ -3,7 +3,7 @@ "dg": "['det', 'prob', 'clos']", "ml": "['0', '20', '40']", "mod": "['csd', 'csa', 'tensor', 'sfm']", - "em": 8 + "tol": 8 }, "func": { "ct": "None", diff --git a/pynets/config/bids_config_anatomical_atlases_test.json b/pynets/config/bids_config_anatomical_atlases_test.json index ff43adcf..8084edeb 100644 --- a/pynets/config/bids_config_anatomical_atlases_test.json +++ b/pynets/config/bids_config_anatomical_atlases_test.json @@ -3,7 +3,7 @@ "dg": "['prob']", "ml": "['0']", "mod": "['csd']", - "em": 8 + "tol": 8 }, "func": { "ct": "None", diff --git a/pynets/config/bids_config_bold.json b/pynets/config/bids_config_bold.json index c53be9cb..ead681fd 100644 --- a/pynets/config/bids_config_bold.json +++ b/pynets/config/bids_config_bold.json @@ -2,7 +2,8 @@ "dwi": { "dg": "None", "ml": "None", - "mod": "None" + "mod": "None", + "tol": "None" }, "func": { "ct": "None", diff --git a/pynets/config/bids_config_clustering.json b/pynets/config/bids_config_clustering.json index 2cafead9..43c8c850 100644 --- a/pynets/config/bids_config_clustering.json +++ b/pynets/config/bids_config_clustering.json @@ -3,7 +3,7 @@ "dg": "None", "ml": "None", "mod": "None", - "em": "None" + "tol": "None" }, "func": { "ct": "['rena']", diff --git a/pynets/config/bids_config_dwi.json b/pynets/config/bids_config_dwi.json index 7717dfd5..1bf270af 100644 --- a/pynets/config/bids_config_dwi.json +++ b/pynets/config/bids_config_dwi.json @@ -3,7 +3,7 @@ "dg": 
"['det', 'prob']", "ml": "['0', '20']", "mod": "['csa']", - "em": 8 + "tol": 8 }, "func": { "ct": "None", diff --git a/pynets/config/bids_config_embed.json b/pynets/config/bids_config_embed.json index f376cfab..4eb27de2 100644 --- a/pynets/config/bids_config_embed.json +++ b/pynets/config/bids_config_embed.json @@ -3,7 +3,7 @@ "dg": "['clos']", "ml": "['0']", "mod": "['csa']", - "em": 8 + "tol": 8 }, "func": { "ct": "None", diff --git a/pynets/config/bids_config_rsns.json b/pynets/config/bids_config_rsns.json index e9ee1453..9f2c5ba8 100644 --- a/pynets/config/bids_config_rsns.json +++ b/pynets/config/bids_config_rsns.json @@ -3,7 +3,7 @@ "dg": "['clos']", "ml": "['0']", "mod": "['csa']", - "em": 8 + "tol": 8 }, "func": { "ct": "None", diff --git a/pynets/config/bids_config_test.json b/pynets/config/bids_config_test.json index 674e3834..bb872625 100644 --- a/pynets/config/bids_config_test.json +++ b/pynets/config/bids_config_test.json @@ -3,7 +3,7 @@ "dg": "None", "ml": "None", "mod": "None", - "em": "None" + "tol": "None" }, "func": { "ct": "None", diff --git a/pynets/stats/prediction.py b/pynets/stats/prediction.py index 23ec3df3..56b20520 100644 --- a/pynets/stats/prediction.py +++ b/pynets/stats/prediction.py @@ -184,7 +184,7 @@ def calculate_vif(X, thresh=5.0): return X -# We create a callable class, called `RazorCV`, +# We create a callable class, called `RazorCV` class RazorCV(object): ''' PR to SKlearn by dPys 2019 @@ -197,8 +197,7 @@ class RazorCV(object): within one standard error (or some percentile/alpha-level tolerance) of the empirically optimal model. This assumes that the models can be easily ordered from simplest to most complex based on some user-defined target - parameter. By enabling the user to specify this parameter of interest, - whether greater values of this parameter are to be defined as + parameter. Greater values of this parameter are to be defined as 'more complex', and a target scoring metric (i.e. in the case of multi-metric scoring), the `RazorCV` function can be called directly by `refit` (e.g. in GridSearchCV). @@ -230,7 +229,7 @@ class RazorCV(object): Notes ----- - Here, simplest is defined by the complexity of the model as influenced by + Here, 'simplest' is defined by the complexity of the model as influenced by some user-defined target parameter (e.g. number of components, number of estimators, polynomial degree, cost, scale, number hidden units, weight decay, number of nearest neighbors, L1/L2 penalty, etc.). @@ -262,7 +261,8 @@ def _check_scorer(self): Check whether the target refit scorer is negated. If so, adjusted greater_is_better accordingly. 
""" - if self.scoring not in self.scoring_dict.keys() and f"{self.scoring}_score" not in self.scoring_dict.keys(): + if self.scoring not in self.scoring_dict.keys() and \ + f"{self.scoring}_score" not in self.scoring_dict.keys(): if self.scoring.startswith('neg_'): self.greater_is_better = True else: @@ -489,7 +489,8 @@ def nested_fit(X, y, regressors, boot, pca_reduce, k_folds): means_all_exp_var = {} means_all_MSE = {} - # Model + feature selection by iterating grid-search across linear regressors + # Model + feature selection by iterating grid-search across linear + # regressors for regressor_name, regressor in sorted(regressors.items()): if pca_reduce is True and X.shape[0] < X.shape[1]: # Pipeline feature selection (PCA) with model fitting @@ -502,7 +503,8 @@ def nested_fit(X, y, regressors, boot, pca_reduce, k_folds): 'feature_select__n_components': n_comps} refit = RazorCV.standard_error('n_components', True, refit_score) else: - # <25 Features, don't perform feature selection. + # <25 Features, don't perform feature selection, but produce a + # userwarning if X.shape[1] < 25: pipe = Pipeline([ (regressor_name, regressor), @@ -539,7 +541,8 @@ def nested_fit(X, y, regressors, boot, pca_reduce, k_folds): means_MSE = pipe_grid_cv.cv_results_[ f"mean_test_neg_root_mean_squared_error"] - # Apply PCA in the case that the # of features exceeds the number of observations + # Apply PCA in the case that the # of features exceeds the number of + # observations if pca_reduce is True and X.shape[0] < X.shape[1]: best_estimator_name = f"{regressor_name}_{pipe_grid_cv.best_estimator_.get_params()[regressor_name + '__alpha']}_{pipe_grid_cv.best_estimator_.named_steps['feature_select'].n_components}" else: @@ -569,9 +572,11 @@ def nested_fit(X, y, regressors, boot, pca_reduce, k_folds): [(best_regressor.split('_')[0], est)]) else: est.alpha = float(best_regressor.split('_')[-2]) - kbest = SelectKBest(f_regression, k=int(best_regressor.split('_')[-1])) + kbest = SelectKBest(f_regression, + k=int(best_regressor.split('_')[-1])) reg = Pipeline( - [('feature_select', kbest), (best_regressor.split('_')[0], est)]) + [('feature_select', kbest), + (best_regressor.split('_')[0], est)]) return reg, best_regressor @@ -633,7 +638,8 @@ def flatten_latent_positions(rsn, subject_dict, ID, ses, modality, grid_param, a return df_lps -def create_feature_space(df, grid_param, subject_dict, ses, modality, alg, mets=None): +def create_feature_space(df, grid_param, subject_dict, ses, modality, alg, + mets=None): df_tmps = [] rsns = ['SalVentAttnA', 'DefaultA', 'ContB'] @@ -816,6 +822,7 @@ def bootstrapped_nested_cv(X, y, n_boots=10, var_thr=.8, k_folds=10, reverse=True)) else: if X.shape[1] < 25: + print(UserWarning('Total number of features is <25!')) best_positions = list(X.columns) else: best_positions = [column[0] for @@ -1665,7 +1672,8 @@ def build_predict_workflow(base_dir, dict_file_path, modality_grids, drop_cols, target_modality = 'dwi' template = 'MNI152_T1' mets = ["global_efficiency", "average_clustering", - "average_shortest_path_length", "average_local_efficiency_nodewise", + "average_shortest_path_length", + "average_local_efficiency_nodewise", "average_betweenness_centrality", "average_eigenvector_centrality"] @@ -1696,15 +1704,18 @@ def build_predict_workflow(base_dir, dict_file_path, modality_grids, drop_cols, df = df[df['participant_id'].isin(list(sub_dict_clean.keys()))] df = df[['participant_id', 'rum_persist', 'rum_1', 'dep_1', 'age']] # Remove 4 outliers in the behavioral data - df = 
df.loc[(df['participant_id'] != '33') & (df['participant_id'] != '21') & - (df['participant_id'] != '54') & (df['participant_id'] != '14')] + df = df.loc[(df['participant_id'] != '33') & + (df['participant_id'] != '21') & + (df['participant_id'] != '54') & + (df['participant_id'] != '14')] dict_file_path = make_feature_space_dict(df, modalities, sub_dict_clean, sessions[0], base_dir, mets) with Manager() as mgr: retval = mgr.dict() - p = Process(target=build_predict_workflow, args=(base_dir, dict_file_path, modality_grids, drop_cols, - target_vars, embedding_types, target_modality)) + p = Process(target=build_predict_workflow, + args=(base_dir, dict_file_path, modality_grids, drop_cols, + target_vars, embedding_types, target_modality)) p.start() p.join() From 1c540016a7b9237639ffaf9c5d9234702f714e53 Mon Sep 17 00:00:00 2001 From: dPys Date: Tue, 25 Aug 2020 17:04:39 -0500 Subject: [PATCH 4/5] [ENH] add compatibility for uncompressed input files, add JoinNode logic for tolerance iterables --- pynets/__about__.py | 2 +- pynets/cli/pynets_run.py | 3 +- pynets/core/workflows.py | 109 +++++++++++++++++++++++++++---- pynets/registration/reg_utils.py | 12 ++-- 4 files changed, 106 insertions(+), 20 deletions(-) diff --git a/pynets/__about__.py b/pynets/__about__.py index be5c7965..82f640a9 100644 --- a/pynets/__about__.py +++ b/pynets/__about__.py @@ -6,7 +6,7 @@ # from ._version import get_versions # __version__ = get_versions()['version'] # del get_versions -__version__ = "1.0.11" +__version__ = "1.0.12" __packagename__ = "pynets" __copyright__ = "Copyright 2017, Derek Pisner" diff --git a/pynets/cli/pynets_run.py b/pynets/cli/pynets_run.py index c4b138a8..5dda1fae 100755 --- a/pynets/cli/pynets_run.py +++ b/pynets/cli/pynets_run.py @@ -1845,7 +1845,8 @@ def build_workflow(args, retval): 'roi_neighborhood_tol preset cannot be less than ' 'the value of the structural connectome error_margin' ' parameter.') - print(f"{Fore.GREEN}Iterating minimum streamline lengths:") + print(f"{Fore.GREEN}Iterating ROI-streamline intersection " + f"tolerance:") print(f"{Fore.BLUE}{', '.join(error_margin_list)}") if target_samples: diff --git a/pynets/core/workflows.py b/pynets/core/workflows.py index 7fcfe5f6..57ef6253 100644 --- a/pynets/core/workflows.py +++ b/pynets/core/workflows.py @@ -1979,7 +1979,18 @@ def dmri_connectometry( streams2graph_node._mem_gb = runtime_dict["streams2graph_node"][1] if error_margin_list: - streams2graph_node.iterables = [("error_margin", error_margin_list)] + streams2graph_node.inputs.error_margin = error_margin_list[0] + streams2graph_node.iterables = [("error_margin", + error_margin_list[1:])] + else: + dmri_connectometry_wf.connect( + [ + ( + dsn_node, streams2graph_node, + [("error_margin", "error_margin")], + ) + ] + ) # Create outputnode to capture results of nested workflow outputnode = pe.Node( @@ -2496,10 +2507,29 @@ def dmri_connectometry( joinsource=prep_spherical_nodes_node, joinfield=map_fields, ) + if error_margin_list is not None: + join_iters_node_em = pe.JoinNode( + niu.IdentityInterface(fields=map_fields), + name="join_iters_node_em", + joinsource=streams2graph_node, + joinfield=map_fields, + ) + dmri_connectometry_wf.connect( + [ + (thr_info_node, join_iters_node_em, map_connects), + (join_iters_node_em, join_iters_node_prep_spheres, + map_connects) + ] + ) + else: + dmri_connectometry_wf.connect( + [ + (thr_info_node, join_iters_node_prep_spheres, + map_connects), + ] + ) dmri_connectometry_wf.connect( [ - (thr_info_node, join_iters_node_prep_spheres, - 
map_connects), (join_iters_node_prep_spheres, join_iters_node, map_connects), ] @@ -2514,9 +2544,29 @@ def dmri_connectometry( joinsource=run_tracking_node, joinfield=map_fields, ) + if error_margin_list is not None: + join_iters_node_em = pe.JoinNode( + niu.IdentityInterface(fields=map_fields), + name="join_iters_node_em", + joinsource=streams2graph_node, + joinfield=map_fields, + ) + dmri_connectometry_wf.connect( + [ + (thr_info_node, join_iters_node_em, map_connects), + (join_iters_node_em, join_iters_node_run_track, + map_connects) + ] + ) + else: + dmri_connectometry_wf.connect( + [ + (thr_info_node, join_iters_node_run_track, + map_connects), + ] + ) dmri_connectometry_wf.connect( [ - (thr_info_node, join_iters_node_run_track, map_connects), (join_iters_node_run_track, join_iters_node, map_connects), ] ) @@ -2527,9 +2577,26 @@ def dmri_connectometry( and not node_size_list ): # print('No connectivity model or node extraction iterables...') - dmri_connectometry_wf.connect( - [(thr_info_node, join_iters_node, map_connects)] - ) + if error_margin_list is not None: + join_iters_node_em = pe.JoinNode( + niu.IdentityInterface(fields=map_fields), + name="join_iters_node_em", + joinsource=streams2graph_node, + joinfield=map_fields, + ) + dmri_connectometry_wf.connect( + [ + (thr_info_node, join_iters_node_em, map_connects), + (join_iters_node_em, join_iters_node, + map_connects) + ] + ) + else: + dmri_connectometry_wf.connect( + [ + (thr_info_node, join_iters_node, map_connects), + ] + ) elif (conn_model_list or multi_directget or min_length_list) or ( node_size_list and parc is False ): @@ -2546,10 +2613,29 @@ def dmri_connectometry( joinsource=run_tracking_node, joinfield=map_fields, ) - dmri_connectometry_wf.connect([(thr_info_node, - join_iters_node_run_track, - map_connects), - (join_iters_node_run_track, + if error_margin_list is not None: + join_iters_node_em = pe.JoinNode( + niu.IdentityInterface(fields=map_fields), + name="join_iters_node_em", + joinsource=streams2graph_node, + joinfield=map_fields, + ) + dmri_connectometry_wf.connect( + [ + (thr_info_node, join_iters_node_em, map_connects), + (join_iters_node_em, join_iters_node_run_track, + map_connects) + ] + ) + else: + dmri_connectometry_wf.connect( + [ + (thr_info_node, join_iters_node_run_track, + map_connects), + ] + ) + + dmri_connectometry_wf.connect([(join_iters_node_run_track, join_iters_node_prep_spheres, map_connects, ), @@ -3144,7 +3230,6 @@ def dmri_connectometry( ("directget", "directget"), ("warped_fa", "warped_fa"), ("min_length", "min_length"), - ("error_margin", "error_margin"), ("network", "network"), ], ), diff --git a/pynets/registration/reg_utils.py b/pynets/registration/reg_utils.py index 1323eed6..eae9773c 100644 --- a/pynets/registration/reg_utils.py +++ b/pynets/registration/reg_utils.py @@ -1268,7 +1268,7 @@ def reorient_dwi(dwi_prep, bvecs, out_dir, overwrite=True): if normalized is not input_img: out_fname = ( f"{out_dir}/{dwi_prep.split('/')[-1].split('.nii')[0]}_" - f"reor-RAS.nii{dwi_prep.split('/')[-1].split('.nii')[1]}" + f"reor-RAS.nii.gz" ) if ( overwrite is False @@ -1296,7 +1296,7 @@ def reorient_dwi(dwi_prep, bvecs, out_dir, overwrite=True): else: out_fname = ( f"{out_dir}/{dwi_prep.split('/')[-1].split('.nii')[0]}_" - f"noreor-RAS.nii{dwi_prep.split('/')[-1].split('.nii')[1]}" + f"noreor-RAS.nii.gz" ) out_bvec_fname = bvec_fname @@ -1342,12 +1342,12 @@ def reorient_img(img, out_dir, overwrite=True): print(f"{'Reorienting '}{img}{' to RAS+...'}") out_name = ( 
f"{out_dir}/{img.split('/')[-1].split('.nii')[0]}_" - f"reor-RAS.nii{img.split('/')[-1].split('.nii')[1]}" + f"reor-RAS.nii.gz" ) else: out_name = ( f"{out_dir}/{img.split('/')[-1].split('.nii')[0]}_" - f"noreor-RAS.nii{img.split('/')[-1].split('.nii')[1]}" + f"noreor-RAS.nii.gz" ) if overwrite is False and os.path.isfile(out_name): @@ -1397,7 +1397,7 @@ def match_target_vox_res(img_file, vox_size, out_dir, overwrite=True): if (abs(zooms[0]), abs(zooms[1]), abs(zooms[2])) != new_zooms: img_file_res = ( f"{out_dir}/{os.path.basename(img_file).split('.nii')[0]}_" - f"res-{vox_size}.nii{os.path.basename(img_file).split('.nii')[1]}" + f"res-{vox_size}.nii.gz" ) if overwrite is False and os.path.isfile(img_file_res): img_file = img_file_res @@ -1421,7 +1421,7 @@ def match_target_vox_res(img_file, vox_size, out_dir, overwrite=True): img_file_nores = ( f"{out_dir}/{os.path.basename(img_file).split('.nii')[0]}_" f"nores-{vox_size}" - f".nii{os.path.basename(img_file).split('.nii')[1]}") + f".nii.gz") if overwrite is False and os.path.isfile(img_file_nores): img_file = img_file_nores pass From eced3a7677f261f0f2d7b69c4b7d11a0eca7fdfb Mon Sep 17 00:00:00 2001 From: dPys Date: Tue, 25 Aug 2020 17:46:42 -0500 Subject: [PATCH 5/5] [MAINT] Restart circle --- pynets/stats/global_graph_measures.yaml | 2 +- tests/test_nodemaker.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pynets/stats/global_graph_measures.yaml b/pynets/stats/global_graph_measures.yaml index e6b48c31..9121c76e 100644 --- a/pynets/stats/global_graph_measures.yaml +++ b/pynets/stats/global_graph_measures.yaml @@ -5,5 +5,5 @@ metric_list_global: - 'average_clustering' - 'average_shortest_path_length' # - 'graph_number_of_cliques' - - 'smallworldness' +# - 'smallworldness' - 'transitivity' diff --git a/tests/test_nodemaker.py b/tests/test_nodemaker.py index d781b395..e3b40075 100755 --- a/tests/test_nodemaker.py +++ b/tests/test_nodemaker.py @@ -142,7 +142,7 @@ def test_nodemaker_tools_masking_parlistfile_RSN(): net_parcel_list, net_labels, dir_path, ID, - perc_overlap) + perc_overlap, vox_size='2mm') print("%s%s%s" % ('parcel_masker --> finished: ', str(np.round(time.time() - start_time, 1)), 's')) start_time = time.time() @@ -294,7 +294,7 @@ def test_nodemaker_tools_masking_parlistfile_WB(): WB_parcel_list = nodemaker.gen_img_list(parlistfile) [_, _, WB_parcel_list_masked] = nodemaker.parcel_masker(roi, WB_coords, WB_parcel_list, WB_labels, - dir_path, ID, perc_overlap) + dir_path, ID, perc_overlap, vox_size='2mm') print("%s%s%s" % ('parcel_masker (Masking whole-brain version) --> finished: ', np.round(time.time() - start_time, 1), 's'))