diff --git a/.gitignore b/.gitignore index bf7527f33..b5239c0a0 100644 --- a/.gitignore +++ b/.gitignore @@ -70,4 +70,7 @@ target/ .vscode/ # Virtualenv -venv/ \ No newline at end of file +venv/ + +# Hidden folder +.hidden/ \ No newline at end of file diff --git a/data/vocabulary/vocabulary.json b/data/vocabulary/vocabulary.json new file mode 100644 index 000000000..ff1fdc3be --- /dev/null +++ b/data/vocabulary/vocabulary.json @@ -0,0 +1,1358 @@ +{ + "scripts": [ + { + "name": "scil_bids_validate.py", + "keywords": [] + }, + { + "name": "scil_bingham_metrics.py", + "keywords": [ + "fiber density", + "fiber spread", + "fiber fraction", + "fixel" + ] + }, + { + "name": "scil_btensor_metrics.py", + "keywords": [ + "b-tensor", + "b-tensor encoding", + "tensor-valued diffusion MRI", + "micro-FA", + "uFA", + "order parameter", + "OP", + "DIVIDE", + "microstructure", + "linear tensor encoding (LTE)", + "planar tensor encoding (PTE)", + "spherical tensor encoding (STE)", + "multidimensional diffusion MRI" + ] + }, + { + "name": "scil_bundle_clean_qbx_clusters.py", + "keywords": [] + }, + { + "name": "scil_bundle_compute_centroid.py", + "keywords": [] + }, + { + "name": "scil_bundle_compute_endpoints_map.py", + "keywords": [] + }, + { + "name": "scil_bundle_diameter.py", + "keywords": [] + }, + { + "name": "scil_bundle_filter_by_occurence.py", + "keywords": [] + }, + { + "name": "scil_bundle_generate_priors.py", + "keywords": [] + }, + { + "name": "scil_bundle_label_map.py", + "keywords": [ + "parcellate", + "subdivide", + "split" + ] + }, + { + "name": "scil_bundle_mean_fixel_afd_from_hdf5.py", + "keywords": [] + }, + { + "name": "scil_bundle_mean_fixel_afd.py", + "keywords": [] + }, + { + "name": "scil_bundle_mean_fixel_bingham_metric.py", + "keywords": [ + "tractometry", + "lobe metrics", + "fiber density", + "fiber spread", + "fiber fraction", + "mean along bundle" + ] + }, + { + "name": "scil_bundle_mean_std.py", + "keywords": [] + }, + { + "name": "scil_bundle_pairwise_comparison.py", + "keywords": [] + }, + { + "name": "scil_bundle_reject_outliers.py", + "keywords": [] + }, + { + "name": "scil_bundle_score_many_bundles_one_tractogram.py", + "keywords": [] + }, + { + "name": "scil_bundle_score_same_bundle_many_segmentations.py", + "keywords": [] + }, + { + "name": "scil_bundle_shape_measures.py", + "keywords": [ + "geometry" + ] + }, + { + "name": "scil_bundle_uniformize_endpoints.py", + "keywords": [] + }, + { + "name": "scil_bundle_volume_per_label.py", + "keywords": [] + }, + { + "name": "scil_connectivity_compare_populations.py", + "keywords": [] + }, + { + "name": "scil_connectivity_compute_matrices.py", + "keywords": [] + }, + { + "name": "scil_connectivity_compute_pca.py", + "keywords": [] + }, + { + "name": "scil_connectivity_filter.py", + "keywords": [] + }, + { + "name": "scil_connectivity_graph_measures.py", + "keywords": [] + }, + { + "name": "scil_connectivity_hdf5_average_density_map.py", + "keywords": [] + }, + { + "name": "scil_connectivity_math.py", + "keywords": [] + }, + { + "name": "scil_connectivity_normalize.py", + "keywords": [] + }, + { + "name": "scil_connectivity_pairwise_agreement.py", + "keywords": [] + }, + { + "name": "scil_connectivity_print_filenames.py", + "keywords": [] + }, + { + "name": "scil_connectivity_reorder_rois.py", + "keywords": [] + }, + { + "name": "scil_denoising_nlmeans.py", + "keywords": [] + }, + { + "name": "scil_dki_metrics.py", + "keywords": [] + }, + { + "name": "scil_dti_convert_tensors.py", + "keywords": [ + "tensor", + "tensors", + 
"triangular matrix", + "fsl format", + "nifti format", + "mrtrix format", + "dipy format" + ] + }, + { + "name": "scil_dti_metrics.py", + "keywords": [ + "dti", + "metrics", + "diffusion tensor", + "FA", + "MD", + "AD", + "RD", + "RGB", + "eigenvector", + "eigenvalue", + "diffusivity" + ] + }, + { + "name": "scil_dwi_apply_bias_field.py", + "keywords": [] + }, + { + "name": "scil_dwi_compute_snr.py", + "keywords": [] + }, + { + "name": "scil_dwi_concatenate.py", + "keywords": [ + "merge", + "fuse", + "concatenate", + "diffusion data", + "DWI" + ] + }, + { + "name": "scil_dwi_convert_FDF.py", + "keywords": [] + }, + { + "name": "scil_dwi_detect_volume_outliers.py", + "keywords": [] + }, + { + "name": "scil_dwi_extract_b0.py", + "keywords": [ + "b0 extraction", + "b0", + "b-value 0", + "extract" + ] + }, + { + "name": "scil_dwi_extract_shell.py", + "keywords": [ + "shell extraction", + "b-value extraction", + "DWI", + "shell/b-value selection", + "extract", + "DWI split", + "DWI splitting", + "multiple shells" + ] + }, + { + "name": "scil_dwi_powder_average.py", + "keywords": [ + "powder average", + "DWI" + ] + }, + { + "name": "scil_dwi_prepare_eddy_command.py", + "keywords": [] + }, + { + "name": "scil_dwi_prepare_topup_command.py", + "keywords": [] + }, + { + "name": "scil_dwi_reorder_philips.py", + "keywords": [ + "philips", + "DWI reorder", + "original gradient table" + ] + }, + { + "name": "scil_dwi_split_by_indices.py", + "keywords": [ + "DWI splitting", + "DWI split", + "indices" + ] + }, + { + "name": "scil_dwi_to_sh.py", + "keywords": [ + "signal", + "spherical harmonics" + ] + }, + { + "name": "scil_fodf_max_in_ventricles.py", + "keywords": [ + "ventricles", + "maximum fODF", + "absolute threshold" + ] + }, + { + "name": "scil_fodf_memsmt.py", + "keywords": [ + "b-tensor", + "b-tensor encoding", + "multi-encoding", + "multi-shell", + "multi-tissue", + "memsmt", + "linear tensor encoding (LTE)", + "planar tensor encoding (PTE)", + "spherical tensor encoding (STE)", + "multidimensional diffusion MRI", + "volume fraction", + "CSD", + "constrained spherical deconvolution", + "fODF" + ] + }, + { + "name": "scil_fodf_metrics.py", + "keywords": [ + "fODF metrics", + "NuFO", + "peaks", + "directions", + "peak values", + "peak indices", + "RGB", + "AFD" + ] + }, + { + "name": "scil_fodf_msmt.py", + "keywords": [ + "CSD", + "constrained spherical deconvolution", + "multi-shell", + "multi-tissue", + "msmt", + "volume fraction", + "fODF" + ] + }, + { + "name": "scil_fodf_ssst.py", + "keywords": [ + "CSD", + "constrained spherical deconvolution", + "single-shell", + "single-tissue", + "ssst", + "fODF" + ] + }, + { + "name": "scil_fodf_to_bingham.py", + "keywords": [ + "lobe", + "lobe-specific", + "bingham-odf" + ] + }, + { + "name": "scil_freewater_maps.py", + "keywords": [] + }, + { + "name": "scil_freewater_priors.py", + "keywords": [] + }, + { + "name": "scil_frf_mean.py", + "keywords": [ + "fiber response function", + "response function", + "RF", + "FRF", + "mean", + "mean FRF" + ] + }, + { + "name": "scil_frf_memsmt.py", + "keywords": [ + "fiber response function", + "response function", + "RF", + "FRF", + "b-tensor", + "b-tensor encoding", + "multi-encoding", + "multi-shell", + "multi-tissue", + "memsmt", + "linear tensor encoding (LTE)", + "planar tensor encoding (PTE)", + "spherical tensor encoding (STE)", + "multidimensional diffusion MRI" + ] + }, + { + "name": "scil_frf_msmt.py", + "keywords": [ + "fiber response function", + "response function", + "RF", + "FRF", + "multi-shell", + 
"multi-tissue", + "msmt" + ] + }, + { + "name": "scil_frf_set_diffusivities.py", + "keywords": [ + "fiber response function", + "response function", + "RF", + "FRF", + "diffusivity", + "diffusivities", + "fixed FRF" + ] + }, + { + "name": "scil_frf_ssst.py", + "keywords": [ + "fiber response function", + "response function", + "RF", + "FRF", + "single-shell", + "single-tissue", + "ssst" + ] + }, + { + "name": "scil_get_version.py", + "keywords": [] + }, + { + "name": "scil_gradients_apply_transform.py", + "keywords": [ + "gradients", + "bvecs", + "b-vectors", + "transformation", + "transform" + ] + }, + { + "name": "scil_gradients_convert.py", + "keywords": [ + "gradients", + "gradient table", + "fsl format", + "mrtrix format", + "bval", + "bvec" + ] + }, + { + "name": "scil_gradients_generate_sampling.py", + "keywords": [ + "gradients", + "gradient table", + "sampling scheme", + "sampling", + "hardi", + "multi-shell", + "caruyer", + "optimized gradients" + ] + }, + { + "name": "scil_gradients_modify_axes.py", + "keywords": [] + }, + { + "name": "scil_gradients_round_bvals.py", + "keywords": [ + "bvals", + "b-value", + "round bvals", + "shell" + ] + }, + { + "name": "scil_gradients_validate_correct_eddy.py", + "keywords": [] + }, + { + "name": "scil_gradients_validate_correct.py", + "keywords": [ + "fiber coherence index", + "coherence" + ] + }, + { + "name": "scil_header_print_info.py", + "keywords": [] + }, + { + "name": "scil_header_validate_compatibility.py", + "keywords": [] + }, + { + "name": "scil_json_convert_entries_to_xlsx.py", + "keywords": [] + }, + { + "name": "scil_json_harmonize_entries.py", + "keywords": [] + }, + { + "name": "scil_json_merge_entries.py", + "keywords": [] + }, + { + "name": "scil_labels_combine.py", + "keywords": [] + }, + { + "name": "scil_labels_dilate.py", + "keywords": [] + }, + { + "name": "scil_labels_remove.py", + "keywords": [] + }, + { + "name": "scil_labels_split_volume_by_ids.py", + "keywords": [] + }, + { + "name": "scil_labels_split_volume_from_lut.py", + "keywords": [] + }, + { + "name": "scil_lesions_info.py", + "keywords": [] + }, + { + "name": "scil_mti_adjust_B1_header.py", + "keywords": [ + "MTI", + "magnetization transfer", + "MT", + "B1 map", + "header", + "B1" + ] + }, + { + "name": "scil_mti_maps_ihMT.py", + "keywords": [ + "MTI", + "magnetization transfer", + "MT", + "ihMT", + "ihMTR", + "ihMTsat", + "myelin", + "MTR", + "MTsat" + ] + }, + { + "name": "scil_mti_maps_MT.py", + "keywords": [ + "MTI", + "magnetization transfer", + "MT", + "MTR", + "MTsat", + "myelin" + ] + }, + { + "name": "scil_NODDI_maps.py", + "keywords": [] + }, + { + "name": "scil_NODDI_priors.py", + "keywords": [] + }, + { + "name": "scil_plot_stats_per_point.py", + "keywords": [] + }, + { + "name": "scil_qball_metrics.py", + "keywords": [ + "CSA", + "QBI", + "q-ball imaging", + "diffusion odf" + ] + }, + { + "name": "scil_rgb_convert.py", + "keywords": [] + }, + { + "name": "scil_sh_convert.py", + "keywords": [ + "spherical harmonics", + "tournier", + "mrtrix", + "descoteaux", + "dipy", + "modern", + "legacy" + ] + }, + { + "name": "scil_sh_fusion.py", + "keywords": [ + "spherical harmonics", + "SH", + "fusion", + "largest magnitude", + "merge", + "coefficients" + ] + }, + { + "name": "scil_sh_to_aodf.py", + "keywords": [ + "asymmetric", + "asymmetries", + "filtering", + "full basis" + ] + }, + { + "name": "scil_sh_to_rish.py", + "keywords": [ + "rotation invariant spherical harmonics", + "features" + ] + }, + { + "name": "scil_sh_to_sf.py", + "keywords": [ + 
"spherical harmonics", + "spherical functions", + "SH", + "SF", + "convertion", + "conversion" + ] + }, + { + "name": "scil_stats_group_comparison.py", + "keywords": [] + }, + { + "name": "scil_surface_apply_transform.py", + "keywords": [ + "registration", + "warp", + "transformation", + "surface", + "mesh", + "vtk FreeSurfer" + ] + }, + { + "name": "scil_surface_convert.py", + "keywords": [ + "surface", + "mesh", + "vtk FreeSurfer" + ] + }, + { + "name": "scil_surface_flip.py", + "keywords": [ + "surface", + "mesh", + "vtk FreeSurfer" + ] + }, + { + "name": "scil_surface_smooth.py", + "keywords": [ + "surface", + "mesh", + "vtk FreeSurfer" + ] + }, + { + "name": "scil_tracking_local_dev.py", + "keywords": [ + "development", + "runge-kutta", + "pure-python", + "onboarding", + "tractography", + "dipy" + ] + }, + { + "name": "scil_tracking_local.py", + "keywords": [ + "eudx", + "tractography", + "tracking", + "peak tracking", + "local tracking", + "probabilistic", + "deterministic", + "prob", + "det" + ] + }, + { + "name": "scil_tracking_pft_maps_edit.py", + "keywords": [ + "particule filtering tractography", + "CMC" + ] + }, + { + "name": "scil_tracking_pft_maps.py", + "keywords": [ + "particle filter tractography", + "continuous map criterion", + "tracking", + "FODF", + "CMC", + "particle filtering tractography" + ] + }, + { + "name": "scil_tracking_pft.py", + "keywords": [ + "particle filter tractography", + "continuous map criterion", + "tracking", + "FODF" + ] + }, + { + "name": "scil_tractogram_alter.py", + "keywords": [] + }, + { + "name": "scil_tractogram_apply_transform.py", + "keywords": [ + "ants", + "registration", + "affine", + "linear", + "nonlinear" + ] + }, + { + "name": "scil_tractogram_apply_transform_to_hdf5.py", + "keywords": [] + }, + { + "name": "scil_tractogram_assign_custom_color.py", + "keywords": [] + }, + { + "name": "scil_tractogram_assign_uniform_color.py", + "keywords": [] + }, + { + "name": "scil_tractogram_commit.py", + "keywords": [ + "microstructure informed", + "filtering", + "MIT" + ] + }, + { + "name": "scil_tractogram_compress.py", + "keywords": [] + }, + { + "name": "scil_tractogram_compute_density_map.py", + "keywords": [ + "TDI", + "track density imaging", + "streamline count" + ] + }, + { + "name": "scil_tractogram_compute_TODI.py", + "keywords": [ + "track orientation density imaging", + "track density imaging", + "TDI" + ] + }, + { + "name": "scil_tractogram_convert_hdf5_to_trk.py", + "keywords": [] + }, + { + "name": "scil_tractogram_convert.py", + "keywords": [] + }, + { + "name": "scil_tractogram_count_streamlines.py", + "keywords": [] + }, + { + "name": "scil_tractogram_cut_streamlines.py", + "keywords": [] + }, + { + "name": "scil_tractogram_detect_loops.py", + "keywords": [] + }, + { + "name": "scil_tractogram_dpp_math.py", + "keywords": [ + "tractogram", + "data per point" + ] + }, + { + "name": "scil_tractogram_extract_ushape.py", + "keywords": [] + }, + { + "name": "scil_tractogram_filter_by_anatomy.py", + "keywords": [] + }, + { + "name": "scil_tractogram_filter_by_length.py", + "keywords": [] + }, + { + "name": "scil_tractogram_filter_by_orientation.py", + "keywords": [] + }, + { + "name": "scil_tractogram_filter_by_roi.py", + "keywords": [ + "segment", + "atlas" + ] + }, + { + "name": "scil_tractogram_fix_trk.py", + "keywords": [] + }, + { + "name": "scil_tractogram_flip.py", + "keywords": [] + }, + { + "name": "scil_tractogram_math.py", + "keywords": [] + }, + { + "name": "scil_tractogram_pairwise_comparison.py", + "keywords": [] + }, 
+ { + "name": "scil_tractogram_print_info.py", + "keywords": [] + }, + { + "name": "scil_tractogram_project_map_to_streamlines.py", + "keywords": [] + }, + { + "name": "scil_tractogram_project_streamlines_to_map.py", + "keywords": [] + }, + { + "name": "scil_tractogram_qbx.py", + "keywords": [ + "clustering" + ] + }, + { + "name": "scil_tractogram_register.py", + "keywords": [] + }, + { + "name": "scil_tractogram_remove_invalid.py", + "keywords": [] + }, + { + "name": "scil_tractogram_resample_nb_points.py", + "keywords": [] + }, + { + "name": "scil_tractogram_resample.py", + "keywords": [] + }, + { + "name": "scil_tractogram_seed_density_map.py", + "keywords": [] + }, + { + "name": "scil_tractogram_segment_and_score.py", + "keywords": [] + }, + { + "name": "scil_tractogram_segment_bundles_for_connectivity.py", + "keywords": [] + }, + { + "name": "scil_tractogram_segment_bundles.py", + "keywords": [] + }, + { + "name": "scil_tractogram_segment_one_bundle.py", + "keywords": [] + }, + { + "name": "scil_tractogram_shuffle.py", + "keywords": [] + }, + { + "name": "scil_tractogram_smooth.py", + "keywords": [] + }, + { + "name": "scil_tractogram_split.py", + "keywords": [] + }, + { + "name": "scil_viz_bingham_fit.py", + "keywords": [ + "visualisation", + "bingham distributions", + "bingham odf" + ] + }, + { + "name": "scil_viz_bundle.py", + "keywords": [ + "visualisation", + "bundle", + "tractogram", + "streamlines" + ] + }, + { + "name": "scil_viz_bundle_screenshot_mni.py", + "keywords": [] + }, + { + "name": "scil_viz_bundle_screenshot_mosaic.py", + "keywords": [] + }, + { + "name": "scil_viz_connectivity.py", + "keywords": [] + }, + { + "name": "scil_viz_dti_screenshot.py", + "keywords": [] + }, + { + "name": "scil_viz_fodf.py", + "keywords": [ + "visualize", + "fiber ODF", + "ODF", + "SH", + "peaks", + "background" + ] + }, + { + "name": "scil_viz_gradients_screenshot.py", + "keywords": [] + }, + { + "name": "scil_viz_tractogram_seeds_3d.py", + "keywords": [ + "visualize", + "seed", + "density", + "3D", + "seed density" + ] + }, + { + "name": "scil_viz_tractogram_seeds.py", + "keywords": [ + "visualize", + "seed", + "streamline", + "streamline origin" + ] + }, + { + "name": "scil_viz_volume_histogram.py", + "keywords": [ + "visualize", + "histogram", + "metric" + ] + }, + { + "name": "scil_viz_volume_scatterplot.py", + "keywords": [ + "visualize", + "scatterplot", + "distribution", + "metric" + ] + }, + { + "name": "scil_viz_volume_screenshot_mosaic.py", + "keywords": [] + }, + { + "name": "scil_viz_volume_screenshot.py", + "keywords": [] + }, + { + "name": "scil_volume_apply_transform.py", + "keywords": [] + }, + { + "name": "scil_volume_b0_synthesis.py", + "keywords": [] + }, + { + "name": "scil_volume_count_non_zero_voxels.py", + "keywords": [] + }, + { + "name": "scil_volume_crop.py", + "keywords": [] + }, + { + "name": "scil_volume_flip.py", + "keywords": [] + }, + { + "name": "scil_volume_math.py", + "keywords": [] + }, + { + "name": "scil_volume_remove_outliers_ransac.py", + "keywords": [] + }, + { + "name": "scil_volume_resample.py", + "keywords": [] + }, + { + "name": "scil_volume_reshape_to_reference.py", + "keywords": [] + }, + { + "name": "scil_volume_stats_in_labels.py", + "keywords": [] + }, + { + "name": "scil_volume_stats_in_ROI.py", + "keywords": [] + } + ], + "synonyms": [ + [ + "bundle", + "tract", + "pathway", + "fasciculus" + ], + [ + "multi-shells", + "multishell", + "multi shell", + "msmt" + ], + [ + "SH", + "Spherical Harmonics" + ], + + [ + "single-shell", + "single 
shell", + "ssst" + ], + [ + "ODF", + "orientation distribution function" + ], + [ + "DWI", + "diffusion weighted imaging" + ], + [ + "shell", + "bval", + "b-value", + "bvals" + ], + [ + "b-tensor encoding", + "tensor-valued" + ], + [ + "surface", + "mesh" + ], + [ + "merge", + "fuse", + "concatenate", + "add" + ], + [ + "parcellate", + "subdivide", + "split", + "divide" + ] + ], + "acronyms": [ + ["k-nn", "k-nearest neighbors"], + ["1d", "one-dimensional"], + ["2d", "two-dimensional"], + ["3d", "three-dimensional"], + ["ac", "anterior commissure"], + ["ae", "autoencoder"], + ["af", "arcuate fascicle"], + ["ai", "artificial intelligence"], + ["ann", "artificial neural network"], + ["ar", "acoustic radiation"], + ["atr", "anterior thalamic radiation"], + ["cc", "corpus callosum"], + ["cing", "cingulum"], + ["cinta", "clustering in tractography using autoencoders"], + ["cnn", "convolutional neural network"], + ["csd", "constrained spherical deconvolution"], + ["csf", "cerebrospinal fluid"], + ["cst", "corticospinal tract"], + ["dl", "deep learning"], + ["dmri", "diffusion magnetic resonance imaging"], + ["dodf", "diffusion orientation distribution function"], + ["dt", "diffusion tensor"], + ["dti", "diffusion tensor imaging"], + ["dw-mri", "diffusion-weighted magnetic resonance imaging"], + ["dwi", "diffusion-weighted imaging"], + ["dwm", "deep white matter"], + ["eap", "ensemble average (diffusion) propagator"], + ["fa", "fractional anisotropy"], + ["fat", "frontal aslant tract"], + ["fc", "fully connected"], + ["finta", "filtering in tractography using autoencoders"], + ["fmri", "functional magnetic resonance imaging"], + ["fod", "fiber orientation distribution"], + ["fodf", "fiber orientation distribution function"], + ["ft", "fourier transform"], + ["fx", "fornix"], + ["ge", "gradient echo"], + ["gesta", "generative sampling in bundle tractography using autoencoders"], + ["gm", "gray matter"], + ["hardi", "high angular resolution diffusion imaging"], + ["ic", "internal capsule"], + ["icp", "inferior cerebellar peduncle"], + ["ifof", "inferior fronto-occipital fascicle"], + ["ils", "inferior longitudinal system"], + ["jif", "journal impact factor"], + ["mcp", "middle cerebellar peduncle"], + ["ml", "machine learning"], + ["mlp", "multilayer perceptron"], + ["mls", "middle longitudinal system"], + ["mr", "magnetic resonance"], + ["mri", "magnetic resonance imaging"], + ["nn", "neural network"], + ["nos", "number of streamlines"], + ["odf", "orientation distribution function (also referred to as orientation density function)"], + ["or", "optic radiation"], + ["pc", "posterior commissure"], + ["pca", "principal component analysis"], + ["pdf", "probability density function"], + ["pgse", "pulsed-gradient spin echo"], + ["pli", "polarized light imaging"], + ["popt", "parieto-occipital pontine tract"], + ["ps-oct", "polarization-sensitive optical coherence tomography"], + ["rf", "radio frequency"], + ["rnn", "recurrent neural network"], + ["roc", "receiver operating characteristic"], + ["scp", "superior cerebellar peduncle"], + ["sd", "spherical deconvolution"], + ["se", "spin echo"], + ["set", "surface-enhanced tractography"], + ["sls", "superior longitudinal system"], + ["smri", "structural magnetic resonance imaging"], + ["swm", "superficial white matter"], + ["t1-w", "t1-weighted image"], + ["te", "echo time"], + ["tr", "repetition time"], + ["uf", "uncinate fascicle"], + ["vae", "variational autoencoder"], + ["wm", "white matter"], + ["3d", "three dimensions"], + ["4d", "four dimensions"], + 
["act", "anatomically-constrained tractography"], + ["amico", "accelerated microstructure imaging via convex optimization"], + ["apm", "average pathlength map"], + ["bet", "brain extraction tool"], + ["cdmri", "computational diffusion mri"], + ["cg", "cingulum"], + ["cmc", "continuous maps criterion"], + ["commit", "convex optimization modeling for microstructure informed tractography"], + ["csa", "constant solid-angle"], + ["csf/lcs/lcr", "cerebrospinal fluid"], + ["c-dec", "connectivity directionally-encoded color"], + ["dec", "directionally-encoded color"], + ["dwi", "diffusion-weighted imaging"], + ["dmri", "diffusion-weighted imaging"], + ["irmd", "diffusion-weighted imaging"], + ["eap", "ensemble average propagator"], + ["epi", "echo-planar imaging"], + ["fast", "fmrib’s automated segmentation tool"], + ["flirt", "fmrib’s linear image registration tool"], + ["fmt", "fast marching tractography"], + ["fsl", "fmrib software library"], + ["grappa", "generalized autocalibrating partially parallel acquisition"], + ["ifof", "inferior fronto-occipital fasciculus"], + ["ipmi", "information processing in medical imaging"], + ["ismrm", "international society for magnetic resonance in medicine"], + ["miccai", "medical image computing and computer assisted intervention"], + ["mprage", "magnetization-prepared rapid acquisition with gradient-echo"], + ["irm", "magnetic resonance imaging"], + ["mri", "magnetic resonance imaging"], + ["odf", "orientation distribution function"], + ["ohbm", "organization for human brain mapping"], + ["pve", "partial volume estimation"], + ["roi", "region of interest"], + ["rtt", "real-time tractography"], + ["sh", "spherical harmonics"], + ["slf", "superior longitudinal fasciculus"], + ["snr", "signal-to-noise ratio"], + ["twi", "track-weighted imaging"], + ["voi", "volume of interest"], + ["ats", "anterior traverse system"], + ["a123", "area 1/2/3 (upper limb, head, and face region)"], + ["a4hf", "area 4 (head and face region)"], + ["a4ul", "area 4 (upper limb region)"], + ["a46", "area 46"], + ["af", "arcuate fasciculus"], + ["bls", "basal longitudinal system"], + ["ca39", "caudal area 39"], + ["cdl", "caudal dorsolateral area"], + ["cvl", "caudal ventrolateral area"], + ["cdl", "caudolateral of area"], + ["csf", "cerebral spinal fluid"], + ["ctoi", "conservative tract of interest"], + ["da9/36", "dorsal area 9/46"], + ["ddi", "dorsal dysgranular insula"], + ["dl6", "dorsolateral area 6"], + ["dl37", "dorsolateral area 37 region"], + ["efc", "extreme/external capsule fibre system"], + ["fodfs", "fibre orientation distribution functions"], + ["fus", "fusiform gyrus"], + ["hcp", "human connectome project"], + ["ifg", "inferior frontal gyrus"], + ["ils", "inferior longitudinal system"], + ["ipl", "inferior parietal lobe"], + ["itg", "inferior temporal gyrus"], + ["ins", "insula"], + ["ipa", "intraparietal area"], + ["la", "lateral area"], + ["locc", "lateral occipital cortex"], + ["cin", "limbic lobe/cingulum"], + ["mme", "mean millimetre error"], + ["mvocc", "medioventral occipital cortex"], + ["mlf", "medial longitudinal fasciculus"], + ["mesls", "mesial longitudinal system"], + ["mfg", "middle frontal gyrus"], + ["midls", "middle longitudinal systems"], + ["mlf", "middle longitudinal fasciculus"], + ["mtg", "middle temporal gyrus"], + ["mni", "montreal neurological institute"], + ["opa", "opercular area"], + ["ofg", "orbital frontal gyrus"], + ["pag", "paracentral gyrus"], + ["pft", "particle-filter tractography"], + ["pocg", "postcentral gyrus"], + ["pts", 
"posterior traverse system"], + ["pcg", "precentral gyrus"], + ["pcun", "precuneus"], + ["rois", "regions of interest"], + ["rda", "rostrodorsal area"], + ["rva", "rostroventral area"], + ["stoi", "sublobe tract of interest"], + ["sfg", "superior frontal gyrus"], + ["slf", "superior longitudinal fasciculus"], + ["sls", "superior longitudinal system"], + ["spl", "superior parietal lobule"], + ["stl", "superior temporal lobe"], + ["sma", "supplementary motor area"], + ["tois", "tracts of interest"], + ["t", "tesla"], + ["uf", "uncinate fasciculus"], + ["vtois", "variable tract of interest"], + ["abs", "atlas based segmentation"], + ["afd", "apparent fibre density"], + ["ad", "axial diffusivity"], + ["bids", "brain imaging data structure"], + ["lcs", "cerebrospinal fluid"], + ["dodf", "diffusion orientation distribution function"], + ["flair", "fluid-attenuated inversion recovery"], + ["frf", "fibre response function"], + ["rd", "radial diffusivity"], + ["rf", "radio frequency"], + ["scil", "sherbrooke connectivity imaging laboratory"], + ["sp", "multiple sclerosis"], + ["cpu", "central processing unit"], + ["frt", "funk-radon transform"], + ["go", "gigabyte"], + ["gpu", "graphical processing unit"], + ["gru", "gated recurrent unit"], + ["irm", "magnetic resonance imaging"], + ["irmd", "diffusion-weighted magnetic resonance imaging"], + ["lstm", "long short-term memory network"], + ["md", "mean diffusivity"], + ["ram", "random access memory"], + ["rnn", "recurrent neural network"], + ["3d-shore", "three dimensional simple harmonic oscillator based reconstruction and estimation model"], + ["ae", "angular error metric"], + ["cdf", "cohen-daubechies-feauveau"], + ["cdsi", "classical diffusion spectrum imaging model"], + ["cs", "compressive sensing"], + ["csa", "constant solid angle q-ball model"], + ["csd", "constrained spherical deconvolution model"], + ["cv", "cross validation"], + ["ddsi", "diffusion spectrum imaging deconvolution model"], + ["dipy", "diffusion in python software"], + ["dnc", "difference in the number of fiber compartments metric"], + ["dsi", "diffusion spectrum imaging model"], + ["dsi515", "classical diffusion spectrum imaging acquisition scheme with 515 samples"], + ["dsistudio", "dsi studio software"], + ["dti", "diffusion tensor imaging model"], + ["dtk", "diffusion toolkit software"], + ["dtwt", "dual tree wavelet transform"], + ["dw", "diffusion weighted"], + ["dwi", "diffusion weighted imaging"], + ["dwt", "discrete wavelet transform"], + ["fodf", "fiber orientation distribution function"], + ["ib", "invalib bundles metric"], + ["idft", "inverse discrete fourier transform"], + ["isbi", "ieee international symposium on biomedical imaging"], + ["isbi2013", "subset of the dataset from the hardi challenge at the conference isbi2013"], + ["isbi2013-full", "dataset from the hardi challenge at the conference isbi2013"], + ["mgh-ucla hcp", "(massachusetts general hospital - university of california, los angeles) human connectome project"], + ["nmse", "normalized mean square error"], + ["odsi", "optimal diffusion spectrum imaging model"], + ["pccoeff", "pearson correlation coefficient"], + ["pdsi", "plain diffusion spectrum imaging model"], + ["pgse", "pulse-gradient spin-echo"], + ["qbi", "q-ball imaging model"] + ] + +} \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index a9b9d3207..6d6701aad 100644 --- a/requirements.txt +++ b/requirements.txt @@ -24,6 +24,7 @@ matplotlib==3.6.* PyMCubes==0.1.* nibabel==5.2.* nilearn==0.9.* +nltk==3.8.* 
numpy==1.25.* openpyxl==3.0.* packaging == 23.2.* diff --git a/scilpy/utils/scilpy_bot.py b/scilpy/utils/scilpy_bot.py new file mode 100644 index 000000000..56c923cdc --- /dev/null +++ b/scilpy/utils/scilpy_bot.py @@ -0,0 +1,362 @@ +import ast +import nltk +import pathlib +import subprocess +from nltk.stem import PorterStemmer +from colorama import Fore, Style +import re +from tqdm import tqdm + +stemmer = PorterStemmer() + +RED = '\033[31m' +BOLD = '\033[1m' +END_COLOR = '\033[0m' +SPACING_CHAR = '=' +SPACING_LEN = 80 + +# Path to the JSON file containing script information and keywords +VOCAB_FILE_PATH = pathlib.Path( + __file__).parent.parent.parent / 'data' / 'vocabulary' / 'vocabulary.json' + + +OBJECTS = [ + 'aodf', 'bids', 'bingham', 'btensor', 'bundle', + 'connectivity', 'denoising', 'dki', 'dti', 'dwi', + 'fodf', 'freewater', 'frf', 'gradients', 'header', + 'json', 'labels', 'lesions', 'mti', 'NODDI', 'sh', + 'surface', 'tracking', 'tractogram', 'viz', 'volume', + 'qball', 'rgb' +] + + +def prompt_user_for_object(): + """ + Prompts the user to select an object from the list of available objects. + """ + print("Available objects:") + for idx, obj in enumerate(OBJECTS): + print(f"{idx + 1}. {obj}") + while True: + try: + choice = int( + input("Choose the object you want to work on " + "(enter the number): ")) + if 1 <= choice <= len(OBJECTS): + return OBJECTS[choice - 1] + else: + print(f"Please enter a number between 1 and {len(OBJECTS)}.") + except ValueError: + print("Invalid input. Please enter a number.") + + +def _make_title(text): + """ + Returns a formatted title string with centered text and spacing. + """ + return f'{Fore.BLUE}{Style.BRIGHT}{text.center(SPACING_LEN, SPACING_CHAR)}{Style.RESET_ALL}' + + +def _get_docstring_from_script_path(script): + """ + Extract a python file's docstring from a filepath. + + Parameters + ---------- + script : str + Path to python file + + Returns + ------- + docstring : str + The file's docstring, or an empty string if there was no docstring. + """ + with open(script, 'r') as reader: + file_contents = reader.read() + module = ast.parse(file_contents) + docstring = ast.get_docstring(module) or '' + return docstring + + +def _split_first_sentence(text): + """ + Split the first sentence from the rest of a string by finding the first + dot or newline. If there is no dot or newline, return the full string as + the first sentence, and an empty string as the remaining text. + + Parameters + ---------- + text : str + Text to parse. + + Returns + ------- + first_sentence : str + The first sentence, or the full text if no dot or newline was found. + remaining : str + Everything after the first sentence. + + """ + candidates = ['. ', '.\n'] + sentence_idx = -1 + for candidate in candidates: + idx = text.find(candidate) + if idx != -1 and idx < sentence_idx or sentence_idx == -1: + sentence_idx = idx + + split_idx = (sentence_idx + 1) or None + sentence = text[:split_idx] + remaining = text[split_idx:] if split_idx else "" + return sentence, remaining + + +def _stem_keywords(keywords): + """ + Stem a list of keywords using PorterStemmer. + + Parameters + ---------- + keywords : list of str + Keywords to be stemmed. + + Returns + ------- + list of str + Stemmed keywords. + """ + return [stemmer.stem(keyword) for keyword in keywords] + + +def _stem_text(text): + """ + Stem all words in a text using PorterStemmer. + + Parameters + ---------- + text : str + Text to be stemmed. + + Returns + ------- + str + Stemmed text.
+ """ + words = nltk.word_tokenize(text) + return ' '.join([stemmer.stem(word) for word in words]) + + +def _stem_phrase(phrase): + """ + Stem all words in a phrase using PorterStemmer. + + Parameters + ---------- + phrase : str + Phrase to be stemmed. + + Returns + ------- + str + Stemmed phrase. + """ + words = phrase.split() + return ' '.join([stemmer.stem(word) for word in words]) + + +def _generate_help_files(): + """ + This function iterates over all Python scripts in the 'scripts' directory, + runs each script with the '--h' flag to generate help text, + and saves the output in the '.hidden' directory. + + By doing this, we can precompute the help outputs for each script, + which can be useful for faster searches. + + If a help file already exists for a script, the script is skipped, + and the existing help file is left unchanged. + + The help output is saved in a hidden directory to avoid clutter in + the main scripts directory. + """ + + scripts_dir = pathlib.Path(__file__).parent.parent.parent / 'scripts' + + scripts = [script for script in scripts_dir.glob('*.py') + if script.name not in ['__init__.py', + 'scil_search_keywords.py']] + total_scripts = len(scripts) + + # Hidden directory to store help files + hidden_dir = scripts_dir / '.hidden' + hidden_dir.mkdir(exist_ok=True) + + # Iterate over all scripts and generate help files + with tqdm(total=total_scripts, desc="Generating help files") as pbar: + for script in scripts: + help_file = hidden_dir / f'{script.name}.help' + # Check if help file already exists + if help_file.exists(): + tqdm.write(f'Help file for {script.name} already exists. Skipping.') + pbar.update(1) + continue + + # Run the script with --h and capture the output + result = subprocess.run( + ['python', script, '--h'], capture_output=True, text=True) + + # Save the output to the hidden file + with open(help_file, 'w') as f: + f.write(result.stdout) + + tqdm.write(f'Help file saved to {help_file}') + pbar.update(1) + + # Check if any help files are missing and regenerate them + with tqdm(total=total_scripts, desc="Checking missing help files") as pbar: + for script in scripts_dir.glob('*.py'): + if script.name == '__init__.py' or script.name == 'scil_search_keywords.py': + pbar.update(1) + continue + help_file = hidden_dir / f'{script.name}.help' + if not help_file.exists(): + # Run the script with --h and capture the output + result = subprocess.run( + ['python', script, '--h'], capture_output=True, text=True) + + # Save the output to the hidden file + with open(help_file, 'w') as f: + f.write(result.stdout) + + tqdm.write(f'Regenerated help output for {script.name}') + pbar.update(1) + + +def _highlight_keywords(text, stemmed_keywords): + """ + Highlight the stemmed keywords in the given text using colorama. + + Parameters + ---------- + text : str + Text to highlight keywords in. + stemmed_keywords : list of str + Stemmed keywords to highlight. + + Returns + ------- + str + Text with highlighted keywords. + """ + words = text.split() + highlighted_text = [] + for word in words: + stemmed_word = stemmer.stem(word) + if stemmed_word in stemmed_keywords: + highlighted_text.append( + f'{Fore.RED}{Style.BRIGHT}{word}{Style.RESET_ALL}') + else: + highlighted_text.append(word) + return ' '.join(highlighted_text) + + +def _get_synonyms(keyword, synonyms_data): + """ + Get synonyms for a given keyword from the synonyms data. + + Parameters + ---------- + keyword : str + Keyword to find synonyms for. + synonyms_data : dict + Dictionary containing synonyms data. 
+ + Returns + ------- + list of str + List of synonyms for the given keyword. + """ + keyword = keyword.lower() + for synonym_set in synonyms_data: + synonym_set = [synonym.lower() for synonym in synonym_set] + if keyword in synonym_set: + return synonym_set + return [] + + +def _extract_keywords_and_phrases(keywords): + """ + Extract keywords and phrases from the provided list. + + Parameters + ---------- + keywords : list of str + List of keywords and phrases. + + Returns + ------- + list of str, list of str + List of individual keywords and list of phrases. + """ + keywords_list = [] + phrases_list = [] + + for keyword in keywords: + # If the keyword contains a blank space, it is a phrase (more than one word) + if ' ' in keyword: + phrases_list.append(keyword) + else: + keywords_list.append(keyword) + return keywords_list, phrases_list + + +def _calculate_score(keywords, phrases, text, filename): + """ + Calculate a score for how well the text and filename match the keywords. + + Parameters + ---------- + keywords : list of str + Keywords to search for. + phrases : list of str + Phrases to search for. + text : str + Text to search within. + filename : str + Filename to search within. + + Returns + ------- + dict + Score details based on the frequency of keywords + in the text and filename. + """ + stemmed_text = _stem_text(text.lower()) + stemmed_filename = _stem_text(filename.lower()) + score_details = {'total_score': 0} + + def is_match(found_word, keyword): + if len(keyword) <= 3: + return found_word == keyword + return stemmer.stem(found_word) == stemmer.stem(keyword) + + for keyword in keywords: + keyword = keyword.lower() + # Use regular expressions to match whole words only + keyword_pattern = re.compile(r'\b' + re.escape(keyword) + r'\b') + found_words = keyword_pattern.findall( + stemmed_text) + keyword_pattern.findall(stemmed_filename) + keyword_score = 0 + + for found_word in found_words: + if is_match(found_word, keyword): + keyword_score += 1 + + score_details[keyword] = keyword_score + score_details['total_score'] += keyword_score + + for phrase in phrases: + phrase_stemmed = _stem_text(phrase.lower()) + phrase_score = stemmed_text.count(phrase_stemmed) + score_details[phrase] = phrase_score + score_details['total_score'] += phrase_score + return score_details diff --git a/scilpy/utils/tests/test_scilpy_bot.py b/scilpy/utils/tests/test_scilpy_bot.py new file mode 100644 index 000000000..5574a1ce4 --- /dev/null +++ b/scilpy/utils/tests/test_scilpy_bot.py @@ -0,0 +1,76 @@ + +from scilpy.utils.scilpy_bot import ( + _make_title, _get_docstring_from_script_path, + _split_first_sentence, _stem_keywords, _stem_text, _stem_phrase, + _highlight_keywords, _get_synonyms, + _extract_keywords_and_phrases, _calculate_score +) + + +def test_make_title(): + result = _make_title("Test Title") + assert "Test Title" in result + + +def test_get_docstring_from_script_path(tmp_path): + script_content = '"""This is a test docstring."""' + script_path = tmp_path / "test_script.py" + script_path.write_text(script_content) + result = _get_docstring_from_script_path(str(script_path)) + assert result == "This is a test docstring." + + +def test_split_first_sentence(): + text = "This is the first sentence. This is the second sentence." + first, remaining = _split_first_sentence(text) + assert first == "This is the first sentence." + assert remaining == " This is the second sentence."
+ + +def test_stem_keywords(): + keywords = ["running", "jumps"] + result = _stem_keywords(keywords) + assert result == ["run", "jump"] + + +def test_stem_text(): + text = "Running and jumping." + result = _stem_text(text) + assert result == "run and jump ." + + +def test_stem_phrase(): + phrase = "Running quickly" + result = _stem_phrase(phrase) + assert result == "run quickli" + + +def test_highlight_keywords(): + text = "Running and jumping." + stemmed_keywords = ["run"] + result = _highlight_keywords(text, stemmed_keywords) + assert "Running" in result + + +def test_get_synonyms(): + synonyms_data = [["run", "sprint"], ["jump", "leap"]] + result = _get_synonyms("run", synonyms_data) + assert result == ["run", "sprint"] + + +def test_extract_keywords_and_phrases(): + keywords = ["running", "jumps", "quick run"] + result_keywords, result_phrases = _extract_keywords_and_phrases(keywords) + assert result_keywords == ["running", "jumps"] + assert result_phrases == ["quick run"] + + +def test_calculate_score(): + keywords = ["run"] + phrases = ["quick run"] + text = "Running quickly is fun. A quick run is good." + filename = "run_script.py" + result = _calculate_score(keywords, phrases, text, filename) + assert result["total_score"] == 3 + assert result["run"] == 2 + assert result["quick run"] == 1 diff --git a/scripts/scil_search_keywords.py b/scripts/scil_search_keywords.py index d65d58f2e..713805935 100755 --- a/scripts/scil_search_keywords.py +++ b/scripts/scil_search_keywords.py @@ -2,45 +2,79 @@ # -*- coding: utf-8 -*- """ -Search through all of SCILPY scripts and their docstrings. The output of the -search will be the intersection of all provided keywords, found either in the -script name or in its docstring. -By default, print the matching filenames and the first sentence of the -docstring. If --verbose if provided, print the full docstring. +Search through all SCILPY scripts and their docstrings to find matches for the +provided keywords. +The search will be performed across script names, docstrings, help files, +keywords, and optionally synonyms. +The output will list the matching filenames along with the occurrences of each +keyword, and their total score. + +- By default, the search includes synonyms for the keywords. +- Use --no_synonyms to exclude synonyms from the search. +- Use --search_category to limit the search to a specific category of scripts. +- Words enclosed in quotes will be searched as phrases, ensuring the words +appear next to each other in the text. + +Verbosity Options: +- If the `-v` option is provided, the script will display the first sentence + of the docstring for each matching script. +- If the `-v DEBUG` option is provided, the script will display the full + docstring for each matching script. + +Keywords Highlighting: +- When displaying the docstrings, the script highlights the found keywords in +red. 
Examples: - scil_search_keywords.py tractogram filtering - scil_search_keywords.py --search_parser tractogram filtering -v +- scil_search_keywords.py tractogram filtering +- scil_search_keywords.py "Spherical Harmonics" +- scil_search_keywords.py --no_synonyms "Spherical Harmonics" +- scil_search_keywords.py --search_category tractogram +- scil_search_keywords.py -v sh +- scil_search_keywords.py -v DEBUG sh + """ import argparse -import ast import logging import pathlib -import re -import subprocess - -import numpy as np +try: + import nltk + nltk.download('punkt', quiet=True) +except ImportError: + print("You must install the 'nltk' package to use this script. " + "Please run 'pip install nltk'.") + exit(1) + +from colorama import Fore, Style +import json + +from scilpy.utils.scilpy_bot import ( + _get_docstring_from_script_path, _stem_keywords, + _stem_phrase, _generate_help_files, + _get_synonyms, _extract_keywords_and_phrases, + _calculate_score, _make_title, prompt_user_for_object, + _split_first_sentence, _highlight_keywords +) +from scilpy.utils.scilpy_bot import SPACING_LEN, VOCAB_FILE_PATH from scilpy.io.utils import add_verbose_arg -RED = '\033[31m' -BOLD = '\033[1m' -END_COLOR = '\033[0m' -SPACING_CHAR = '=' -SPACING_LEN = 80 +nltk.download('punkt', quiet=True) def _build_arg_parser(): p = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawTextHelpFormatter) + p.add_argument('keywords', nargs='+', help='Search the provided list of keywords.') - p.add_argument('--search_parser', action='store_true', - help='Search through and display the full script argparser ' - 'instead of looking only at the docstring. (warning: ' - 'much slower).') + p.add_argument('--search_category', action='store_true', + help='Search within a specific category of scripts.') + + p.add_argument('--no_synonyms', action='store_true', + help='Search without using synonyms.') add_verbose_arg(p) @@ -55,164 +89,166 @@ def main(): else: logging.getLogger().setLevel(logging.getLevelName(args.verbose)) - # Use directory of this script, should work with most installation setups + selected_object = None + if args.search_category: + selected_object = prompt_user_for_object() + + # Keywords are single words.
Phrases are multi-word keywords. + keywords, phrases = _extract_keywords_and_phrases(args.keywords) + stemmed_keywords = _stem_keywords(keywords) + stemmed_phrases = [_stem_phrase(phrase) for phrase in phrases] + + # Create a mapping of stemmed to original keywords + # This will be needed to display the occurrences of the keywords + keyword_mapping = {stem: orig for orig, + stem in zip(keywords, stemmed_keywords)} + phrase_mapping = {stem: orig for orig, + stem in zip(phrases, stemmed_phrases)} + script_dir = pathlib.Path(__file__).parent - matches = [] + hidden_dir = script_dir / '.hidden' - keywords_regexes = [re.compile('(' + re.escape(kw) + ')', re.IGNORECASE) - for kw in args.keywords] + if not hidden_dir.exists(): + hidden_dir.mkdir() + logging.info('This is your first time running this script.\n' + 'Generating help files may take a few minutes, ' + 'please be patient.\n' + 'Subsequent searches will be much faster.') + _generate_help_files() - for script in sorted(script_dir.glob('*.py')): - filename = script.name - if filename == '__init__.py': + matches = [] + scores = {} + docstrings = {} # To store the docstrings of each script + + # Pattern to search for + search_pattern = f'scil_{"{}_" if selected_object else ""}*.py' + + def update_matches_and_scores(filename, score_details, docstring=None): + """ + Update the matches and scores for the given filename based + on the score details. + + Parameters + ---------- + filename : str + The name of the script file being analyzed. + score_details : dict + A dictionary containing the scores for the keywords + and phrases found in the script. + This dictionary should have a 'total_score' key + indicating the cumulative score. + docstring : str, optional + The docstring of the script. + Returns + ------- + None + Just updates the global `matches` and `scores` lists/dictionaries.
+ """ + if score_details['total_score'] > 0: + if filename not in matches: + matches.append(filename) + scores[filename] = score_details + if docstring: + docstrings[filename] = docstring + else: + for key, value in score_details.items(): + if key != 'total_score': + scores[filename][key] = scores[filename].get( + key, 0) + value + scores[filename]['total_score'] += score_details['total_score'] + if docstring: + docstrings[filename] = docstring + + for script in sorted(script_dir.glob(search_pattern.format(selected_object))): + filename = script.stem + if filename == '__init__' or filename == 'scil_search_keywords': continue - # Skip this script - if filename == pathlib.Path(__file__).name: + # Search through the docstring + search_text = _get_docstring_from_script_path(str(script)) + score_details = _calculate_score( + stemmed_keywords, stemmed_phrases, search_text, filename=filename) + update_matches_and_scores(filename, score_details, + docstring=search_text) + + # Search in help files + help_file = hidden_dir / f"{filename}.py.help" + if help_file.exists(): + with open(help_file, 'r') as f: + search_text = f.read() + score_details = _calculate_score( + stemmed_keywords, stemmed_phrases, + search_text, filename=filename) + update_matches_and_scores(filename, score_details) + + # Search in keywords file + with open(VOCAB_FILE_PATH, 'r') as f: + vocab_data = json.load(f) + + for script in vocab_data['scripts']: + script_name = script['name'] + if selected_object and not script_name.startswith(f'scil_{selected_object}_'): continue - - error_msg = "" - if args.search_parser: - # Run the script's argparser, and catch the output in case there - # is an error, such as ModuleNotFoundException. - sub = subprocess.run(['{}'.format(script.absolute()), '--help'], - capture_output=True) - search_text = sub.stdout.decode("utf-8") - if sub.stderr: - # Fall back on the docstring in case of error - error_msg = "There was an error executing script parser, " \ - "searching through docstring instead...\n\n" + script_keywords = script['keywords'] + search_text = ' '.join(script_keywords) + score_details = _calculate_score( + stemmed_keywords, stemmed_phrases, search_text, script_name) + update_matches_and_scores(script_name, score_details) + + # Search in synonyms file if not args.no_synonyms is not specified + if not args.no_synonyms: + for keyword in keywords + phrases: + synonyms = _get_synonyms(keyword, vocab_data['synonyms']) + for script in sorted(script_dir.glob(search_pattern.format(selected_object))): + filename = script.stem + if filename == '__init__' or filename == 'scil_search_keywords': + continue search_text = _get_docstring_from_script_path(str(script)) - else: - # Fetch the docstring - search_text = _get_docstring_from_script_path(str(script)) + # Initialize or get existing score_details for the script + score_details = scores.get(filename, {'total_score': 0}) - # Test intersection of all keywords, either in filename or docstring - if not _test_matching_keywords(args.keywords, [filename, search_text]): - continue + for synonym in synonyms: + if synonym in search_text and synonym != keyword: + # Update score_details with count of each synonym found + score_details[keyword + ' synonyms'] = score_details.get( + keyword + ' synonyms', 0) + search_text.count(synonym) + score_details['total_score'] += search_text.count( + synonym) - matches.append(filename) - search_text = search_text or 'No docstring available!' 
- - display_filename = filename - display_short_info, display_long_info = _split_first_sentence( - search_text) - - # NOTE: It is important to do the formatting before adding color style, - # because python does not ignore ANSI color codes, and will count them - # as characters! - - # Center text, add spacing and make BOLD - header = _make_title(" {} ".format(display_filename)) - footer = _make_title(" End of {} ".format(display_filename)) - - # Highlight found keywords using ANSI color codes - colored_keyword = '{}\\1{}'.format(RED + BOLD, END_COLOR) - for regex in keywords_regexes: - header = regex.sub(colored_keyword, header) - footer = regex.sub(colored_keyword, footer) - display_short_info = regex.sub(colored_keyword, display_short_info) - display_long_info = regex.sub(colored_keyword, display_long_info) - - # Restore BOLD in header/footer after matching keywords, and make sure - # to add a END_COLOR at the end. - header = header.replace(END_COLOR, END_COLOR + BOLD) + END_COLOR - footer = footer.replace(END_COLOR, END_COLOR + BOLD) + END_COLOR - - # Print everything - logging.info(header) - if error_msg: - logging.info(RED + BOLD + error_msg + END_COLOR) - logging.info(display_short_info) - logging.debug(display_long_info) - logging.info(footer) - logging.info("\n") + # Directly update scores dictionary + scores[filename] = score_details if not matches: logging.info(_make_title(' No results found! ')) - -def _make_title(text): - return BOLD + text.center(SPACING_LEN, SPACING_CHAR) + END_COLOR - - -def _test_matching_keywords(keywords, texts): - """Test multiple texts for matching keywords. Returns True only if all - keywords are present in any of the texts. - - Parameters - ---------- - keywords : Iterable of str - Keywords to test for. - texts : Iterable of str - Strings that should contain the keywords. - - Returns - ------- - True if all keywords were found in at least one of the texts. - - """ - matches = [] - for key in keywords: - key_match = False - for text in texts: - if key.lower() in text.lower(): - key_match = True - break - matches.append(key_match) - - return np.all(matches) - - -def _get_docstring_from_script_path(script): - """Extract a python file's docstring from a filepath. - - Parameters - ---------- - script : str - Path to python file - - Returns - ------- - docstring : str - The file docstring, or an empty string if there was no docstring. - """ - with open(script, 'r') as reader: - file_contents = reader.read() - module = ast.parse(file_contents) - docstring = ast.get_docstring(module) or '' - return docstring - - -def _split_first_sentence(text): - """Split the first sentence from the rest of a string by finding the first - dot or newline. If there is no dot or newline, return the full string as - the first sentence, and None as the remaining text. - - Parameters - ---------- - text : str - Text to parse. - - Returns - ------- - first_sentence : str - The first sentence, or the full text if no dot or newline was found. - remaining : str - Everything after the first sentence. - - """ - candidates = ['. 
', '.\n'] - sentence_idx = -1 - for candidate in candidates: - idx = text.find(candidate) - if idx != -1 and idx < sentence_idx or sentence_idx == -1: - sentence_idx = idx - - split_idx = (sentence_idx + 1) or None - sentence = text[:split_idx] - remaining = text[split_idx:] if split_idx else "" - return sentence, remaining + # Sort matches by score and display them + else: + sorted_matches = sorted( + matches, key=lambda x: scores[x]['total_score'], reverse=False) + + logging.info(_make_title(' Results Ordered by Score ')) + for match in sorted_matches: + if scores[match]['total_score'] > 0: + logging.info(f"{Fore.BLUE}{Style.BRIGHT}{match}{Style.RESET_ALL}") + + for word, score in scores[match].items(): + if word != 'total_score': + logging.info(f"{Fore.GREEN}Occurrence of '{keyword_mapping.get(word, phrase_mapping.get(word, word))}': {score}{Style.RESET_ALL}") + + # Highlight keywords based on verbosity level + if match in docstrings: + highlighted_docstring = _highlight_keywords(docstrings[match], + stemmed_keywords) + if args.verbose == 'INFO': + first_sentence = _split_first_sentence(highlighted_docstring)[0] + logging.info(f"{first_sentence.strip()}") + elif args.verbose == 'DEBUG': + logging.debug(f"{highlighted_docstring.strip()}") + logging.info(f"{Fore.RED}Total Score: {scores[match]['total_score']}{Style.RESET_ALL}") + logging.info(f"{Fore.BLUE}{'=' * SPACING_LEN}{Style.RESET_ALL}") + logging.info("\n") + logging.info(_make_title( + ' Results Ordered by Score (Best results at the bottom) ')) if __name__ == '__main__': main() diff --git a/scripts/tests/test_search_keywords.py b/scripts/tests/test_search_keywords.py index 5eda2b4fc..44dd6c907 100644 --- a/scripts/tests/test_search_keywords.py +++ b/scripts/tests/test_search_keywords.py @@ -7,16 +7,17 @@ def test_help_option(script_runner): assert ret.success -def test_no_verbose(script_runner): - ret = script_runner.run('scil_search_keywords.py', 'mti') - assert ret.success +def test_search_category(script_runner): + ret = script_runner.run('scil_search_keywords.py', '--search_category', 'sh') + assert 'Available objects:' in ret.stdout -def test_verbose_option(script_runner): - ret = script_runner.run('scil_search_keywords.py', 'mti', '-v') +def test_no_synonyms(script_runner): + ret = script_runner.run('scil_search_keywords.py', 'sh', '--no_synonyms') assert ret.success -def test_not_find(script_runner): +def test_not_found(script_runner): ret = script_runner.run('scil_search_keywords.py', 'toto') assert ret.success + assert 'No results found!' in ret.stdout or 'No results found!' in ret.stderr
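
For reference, a minimal sketch of how the scoring helpers introduced in scilpy/utils/scilpy_bot.py fit together. This is not part of the diff; it assumes a scilpy checkout with nltk installed and the 'punkt' tokenizer data downloaded, and the sample text, filename, and printed counts are illustrative only:

    # Usage sketch for the helpers added in this PR; assumes nltk's 'punkt'
    # data is available (nltk.download('punkt')) and scilpy is importable.
    from scilpy.utils.scilpy_bot import (_calculate_score,
                                         _extract_keywords_and_phrases,
                                         _stem_keywords, _stem_phrase)

    # Multi-word terms become phrases; single words stay keywords.
    keywords, phrases = _extract_keywords_and_phrases(
        ['tracking', 'local tracking'])
    stemmed_keywords = _stem_keywords(keywords)
    stemmed_phrases = [_stem_phrase(p) for p in phrases]

    # Score a docstring-like text the same way main() scores each script:
    # whole-word matches of stemmed keywords, plus substring counts of
    # stemmed phrases, accumulated under 'total_score'.
    sample_text = "Local tracking script. Performs probabilistic tracking."
    details = _calculate_score(stemmed_keywords, stemmed_phrases,
                               sample_text, 'scil_tracking_local.py')
    print(details)  # e.g. {'total_score': ..., 'track': ..., 'local track': ...}

The same helpers back the docstring, help-file, and vocabulary searches in scil_search_keywords.py, so a quick interactive check like this should mirror the per-term counts the CLI reports.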