diff --git a/src/nplinker/pairedomics/strain_mappings_generator.py b/src/nplinker/pairedomics/strain_mappings_generator.py index ab553073..569d4f66 100644 --- a/src/nplinker/pairedomics/strain_mappings_generator.py +++ b/src/nplinker/pairedomics/strain_mappings_generator.py @@ -30,7 +30,7 @@ def podp_generate_strain_mappings( podp_project_json_file: str | PathLike, genome_status_json_file: str | PathLike, genome_bgc_mappings_file: str | PathLike, - gnps_file_mapping_tsv_file: str | PathLike, + gnps_file_mappings_file: str | PathLike, output_json_file: str | PathLike, ) -> StrainCollection: """Generate strain mappings JSON file for PODP pipeline. @@ -44,7 +44,7 @@ def podp_generate_strain_mappings( - "original_genome_id <-> resolved_genome_id" is extracted from `genome_status_json_file`. - "resolved_genome_id <-> bgc_id" is extracted from `genome_bgc_mappings_file`. - "strain_id <-> MS_filename" is extracted from `podp_project_json_file`. - - "MS_filename <-> spectrum_id" is extracted from `gnps_file_mapping_tsv_file`. + - "MS_filename <-> spectrum_id" is extracted from `gnps_file_mappings_file`. Args: podp_project_json_file(str | PathLike): The path to the PODP project @@ -53,8 +53,8 @@ def podp_generate_strain_mappings( JSON file. genome_bgc_mappings_file(str | PathLike): The path to the genome BGC mappings JSON file. - gnps_file_mapping_tsv_file(str | PathLike): The path to the GNPS file - mapping TSV file. + gnps_file_mappings_file(str | PathLike): The path to the GNPS file + mappings file (csv or tsv). output_json_file(str | PathLike): The path to the output JSON file. Returns: @@ -84,7 +84,7 @@ def podp_generate_strain_mappings( # Get mappings strain_id <-> MS_filename <-> spectrum_id mappings_strain_id_spectrum_id = get_mappings_strain_id_spectrum_id( extract_mappings_strain_id_ms_filename(podp_project_json_file), - extract_mappings_ms_filename_spectrum_id(gnps_file_mapping_tsv_file), + extract_mappings_ms_filename_spectrum_id(gnps_file_mappings_file), ) # Get mappings strain_id <-> bgc_id / spectrum_id @@ -280,24 +280,26 @@ def extract_mappings_strain_id_ms_filename( return mappings_dict -def extract_mappings_ms_filename_spectrum_id(tsv_file: str | PathLike) -> dict[str, set[str]]: +def extract_mappings_ms_filename_spectrum_id( + gnps_file_mappings_file: str | PathLike +) -> dict[str, set[str]]: """Extract mappings "MS_filename <-> spectrum_id". Args: - tsv_file(str | PathLike): The path to the GNPS file mapping TSV file. + gnps_file_mappings_file(str | PathLike): The path to the GNPS file mappings file (csv or + tsv). Returns: dict[str, set[str]]: Key is MS filename and value is a set of spectrum ids. Notes: - The `tsv_file` is generated by GNPS molecular networking. It's downloaded - from GNPS website to a file with a default name defined in - `GNPS_FILE_MAPPINGS_FILENAME`. + The `gnps_file_mappings_file` is generated by GNPS molecular networking. It's downloaded + from GNPS website to a file with a default name defined in `GNPS_FILE_MAPPINGS_FILENAME`. See Also: - `GNPSFileMappingLoader`: A class to load GNPS file mapping TSV file. + `GNPSFileMappingLoader`: A class to load GNPS file mappings file. """ - loader = GNPSFileMappingLoader(tsv_file) + loader = GNPSFileMappingLoader(gnps_file_mappings_file) return loader.mapping_reversed