Skip to content

Commit

Permalink
Checking if filenames include blank spaces.
Browse files Browse the repository at this point in the history
  • Loading branch information
rfm-targa committed Mar 22, 2024
1 parent 8abe70a commit 640daff
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 27 deletions.
15 changes: 0 additions & 15 deletions CHEWBBACA/AlleleCall/allele_call.py
Original file line number Diff line number Diff line change
Expand Up @@ -1960,21 +1960,6 @@ def allele_calling(fasta_files, schema_directory, temp_directory,
full_to_basename = im.mapping_function(fasta_files, fo.file_basename, [False])
full_to_unique = {k: fo.split_joiner(v, [0], '.') for k, v in full_to_basename.items()}

# Detect if some inputs share the same unique prefix
basename_list = list(full_to_unique.values())
if len(set(basename_list)) < len(fasta_files):
basename_counts = [[basename, basename_list.count(basename)]
for basename in set(basename_list)]
repeated_basenames = ['{0}: {1}'.format(*l)
for l in basename_counts if l[1] > 1]
# Only delete temp directory created for each run
# Do not delete output directory because it might include other files
fo.delete_directory(temp_directory)
sys.exit('\nSome input files share the same filename prefix '
'(substring before the first "." in the filename). '
'Please make sure that every input file has a unique '
'filename prefix.\n{0}'.format('\n'.join(repeated_basenames)))

# Create directory to store files with Pyrodigal results
pyrodigal_path = fo.join_paths(temp_directory, ['1_cds_prediction'])
fo.create_directory(pyrodigal_path)
Expand Down
12 changes: 0 additions & 12 deletions CHEWBBACA/CreateSchema/create_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,18 +187,6 @@ def create_schema_seed(fasta_files, output_directory, schema_name, ptf_path,
full_to_unique = {k: fo.split_joiner(v, [0], '.')
for k, v in full_to_basename.items()}

# Detect if some inputs share the same unique prefix
basename_list = list(full_to_unique.values())
if len(set(basename_list)) < len(fasta_files):
basename_counts = [[basename, basename_list.count(basename)]
for basename in set(basename_list)]
repeated_basenames = ['{0}: {1}'.format(*l)
for l in basename_counts if l[1] > 1]
sys.exit('\nSome input files share the same filename prefix '
'(substring before the first "." in the filename). '
'Please make sure that every input file has a unique '
'filename prefix.\n{0}'.format('\n'.join(repeated_basenames)))

# Create directory to store temporary files
temp_directory = fo.join_paths(output_directory, ['temp'])
fo.create_directory(temp_directory)
Expand Down
8 changes: 8 additions & 0 deletions CHEWBBACA/chewBBACA.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,10 @@ def msg(name=None):

genome_list = fo.join_paths(args.output_directory, [ct.GENOME_LIST])
args.input_files = pv.check_input_type(args.input_files, genome_list)
# Detect if some inputs share the same unique prefix
repeated_prefixes = pv.check_unique_prefixes(args.input_files)
# Detect if filenames include blank spaces
blank_spaces = pv.check_blanks(args.input_files)

# Add clustering parameters
args.word_size = ct.WORD_SIZE_DEFAULT
Expand Down Expand Up @@ -490,6 +494,10 @@ def msg(name=None):

genome_list = fo.join_paths(args.output_directory, [ct.GENOME_LIST])
genome_list = pv.check_input_type(args.input_files, genome_list)
# Detect if some inputs share the same unique prefix
repeated_prefixes = pv.check_unique_prefixes(genome_list)
# Detect if filenames include blank spaces
blank_spaces = pv.check_blanks(genome_list)

# Determine if schema was downloaded from Chewie-NS
ns_config = fo.join_paths(args.schema_directory, ['.ns_config'])
Expand Down
65 changes: 65 additions & 0 deletions CHEWBBACA/utils/parameters_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -1295,3 +1295,68 @@ def validate_loci_list(input_path, output_file, parent_dir=None):
fo.write_lines(files, output_file)

return output_file


def check_unique_prefixes(input_list):
    """Check if all input files have a unique identifier.

    The identifier of an input file is its filename prefix, i.e. the
    substring before the first "." in the file's basename (as stated in
    the error message below; assumes `fo.split_joiner(name, [0], '.')`
    returns that substring -- TODO confirm against `fo`).

    Parameters
    ----------
    input_list : str
        Path to file that contains the list of paths to input files.

    Returns
    -------
    bool
        False if there are no input files sharing the same identifier.

    Raises
    ------
    SystemExit
        - If there are multiple files sharing the same prefix.
    """
    # Function-scope import so the block does not depend on the
    # module-level import list
    from collections import Counter

    input_paths = fo.read_lines(input_list)
    basenames = [fo.file_basename(file) for file in input_paths]
    unique_ids = [fo.split_joiner(name, [0], '.') for name in basenames]

    # Count occurrences of each prefix, not of each full basename.
    # Files such as "a.fasta" and "a.fna" have distinct basenames but
    # share the prefix "a"; counting basenames would report nothing.
    prefix_counts = Counter(unique_ids)
    repeated_prefixes = ['{0}: {1}'.format(prefix, count)
                         for prefix, count in prefix_counts.items()
                         if count > 1]

    # Detect if some inputs share the same unique prefix
    if repeated_prefixes:
        sys.exit('\nSome input files share the same filename prefix '
                 '(substring before the first "." in the filename). '
                 'Please make sure that every input file has a unique '
                 'filename prefix.\n{0}'.format('\n'.join(repeated_prefixes)))

    return False


def check_blank(input_list):
    """Check if input files do not include blank spaces in the filename.

    Only the basename of each path is inspected and only the space
    character (' ') is searched for.

    Parameters
    ----------
    input_list : str
        Path to file that contains the list of paths to input files.

    Returns
    -------
    bool
        False if there are no blank spaces in the filenames.

    Raises
    ------
    SystemExit
        - If there are blank spaces in any of the filenames.
    """
    input_paths = fo.read_lines(input_list)
    basenames = [fo.file_basename(file) for file in input_paths]
    include_blanks = [name for name in basenames if ' ' in name]

    if include_blanks:
        sys.exit('\nThe following input files include blank spaces '
                 'in the filename:\n{0}\nPlease ensure that filenames '
                 'do not include blank spaces or special characters '
                 '(e.g. !@#?$^*()+)'.format('\n'.join(include_blanks)))

    return False


# Callers invoke this check as `check_blanks`; expose that name as an
# alias so both spellings resolve to the same function.
check_blanks = check_blank

0 comments on commit 640daff

Please sign in to comment.