Skip to content

Commit a1b1685

Browse files
committed
Merge branch 'master' of github.com:usegalaxy-eu/ena-upload-cli
2 parents 79c224c + e45e398 commit a1b1685

File tree

1 file changed

+33
-23
lines changed

1 file changed

+33
-23
lines changed

ena_upload/ena_upload.py

Lines changed: 33 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525
class MyFTP_TLS(ftplib.FTP_TLS):
2626
"""Explicit FTPS, with shared TLS session"""
27+
2728
def ntransfercmd(self, cmd, rest=None):
2829
conn, size = ftplib.FTP.ntransfercmd(self, cmd, rest)
2930
if self._prot_p:
@@ -32,6 +33,7 @@ def ntransfercmd(self, cmd, rest=None):
3233
session=self.sock.session)
3334
return conn, size
3435

36+
3537
def create_dataframe(schema_tables, action):
3638
'''create pandas dataframe from the tables in schema_tables
3739
and return schema_dataframe
@@ -50,13 +52,15 @@ def create_dataframe(schema_tables, action):
5052
schema_dataframe = {}
5153

5254
for schema, table in schema_tables.items():
53-
df = pd.read_csv(table, sep='\t', comment='#', dtype = str)
55+
df = pd.read_csv(table, sep='\t', comment='#', dtype=str)
5456
df = df.dropna(how='all')
5557
# checking for optional columns and if not present, adding them
5658
if schema == 'sample':
57-
optional_columns = ['accession', 'submission_date', 'status', 'scientific_name', 'taxon_id']
59+
optional_columns = ['accession', 'submission_date',
60+
'status', 'scientific_name', 'taxon_id']
5861
elif schema == 'run':
59-
optional_columns = ['accession', 'submission_date', 'status', 'file_checksum']
62+
optional_columns = ['accession',
63+
'submission_date', 'status', 'file_checksum']
6064
else:
6165
optional_columns = ['accession', 'submission_date', 'status']
6266
for header in optional_columns:
@@ -161,7 +165,7 @@ def generate_stream(schema, targets, Template, center, tool):
161165
# the run xml templates
162166
# Adding backwards compatibility for file_format
163167
if 'file_format' in targets:
164-
targets.rename(columns={'file_format':'file_type'}, inplace=True)
168+
targets.rename(columns={'file_format': 'file_type'}, inplace=True)
165169
file_attrib = ['file_name', 'file_type', 'file_checksum']
166170
other_attrib = ['alias', 'experiment_alias']
167171
run_groups = targets[other_attrib].groupby(targets['alias'])
@@ -311,6 +315,7 @@ def get_md5(filepath):
311315

312316
return md5sum.hexdigest()
313317

318+
314319
def get_taxon_id(scientific_name):
315320
"""Get taxon ID for input scientific_name.
316321
@@ -332,6 +337,7 @@ def get_taxon_id(scientific_name):
332337
msg = f'Oops, no taxon ID avaible for {scientific_name}. Is it a valid scientific name?'
333338
sys.exit(msg)
334339

340+
335341
def get_scientific_name(taxon_id):
336342
"""Get scientific name for input taxon_id.
337343
@@ -362,28 +368,28 @@ def submit_data(file_paths, password, webin_id):
362368
print("\nConnecting to ftp.webin2.ebi.ac.uk....")
363369
try:
364370
ftps = MyFTP_TLS(timeout=10)
365-
ftps.context.set_ciphers('DEFAULT@SECLEVEL=1')
371+
ftps.context.set_ciphers('HIGH:!DH:!aNULL')
366372
ftps.connect(ftp_host, port=21)
367373
ftps.auth()
368374
ftps.login(webin_id, password)
369375
ftps.prot_p()
370376

371-
except IOError:
372-
print(ftps.lastErrorText())
377+
except IOError as ioe:
378+
print(ioe)
373379
print("ERROR: could not connect to the ftp server.\
374380
Please check your login details.")
381+
sys.exit()
375382
for filename, path in file_paths.items():
376383
print(f'uploading {path}')
377384
try:
378-
ftps.storbinary(f'STOR {filename}', open(path, 'rb'))
379-
msg = ftps.storbinary(f'STOR {filename}', open(path, 'rb'))
380-
print(msg)
385+
print(ftps.storbinary(f'STOR {filename}', open(path, 'rb')))
381386
except BaseException as err:
382387
print(f"ERROR: {err}")
383-
print("ERROR: If your connection times out at this stage, it propably is because a firewall that is in place. FTP is used in passive mode and connection will be opened to one of the ports: 40000 and 50000.")
388+
print("ERROR: If your connection times out at this stage, it propably is because of a firewall that is in place. FTP is used in passive mode and connection will be opened to one of the ports: 40000 and 50000.")
384389
raise
385390
print(ftps.quit())
386391

392+
387393
def columns_to_update(df):
388394
'''
389395
returns the column names where contains the cells to update
@@ -496,12 +502,12 @@ def make_update(update, ena_type):
496502
if match and match.group(1) in receipt_info:
497503
receipt_info[match.group(1)].append(match.group(2))
498504
elif match and match.group(1) not in receipt_info:
499-
receipt_info[match.group(1)]= [match.group(2)]
505+
receipt_info[match.group(1)] = [match.group(2)]
500506
for ena_type, accessions in receipt_info.items():
501507
print(f"\n{ena_type.capitalize()} accession details:")
502508
update_list = []
503509
for accession in accessions:
504-
extract = ( accession, receiptDate, status[action])
510+
extract = (accession, receiptDate, status[action])
505511
update_list.append(extract)
506512
print("\t".join(extract))
507513

@@ -558,7 +564,8 @@ def update_table(schema_dataframe, schema_targets, schema_update):
558564

559565
return schema_dataframe
560566

561-
def update_table_simple (schema_dataframe, schema_targets, action):
567+
568+
def update_table_simple(schema_dataframe, schema_targets, action):
562569
"""Update schema_dataframe with info in schema_targets.
563570
564571
:param schema_dataframe: a dictionary - {schema:dataframe}
@@ -780,7 +787,8 @@ def main():
780787
schema_targets = extract_targets(action, schema_dataframe)
781788

782789
if not schema_targets:
783-
sys.exit(f"There is no table submitted having at least one row with {action} as action in the status column.")
790+
sys.exit(
791+
f"There is no table submitted having at least one row with {action} as action in the status column.")
784792

785793
if action == 'ADD':
786794
# when adding run object
@@ -789,9 +797,9 @@ def main():
789797
if 'run' in schema_targets:
790798
# a dictionary of filename:file_path
791799
df = schema_targets['run']
792-
800+
793801
file_paths = {os.path.basename(path): os.path.abspath(path)
794-
for path in args.data}
802+
for path in args.data}
795803
# check if file names identical between command line and table
796804
# if not, system exits
797805
check_filenames(file_paths, df)
@@ -814,12 +822,13 @@ def main():
814822

815823
# submit data to webin ftp server
816824
if args.no_data_upload:
817-
print("No files will be uploaded, remove `--no_data_upload' argument to perform upload.")
825+
print(
826+
"No files will be uploaded, remove `--no_data_upload' argument to perform upload.")
818827
elif draft:
819-
print("No files will be uploaded, remove `--draft' argument to perform upload.")
828+
print(
829+
"No files will be uploaded, remove `--draft' argument to perform upload.")
820830
else:
821831
submit_data(file_paths, password, webin_id)
822-
823832

824833
# when adding sample
825834
# update schema_targets with taxon ids or scientific names
@@ -836,7 +845,8 @@ def main():
836845
scientificName = get_scientific_name(row['taxon_id'])
837846
df.loc[index, 'scientific_name'] = scientificName
838847
elif pd.isna(row['taxon_id']) and pd.isna(row['scientific_name']):
839-
sys.exit(f"No taxon_id or scientific_name was given with sample {row['alias']}.")
848+
sys.exit(
849+
f"No taxon_id or scientific_name was given with sample {row['alias']}.")
840850
print('Taxon IDs and scientific names are retrieved')
841851
schema_targets['sample'] = df
842852

@@ -892,8 +902,8 @@ def main():
892902
save_update(schema_tables, schema_dataframe)
893903
elif action in ['CANCEL', 'RELEASE']:
894904
schema_dataframe = update_table_simple(schema_dataframe,
895-
schema_targets,
896-
action)
905+
schema_targets,
906+
action)
897907
# save updates in new tables
898908
save_update(schema_tables, schema_dataframe)
899909

0 commit comments

Comments
 (0)