24
24
25
25
class MyFTP_TLS (ftplib .FTP_TLS ):
26
26
"""Explicit FTPS, with shared TLS session"""
27
+
27
28
def ntransfercmd (self , cmd , rest = None ):
28
29
conn , size = ftplib .FTP .ntransfercmd (self , cmd , rest )
29
30
if self ._prot_p :
@@ -32,6 +33,7 @@ def ntransfercmd(self, cmd, rest=None):
32
33
session = self .sock .session )
33
34
return conn , size
34
35
36
+
35
37
def create_dataframe (schema_tables , action ):
36
38
'''create pandas dataframe from the tables in schema_tables
37
39
and return schema_dataframe
@@ -50,13 +52,15 @@ def create_dataframe(schema_tables, action):
50
52
schema_dataframe = {}
51
53
52
54
for schema , table in schema_tables .items ():
53
- df = pd .read_csv (table , sep = '\t ' , comment = '#' , dtype = str )
55
+ df = pd .read_csv (table , sep = '\t ' , comment = '#' , dtype = str )
54
56
df = df .dropna (how = 'all' )
55
57
# checking for optional columns and if not present, adding them
56
58
if schema == 'sample' :
57
- optional_columns = ['accession' , 'submission_date' , 'status' , 'scientific_name' , 'taxon_id' ]
59
+ optional_columns = ['accession' , 'submission_date' ,
60
+ 'status' , 'scientific_name' , 'taxon_id' ]
58
61
elif schema == 'run' :
59
- optional_columns = ['accession' , 'submission_date' , 'status' , 'file_checksum' ]
62
+ optional_columns = ['accession' ,
63
+ 'submission_date' , 'status' , 'file_checksum' ]
60
64
else :
61
65
optional_columns = ['accession' , 'submission_date' , 'status' ]
62
66
for header in optional_columns :
@@ -161,7 +165,7 @@ def generate_stream(schema, targets, Template, center, tool):
161
165
# the run xml templates
162
166
# Adding backwards compatibility for file_format
163
167
if 'file_format' in targets :
164
- targets .rename (columns = {'file_format' :'file_type' }, inplace = True )
168
+ targets .rename (columns = {'file_format' : 'file_type' }, inplace = True )
165
169
file_attrib = ['file_name' , 'file_type' , 'file_checksum' ]
166
170
other_attrib = ['alias' , 'experiment_alias' ]
167
171
run_groups = targets [other_attrib ].groupby (targets ['alias' ])
@@ -311,6 +315,7 @@ def get_md5(filepath):
311
315
312
316
return md5sum .hexdigest ()
313
317
318
+
314
319
def get_taxon_id (scientific_name ):
315
320
"""Get taxon ID for input scientific_name.
316
321
@@ -332,6 +337,7 @@ def get_taxon_id(scientific_name):
332
337
msg = f'Oops, no taxon ID avaible for { scientific_name } . Is it a valid scientific name?'
333
338
sys .exit (msg )
334
339
340
+
335
341
def get_scientific_name (taxon_id ):
336
342
"""Get scientific name for input taxon_id.
337
343
@@ -362,28 +368,28 @@ def submit_data(file_paths, password, webin_id):
362
368
print ("\n Connecting to ftp.webin2.ebi.ac.uk...." )
363
369
try :
364
370
ftps = MyFTP_TLS (timeout = 10 )
365
- ftps .context .set_ciphers ('DEFAULT@SECLEVEL=1 ' )
371
+ ftps .context .set_ciphers ('HIGH:!DH:!aNULL ' )
366
372
ftps .connect (ftp_host , port = 21 )
367
373
ftps .auth ()
368
374
ftps .login (webin_id , password )
369
375
ftps .prot_p ()
370
376
371
- except IOError :
372
- print (ftps . lastErrorText () )
377
+ except IOError as ioe :
378
+ print (ioe )
373
379
print ("ERROR: could not connect to the ftp server.\
374
380
Please check your login details." )
381
+ sys .exit ()
375
382
for filename , path in file_paths .items ():
376
383
print (f'uploading { path } ' )
377
384
try :
378
- ftps .storbinary (f'STOR { filename } ' , open (path , 'rb' ))
379
- msg = ftps .storbinary (f'STOR { filename } ' , open (path , 'rb' ))
380
- print (msg )
385
+ print (ftps .storbinary (f'STOR { filename } ' , open (path , 'rb' )))
381
386
except BaseException as err :
382
387
print (f"ERROR: { err } " )
383
- print ("ERROR: If your connection times out at this stage, it propably is because a firewall that is in place. FTP is used in passive mode and connection will be opened to one of the ports: 40000 and 50000." )
388
+ print ("ERROR: If your connection times out at this stage, it propably is because of a firewall that is in place. FTP is used in passive mode and connection will be opened to one of the ports: 40000 and 50000." )
384
389
raise
385
390
print (ftps .quit ())
386
391
392
+
387
393
def columns_to_update (df ):
388
394
'''
389
395
returns the names of the columns that contain the cells to update
@@ -496,12 +502,12 @@ def make_update(update, ena_type):
496
502
if match and match .group (1 ) in receipt_info :
497
503
receipt_info [match .group (1 )].append (match .group (2 ))
498
504
elif match and match .group (1 ) not in receipt_info :
499
- receipt_info [match .group (1 )]= [match .group (2 )]
505
+ receipt_info [match .group (1 )] = [match .group (2 )]
500
506
for ena_type , accessions in receipt_info .items ():
501
507
print (f"\n { ena_type .capitalize ()} accession details:" )
502
508
update_list = []
503
509
for accession in accessions :
504
- extract = ( accession , receiptDate , status [action ])
510
+ extract = (accession , receiptDate , status [action ])
505
511
update_list .append (extract )
506
512
print ("\t " .join (extract ))
507
513
@@ -558,7 +564,8 @@ def update_table(schema_dataframe, schema_targets, schema_update):
558
564
559
565
return schema_dataframe
560
566
561
- def update_table_simple (schema_dataframe , schema_targets , action ):
567
+
568
+ def update_table_simple (schema_dataframe , schema_targets , action ):
562
569
"""Update schema_dataframe with info in schema_targets.
563
570
564
571
:param schema_dataframe: a dictionary - {schema:dataframe}
@@ -780,7 +787,8 @@ def main():
780
787
schema_targets = extract_targets (action , schema_dataframe )
781
788
782
789
if not schema_targets :
783
- sys .exit (f"There is no table submitted having at least one row with { action } as action in the status column." )
790
+ sys .exit (
791
+ f"There is no table submitted having at least one row with { action } as action in the status column." )
784
792
785
793
if action == 'ADD' :
786
794
# when adding run object
@@ -789,9 +797,9 @@ def main():
789
797
if 'run' in schema_targets :
790
798
# a dictionary of filename:file_path
791
799
df = schema_targets ['run' ]
792
-
800
+
793
801
file_paths = {os .path .basename (path ): os .path .abspath (path )
794
- for path in args .data }
802
+ for path in args .data }
795
803
# check if file names identical between command line and table
796
804
# if not, system exits
797
805
check_filenames (file_paths , df )
@@ -814,12 +822,13 @@ def main():
814
822
815
823
# submit data to webin ftp server
816
824
if args .no_data_upload :
817
- print ("No files will be uploaded, remove `--no_data_upload' argument to perform upload." )
825
+ print (
826
+ "No files will be uploaded, remove `--no_data_upload' argument to perform upload." )
818
827
elif draft :
819
- print ("No files will be uploaded, remove `--draft' argument to perform upload." )
828
+ print (
829
+ "No files will be uploaded, remove `--draft' argument to perform upload." )
820
830
else :
821
831
submit_data (file_paths , password , webin_id )
822
-
823
832
824
833
# when adding sample
825
834
# update schema_targets with taxon ids or scientific names
@@ -836,7 +845,8 @@ def main():
836
845
scientificName = get_scientific_name (row ['taxon_id' ])
837
846
df .loc [index , 'scientific_name' ] = scientificName
838
847
elif pd .isna (row ['taxon_id' ]) and pd .isna (row ['scientific_name' ]):
839
- sys .exit (f"No taxon_id or scientific_name was given with sample { row ['alias' ]} ." )
848
+ sys .exit (
849
+ f"No taxon_id or scientific_name was given with sample { row ['alias' ]} ." )
840
850
print ('Taxon IDs and scientific names are retrieved' )
841
851
schema_targets ['sample' ] = df
842
852
@@ -892,8 +902,8 @@ def main():
892
902
save_update (schema_tables , schema_dataframe )
893
903
elif action in ['CANCEL' , 'RELEASE' ]:
894
904
schema_dataframe = update_table_simple (schema_dataframe ,
895
- schema_targets ,
896
- action )
905
+ schema_targets ,
906
+ action )
897
907
# save updates in new tables
898
908
save_update (schema_tables , schema_dataframe )
899
909
0 commit comments