Merge branch 'master' of github.com:usegalaxy-eu/ena-upload-cli

bedroesb · bedroesb · commit a1b1685f6677 · 2021-12-24T12:00:12.000+01:00
diff --git a/ena_upload/ena_upload.py b/ena_upload/ena_upload.py
@@ -24,6 +24,7 @@
 
 class MyFTP_TLS(ftplib.FTP_TLS):
     """Explicit FTPS, with shared TLS session"""
+
     def ntransfercmd(self, cmd, rest=None):
         conn, size = ftplib.FTP.ntransfercmd(self, cmd, rest)
         if self._prot_p:
@@ -32,6 +33,7 @@ def ntransfercmd(self, cmd, rest=None):
                                             session=self.sock.session)
         return conn, size
 
+
 def create_dataframe(schema_tables, action):
     '''create pandas dataframe from the tables in schema_tables
        and return schema_dataframe
@@ -50,13 +52,15 @@ def create_dataframe(schema_tables, action):
     schema_dataframe = {}
 
     for schema, table in schema_tables.items():
-        df = pd.read_csv(table, sep='\t', comment='#', dtype = str)
+        df = pd.read_csv(table, sep='\t', comment='#', dtype=str)
         df = df.dropna(how='all')
         # checking for optional columns and if not present, adding them
         if schema == 'sample':
-            optional_columns = ['accession', 'submission_date', 'status', 'scientific_name', 'taxon_id']
+            optional_columns = ['accession', 'submission_date',
+                                'status', 'scientific_name', 'taxon_id']
         elif schema == 'run':
-            optional_columns = ['accession', 'submission_date', 'status', 'file_checksum']
+            optional_columns = ['accession',
+                                'submission_date', 'status', 'file_checksum']
         else:
             optional_columns = ['accession', 'submission_date', 'status']
         for header in optional_columns:
@@ -161,7 +165,7 @@ def generate_stream(schema, targets, Template, center, tool):
         # the run xml templates
         # Adding backwards compatibility for file_format
         if 'file_format' in targets:
-            targets.rename(columns={'file_format':'file_type'}, inplace=True)
+            targets.rename(columns={'file_format': 'file_type'}, inplace=True)
         file_attrib = ['file_name', 'file_type', 'file_checksum']
         other_attrib = ['alias', 'experiment_alias']
         run_groups = targets[other_attrib].groupby(targets['alias'])
@@ -311,6 +315,7 @@ def get_md5(filepath):
 
     return md5sum.hexdigest()
 
+
 def get_taxon_id(scientific_name):
     """Get taxon ID for input scientific_name.
 
@@ -332,6 +337,7 @@ def get_taxon_id(scientific_name):
         msg = f'Oops, no taxon ID avaible for {scientific_name}. Is it a valid scientific name?'
         sys.exit(msg)
 
+
 def get_scientific_name(taxon_id):
     """Get scientific name for input taxon_id.
 
@@ -362,28 +368,28 @@ def submit_data(file_paths, password, webin_id):
     print("\nConnecting to ftp.webin2.ebi.ac.uk....")
     try:
         ftps = MyFTP_TLS(timeout=10)
-        ftps.context.set_ciphers('DEFAULT@SECLEVEL=1')
+        ftps.context.set_ciphers('HIGH:!DH:!aNULL')
         ftps.connect(ftp_host, port=21)
         ftps.auth()
         ftps.login(webin_id, password)
         ftps.prot_p()
 
-    except IOError:
-        print(ftps.lastErrorText())
+    except IOError as ioe:
+        print(ioe)
         print("ERROR: could not connect to the ftp server.\
                Please check your login details.")
+        sys.exit()
     for filename, path in file_paths.items():
         print(f'uploading {path}')
         try:
-            ftps.storbinary(f'STOR {filename}', open(path, 'rb'))
-            msg = ftps.storbinary(f'STOR {filename}', open(path, 'rb'))
-            print(msg)
+            print(ftps.storbinary(f'STOR {filename}', open(path, 'rb')))
         except BaseException as err:
             print(f"ERROR: {err}")
-            print("ERROR: If your connection times out at this stage, it propably is because a firewall that is in place. FTP is used in passive mode and connection will be opened to one of the ports: 40000 and 50000.")
+            print("ERROR: If your connection times out at this stage, it propably is because of a firewall that is in place. FTP is used in passive mode and connection will be opened to one of the ports: 40000 and 50000.")
             raise
     print(ftps.quit())
 
+
 def columns_to_update(df):
     '''
     returns the column names where contains the cells to update
@@ -496,12 +502,12 @@ def make_update(update, ena_type):
             if match and match.group(1) in receipt_info:
                 receipt_info[match.group(1)].append(match.group(2))
             elif match and match.group(1) not in receipt_info:
-                receipt_info[match.group(1)]= [match.group(2)]
+                receipt_info[match.group(1)] = [match.group(2)]
         for ena_type, accessions in receipt_info.items():
             print(f"\n{ena_type.capitalize()} accession details:")
             update_list = []
             for accession in accessions:
-                extract = ( accession, receiptDate, status[action])
+                extract = (accession, receiptDate, status[action])
                 update_list.append(extract)
                 print("\t".join(extract))
 
@@ -558,7 +564,8 @@ def update_table(schema_dataframe, schema_targets, schema_update):
 
     return schema_dataframe
 
-def update_table_simple (schema_dataframe, schema_targets, action):
+
+def update_table_simple(schema_dataframe, schema_targets, action):
     """Update schema_dataframe with info in schema_targets.
 
     :param schema_dataframe: a dictionary - {schema:dataframe}
@@ -780,7 +787,8 @@ def main():
     schema_targets = extract_targets(action, schema_dataframe)
 
     if not schema_targets:
-        sys.exit(f"There is no table submitted having at least one row with {action} as action in the status column.")
+        sys.exit(
+            f"There is no table submitted having at least one row with {action} as action in the status column.")
 
     if action == 'ADD':
         # when adding run object
@@ -789,9 +797,9 @@ def main():
         if 'run' in schema_targets:
             # a dictionary of filename:file_path
             df = schema_targets['run']
-               
+
             file_paths = {os.path.basename(path): os.path.abspath(path)
-                                                for path in args.data}
+                          for path in args.data}
             # check if file names identical between command line and table
             # if not, system exits
             check_filenames(file_paths, df)
@@ -814,12 +822,13 @@ def main():
 
             # submit data to webin ftp server
             if args.no_data_upload:
-                print("No files will be uploaded, remove `--no_data_upload' argument to perform upload.")
+                print(
+                    "No files will be uploaded, remove `--no_data_upload' argument to perform upload.")
             elif draft:
-                print("No files will be uploaded, remove `--draft' argument to perform upload.")
+                print(
+                    "No files will be uploaded, remove `--draft' argument to perform upload.")
             else:
                 submit_data(file_paths, password, webin_id)
-                  
 
         # when adding sample
         # update schema_targets with taxon ids or scientific names
@@ -836,7 +845,8 @@ def main():
                     scientificName = get_scientific_name(row['taxon_id'])
                     df.loc[index, 'scientific_name'] = scientificName
                 elif pd.isna(row['taxon_id']) and pd.isna(row['scientific_name']):
-                    sys.exit(f"No taxon_id or scientific_name was given with sample {row['alias']}.")
+                    sys.exit(
+                        f"No taxon_id or scientific_name was given with sample {row['alias']}.")
             print('Taxon IDs and scientific names are retrieved')
             schema_targets['sample'] = df
 
@@ -892,8 +902,8 @@ def main():
             save_update(schema_tables, schema_dataframe)
         elif action in ['CANCEL', 'RELEASE']:
             schema_dataframe = update_table_simple(schema_dataframe,
-                                            schema_targets,
-                                            action)
+                                                   schema_targets,
+                                                   action)
             # save updates in new tables
             save_update(schema_tables, schema_dataframe)