diff --git a/README.md b/README.md index 64ff873..e478527 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,20 @@ $ pip3 install --user -r requirements.txt ## Scripts -There is a selection of scripts available in the [`scripts`](https://github.com/FLMNH-MGCL/digitization/tree/main/scripts) directory. They all have unique CLI structures, so be sure to run whichever is needed with the `--help` flag to get started. +There is a selection of scripts available in the [`scripts`](https://github.com/FLMNH-MGCL/digitization/tree/main/scripts) directory. They all have unique CLI structures, so be sure to run whichever is needed with the `--help` flag to get started. The table below provides a brief overview of each script: + +| Script | Description | +| -------------------- | ---------------------------------------------------------------------------------------------------------- | +| `dynaiello.py` | A version of the Aiello script with fewer column restrictions. Copy and rename entries based on a CSV file. 
| +| `gene_copy.py` | Removes divergent consensus sequences (IBA pipeline) from .fas/.fasta files | +| `gene_parser.py` | Parses .fa/.fasta files to extract accession numbers and gene names | +| `mgcl_tracker.py` | Tracks the used catalog numbers in the filesystem against a range/csv of numbers | +| `protein_combine.py` | Combines separated protein/nucleotide files into one combined file | +| `relocate.py` | _(deprecated)_ Relocates 'troublesome' images based on the log output of other scripts | +| `suspect_numbers.py` | Aggregates 'suspect' catalog numbers in a filesystem | +| `unique_values.py` | Outputs all the unique values in the columns of a CSV or XLSX file | +| `wls.py` | _(deprecated)_ Generates CSV of specimens at current working directory | +| `wrangler.py` | Assigns BOMBID numbers to collection specimens | ## Digitization Program diff --git a/scripts/gene_parser.py b/scripts/gene_parser.py index d47f651..b75ee8e 100644 --- a/scripts/gene_parser.py +++ b/scripts/gene_parser.py @@ -209,9 +209,6 @@ def collect_gene_data(self): if not header_line: break - # gene_line = f.readline() - - # gene = tuple((header_line.strip(), gene_line.strip())) gene = GeneParser.parse_gene_header(header_line.strip()) self.genes.append(gene) diff --git a/scripts/unique_values.py b/scripts/unique_values.py index f9a327e..2dc48cd 100644 --- a/scripts/unique_values.py +++ b/scripts/unique_values.py @@ -106,7 +106,7 @@ def verify_grouping(self): for col in self.group_by: try: self.raw_data[col] - except: + except Exception: error_message( "{} does not exist in the provided input file".format(col) ) @@ -115,7 +115,7 @@ def verify_grouping(self): for col in self.group_for: try: self.raw_data[col] - except: + except Exception: error_message( "{} does not exist in the provided input file".format(col) ) @@ -134,7 +134,6 @@ def write_out(self): def write_groups(self): logfile = Uniquer.generate_logname("UNIQUE_VALUES", self.destination) - # merged = None for heading, frame in 
self.unique_frames: with open(logfile, "a") as f: f.write(heading + "\n") @@ -149,11 +148,6 @@ def write_groups(self): # print(merged) def run(self): - - # print("\nParsing CSV...\n") - # self.raw_csv_data = pd.read_csv( - # self.csv_path, header=0, encoding="ISO-8859-1", low_memory=False) - if self.group_by: if self.group_for is not None: for col in self.group_for: diff --git a/scripts/wls.py b/scripts/wls.py index f97ae19..bf8cbc3 100644 --- a/scripts/wls.py +++ b/scripts/wls.py @@ -218,6 +218,7 @@ def extract_filter(argument_list, arg_len): def main(): argument_list = sys.argv arg_len = len(argument_list) + filter = extract_filter(argument_list, arg_len) if filter[0] == "BAD": @@ -274,8 +275,6 @@ def main(): # unknown option print("Unknown usage.") - # input("Press enter to exit...") - if __name__ == "__main__": main() \ No newline at end of file