diff --git a/docs/diagrams/arranger.md b/docs/diagrams/arranger.md index ad037bfb..528920bc 100644 --- a/docs/diagrams/arranger.md +++ b/docs/diagrams/arranger.md @@ -28,7 +28,7 @@ flowchart TD P --> |Yes| P1[Validate the file] ``` -## GNPS, AntiSMASH and BigScape +## GNPS and AntiSMASH ``` mermaid flowchart TD ConfigError[Dynaconf config validation error] @@ -36,7 +36,7 @@ flowchart TD UseIt[Use the data] Download[First remove existing data if relevent, then download or generate data] - A[GNPS, antiSMASH and BigSCape] --> B{Pass Dynaconf config validation?} + A[GNPS or antiSMASH] --> B{Pass Dynaconf config validation?} B -->|No | ConfigError B -->|Yes| G{Is the mode PODP?} @@ -53,6 +53,32 @@ flowchart TD J -->|Yes| UseIt ``` +## BigScape +```mermaid +flowchart TD + ConfigError[Dynaconf config validation error] + DataError[Data validation error] + UseIt[Use the data] + Download[First remove existing data if relevant, then download or generate data] + + A[BiG-SCAPE] --> B{Pass Dynaconf config validation?} + B -->|No | ConfigError + B -->|Yes| G{Is the mode PODP?} + + G -->|No, local mode| G1{Does data dir exist?} + G1 -->|No | Download + G1 -->|Yes| H{Pass data validation?} + H --> |No | DataError + H --> |Yes| UseIt + + G -->|Yes, podp mode| G2{Does data dir exist?} + G2 --> |No | Download + G2 --> |Yes | J{Pass data validation?} + J -->|No | Download --> |try max 2 times| J + J -->|Yes| UseIt +``` + + ## MIBiG Data MIBiG data is always downloaded automatically. Users cannot provide their own MIBiG data. diff --git a/src/nplinker/arranger.py b/src/nplinker/arranger.py index 24e0e56b..6f690e66 100644 --- a/src/nplinker/arranger.py +++ b/src/nplinker/arranger.py @@ -263,7 +263,9 @@ def _download_and_extract_antismash(self) -> None: def arrange_bigscape(self) -> None: """Arrange the BiG-SCAPE data. - For `local` mode, validate the BiG-SCAPE data. + For `local` mode, if the BiG-SCAPE data is provided by users, validate it and raise an error + if it is invalid. 
If the BiG-SCAPE data does not exist, run BiG-SCAPE to generate the + clustering file. For `podp` mode, if the BiG-SCAPE data does not exist, run BiG-SCAPE to generate the clustering file; if it exists but not valid, remove the data and re-run BiG-SCAPE to generate @@ -281,16 +283,19 @@ def arrange_bigscape(self) -> None: BiG-SCAPE data directory. - Check if the `data_sqlite.db` file exists in the BiG-SCAPE data directory. """ + if self.config.mode == "local" and self.bigscape_dir.exists(): + validate_bigscape(self.bigscape_dir, self.config.bigscape.cutoff) + return + pass_validation = False - if self.config.mode == "podp": - for _ in range(3): - try: - validate_bigscape(self.bigscape_dir, self.config.bigscape.cutoff) - pass_validation = True - break - except FileNotFoundError: - shutil.rmtree(self.bigscape_dir, ignore_errors=True) - self._run_bigscape() + for _ in range(3): + try: + validate_bigscape(self.bigscape_dir, self.config.bigscape.cutoff) + pass_validation = True + break + except FileNotFoundError: + shutil.rmtree(self.bigscape_dir, ignore_errors=True) + self._run_bigscape() if not pass_validation: validate_bigscape(self.bigscape_dir, self.config.bigscape.cutoff)