From 76d63a6fd01a7da3e6ae7ffdba950884db6ce556 Mon Sep 17 00:00:00 2001 From: jnwei Date: Fri, 21 Nov 2025 12:56:28 +0700 Subject: [PATCH 1/6] Ruff formatting changes --- .../core/data/tools/colabfold_msa_server.py | 13 ++++++++---- openfold3/tests/test_colabfold_msa.py | 21 +++++++++---------- 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/openfold3/core/data/tools/colabfold_msa_server.py b/openfold3/core/data/tools/colabfold_msa_server.py index b4c5f21d..d8b694ba 100644 --- a/openfold3/core/data/tools/colabfold_msa_server.py +++ b/openfold3/core/data/tools/colabfold_msa_server.py @@ -711,17 +711,22 @@ def query_format_main(self): # Read template alignments if the file exists and has content template_alignments_file = self.output_directory / "raw/main/pdb70.m8" - if template_alignments_file.exists() and template_alignments_file.stat().st_size > 0: + if ( + template_alignments_file.exists() + and template_alignments_file.stat().st_size > 0 + ): template_alignments = pd.read_csv( template_alignments_file, sep="\t", header=None ) m_with_templates = set(template_alignments[0]) else: - # pdb70.m8 downloaded by Colabfold returned empty - No template alignments available + # pdb70.m8 downloaded by Colabfold returned empty - No template alignments available # Create empty DataFrame with expected column structure (at least column 0) # to match the structure when file is read with header=None. - logger.warning(f"Colabfold returned no templates. \ - Proceeding without template alignments for this batch.") + logger.warning( + f"Colabfold returned no templates. \ + Proceeding without template alignments for this batch." 
+ ) template_alignments = pd.DataFrame() m_with_templates = set() diff --git a/openfold3/tests/test_colabfold_msa.py b/openfold3/tests/test_colabfold_msa.py index d780afc8..0a428be4 100644 --- a/openfold3/tests/test_colabfold_msa.py +++ b/openfold3/tests/test_colabfold_msa.py @@ -333,10 +333,10 @@ def test_empty_m8_file_handling( """Test that empty pdb70.m8 file is handled gracefully without crashing.""" test_sequence = "TESTSEQUENCE" query = self._construct_monomer_query(test_sequence) - + # Create an empty pdb70.m8 file (0 bytes) to simulate ColabFold returning empty template file self._make_empty_template_file(tmp_path) - + mapper = collect_colabfold_msa_data(query) runner = ColabFoldQueryRunner( colabfold_mapper=mapper, @@ -345,19 +345,19 @@ def test_empty_m8_file_handling( user_agent="test-agent", host_url="https://dummy.url", ) - + # Should not raise EmptyDataError or any other exception runner.query_format_main() - + # Verify MSA processing still works expected_unpaired_dir = tmp_path / "main" assert expected_unpaired_dir.exists(), "Expected main MSA directory to exist" - + expected_file = f"{get_sequence_hash(test_sequence)}.npz" assert (expected_unpaired_dir / expected_file).exists(), ( f"Expected MSA file {expected_file} to exist" ) - + # Verify no template files are created (since m8 file is empty) template_alignments_dir = tmp_path / "template" if template_alignments_dir.exists(): @@ -366,7 +366,7 @@ def test_empty_m8_file_handling( assert len(template_files) == 0, ( "Expected no template files to be created when m8 file is empty" ) - + # Test preprocess_colabfold_msas with empty template file msa_compute_settings = MsaComputationSettings( msa_file_format="npz", @@ -376,13 +376,12 @@ def test_empty_m8_file_handling( msa_output_directory=tmp_path, cleanup_msa_dir=False, ) - + # Call preprocess_colabfold_msas - should not raise any exception processed_query_set = preprocess_colabfold_msas( - inference_query_set=query, - 
compute_settings=msa_compute_settings + inference_query_set=query, compute_settings=msa_compute_settings ) - + # Verify that template fields are None/empty for all chains for query_name, query_obj in processed_query_set.queries.items(): for chain in query_obj.chains: From dba0840c851602bd7f496550ec1144b1185729c9 Mon Sep 17 00:00:00 2001 From: jnwei Date: Fri, 21 Nov 2025 13:11:58 +0700 Subject: [PATCH 2/6] Skip hacks in case cutlass path is otherwise defined (as with a local install) --- openfold3/hacks.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/openfold3/hacks.py b/openfold3/hacks.py index 97448b80..8105d8bd 100644 --- a/openfold3/hacks.py +++ b/openfold3/hacks.py @@ -1,14 +1,20 @@ import os from pathlib import Path +PLACEHOLDER_PATH = "placeholder" def prep_deepspeed(): # deepspeed requires the envvar set, but doesn't care about value - os.environ["CUTLASS_PATH"] = os.environ.get("CUTLASS_PATH", "placeholder") + if not os.environ["CUTLASS_PATH"]: + os.environ["CUTLASS_PATH"] = os.environ.get("CUTLASS_PATH", PLACEHOLDER_PATH) def prep_cutlass(): - # apparently need to set the headers for cutlass + # Skip this step if cutlass package itself is available + if os.environ["CUTLASS_PATH"] != PLACEHOLDER_PATH: + return + + # otherwise, apparently need to set the headers for cutlass import cutlass_library headers_dir = Path(cutlass_library.__file__).parent / "source/include" From 8377478b47634248f9ba09c76609a66c3f1594dd Mon Sep 17 00:00:00 2001 From: jnwei Date: Fri, 21 Nov 2025 13:20:00 +0700 Subject: [PATCH 3/6] isinstance OR fixes --- openfold3/core/data/primitives/sequence/msa.py | 6 +++--- openfold3/core/data/primitives/structure/labels.py | 2 +- openfold3/core/utils/chunk_utils.py | 2 +- openfold3/core/utils/tensor_utils.py | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/openfold3/core/data/primitives/sequence/msa.py b/openfold3/core/data/primitives/sequence/msa.py index e33f806b..16aa01af 100644 --- 
a/openfold3/core/data/primitives/sequence/msa.py +++ b/openfold3/core/data/primitives/sequence/msa.py @@ -141,8 +141,8 @@ def concatenate( f"deletion {d1.shape[1]} vs {d2.shape[1]})." ) # Preserve metadata if both are list/ndarray - if isinstance(self.metadata, (list, np.ndarray)) and isinstance( - msa_array.metadata, (list, np.ndarray) + if isinstance(self.metadata, list | np.ndarray) and isinstance( + msa_array.metadata, list | np.ndarray ): metadata_concat_fn = partial(np.concatenate, axis=0) else: @@ -223,7 +223,7 @@ def multi_concatenate( # metadata: can only stitch if all are array-like if all(isinstance(md, pd.DataFrame) for md in metas): meta_concat = pd.DataFrame() # pd.concat(metas, ignore_index=True) - elif all(isinstance(md, (list, np.ndarray)) for md in metas): + elif all(isinstance(md, list | np.ndarray) for md in metas): meta_concat = np.concatenate([np.asarray(md) for md in metas], axis=0) else: meta_concat = pd.DataFrame() diff --git a/openfold3/core/data/primitives/structure/labels.py b/openfold3/core/data/primitives/structure/labels.py index 1190d03b..615379be 100644 --- a/openfold3/core/data/primitives/structure/labels.py +++ b/openfold3/core/data/primitives/structure/labels.py @@ -649,7 +649,7 @@ class AtomArrayView: """Container to access underlying arrays holding AtomArray attributes.""" def __init__(self, atom_array: AtomArray, indices: np.ndarray | slice): - if not isinstance(indices, (np.ndarray, slice)): + if not isinstance(indices, np.ndarray | slice): raise ValueError( "The indices argument must be a NumPy array or a slice object." 
) diff --git a/openfold3/core/utils/chunk_utils.py b/openfold3/core/utils/chunk_utils.py index 3b8032c9..79a86fb9 100644 --- a/openfold3/core/utils/chunk_utils.py +++ b/openfold3/core/utils/chunk_utils.py @@ -389,7 +389,7 @@ def _compare_arg_caches(self, ac1, ac2): consistent = True for a1, a2 in zip(ac1, ac2, strict=True): assert type(a1) is type(a2) - if isinstance(a1, (list, tuple)): + if isinstance(a1, list | tuple): consistent &= self._compare_arg_caches(a1, a2) elif isinstance(a1, dict): a1_items = [v for _, v in sorted(a1.items(), key=lambda x: x[0])] diff --git a/openfold3/core/utils/tensor_utils.py b/openfold3/core/utils/tensor_utils.py index 255174f4..8132cd1b 100644 --- a/openfold3/core/utils/tensor_utils.py +++ b/openfold3/core/utils/tensor_utils.py @@ -63,7 +63,7 @@ def dict_multimap(fn, dicts): new_dict[k] = [ dict_multimap(fn, [x[idx] for x in all_v]) for idx in range(len(v)) ] - elif isinstance(v, (AtomArray, str)): + elif isinstance(v, AtomArray | str): new_dict[k] = all_v else: new_dict[k] = fn(all_v) From ff2298dca89c4cf288d018198eab93306d987f2e Mon Sep 17 00:00:00 2001 From: jnwei Date: Fri, 21 Nov 2025 13:20:12 +0700 Subject: [PATCH 4/6] ruff formatting --- openfold3/core/data/tools/colabfold_msa_server.py | 2 +- openfold3/hacks.py | 5 +++-- openfold3/tests/test_colabfold_msa.py | 11 +++++------ 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/openfold3/core/data/tools/colabfold_msa_server.py b/openfold3/core/data/tools/colabfold_msa_server.py index d8b694ba..b52f7b33 100644 --- a/openfold3/core/data/tools/colabfold_msa_server.py +++ b/openfold3/core/data/tools/colabfold_msa_server.py @@ -724,7 +724,7 @@ def query_format_main(self): # Create empty DataFrame with expected column structure (at least column 0) # to match the structure when file is read with header=None. logger.warning( - f"Colabfold returned no templates. \ + "Colabfold returned no templates. \ Proceeding without template alignments for this batch." 
) template_alignments = pd.DataFrame() diff --git a/openfold3/hacks.py b/openfold3/hacks.py index 8105d8bd..046f38e9 100644 --- a/openfold3/hacks.py +++ b/openfold3/hacks.py @@ -3,14 +3,15 @@ PLACEHOLDER_PATH = "placeholder" + def prep_deepspeed(): # deepspeed requires the envvar set, but doesn't care about value if not os.environ["CUTLASS_PATH"]: - os.environ["CUTLASS_PATH"] = os.environ.get("CUTLASS_PATH", PLACEHOLDER_PATH) + os.environ["CUTLASS_PATH"] = os.environ.get("CUTLASS_PATH", PLACEHOLDER_PATH) def prep_cutlass(): - # Skip this step if cutlass package itself is available + # Skip this step if cutlass package itself is available if os.environ["CUTLASS_PATH"] != PLACEHOLDER_PATH: return diff --git a/openfold3/tests/test_colabfold_msa.py b/openfold3/tests/test_colabfold_msa.py index 0a428be4..db85cc5a 100644 --- a/openfold3/tests/test_colabfold_msa.py +++ b/openfold3/tests/test_colabfold_msa.py @@ -139,7 +139,7 @@ def _make_dummy_template_file(path: Path): @staticmethod def _make_empty_template_file(path: Path): - """Create an empty pdb70.m8 file to simulate ColabFold returning empty template file.""" + """Create an empty pdb70.m8 file to simulate ColabFold returning empty template.""" raw_main_dir = path / "raw" / "main" raw_main_dir.mkdir(parents=True, exist_ok=True) # Create an empty file (0 bytes) @@ -334,7 +334,6 @@ def test_empty_m8_file_handling( test_sequence = "TESTSEQUENCE" query = self._construct_monomer_query(test_sequence) - # Create an empty pdb70.m8 file (0 bytes) to simulate ColabFold returning empty template file self._make_empty_template_file(tmp_path) mapper = collect_colabfold_msa_data(query) @@ -387,13 +386,13 @@ def test_empty_m8_file_handling( for chain in query_obj.chains: assert chain.template_alignment_file_path is None, ( f"Expected template_alignment_file_path to be None for chain " - f"{chain.chain_ids} of query {query_name} when template file is empty, " - f"but got {chain.template_alignment_file_path}" + f"{chain.chain_ids} of 
query {query_name} when template file " + f"is empty, but got {chain.template_alignment_file_path}" ) assert chain.template_entry_chain_ids is None, ( f"Expected template_entry_chain_ids to be None for chain " - f"{chain.chain_ids} of query {query_name} when template file is empty, " - f"but got {chain.template_entry_chain_ids}" + f"{chain.chain_ids} of query {query_name} when template file " + f"is empty, but got {chain.template_entry_chain_ids}" ) From 86661b4dbbfcc3baa9450cd1e19a812ffcb6d086 Mon Sep 17 00:00:00 2001 From: jnwei Date: Fri, 21 Nov 2025 14:42:47 +0700 Subject: [PATCH 5/6] ruff formatting --- openfold3/core/data/tools/colabfold_msa_server.py | 3 ++- openfold3/tests/test_colabfold_msa.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/openfold3/core/data/tools/colabfold_msa_server.py b/openfold3/core/data/tools/colabfold_msa_server.py index b52f7b33..be95746f 100644 --- a/openfold3/core/data/tools/colabfold_msa_server.py +++ b/openfold3/core/data/tools/colabfold_msa_server.py @@ -720,7 +720,8 @@ def query_format_main(self): ) m_with_templates = set(template_alignments[0]) else: - # pdb70.m8 downloaded by Colabfold returned empty - No template alignments available + # pdb70.m8 downloaded by Colabfold returned empty + # --> No template alignments available # Create empty DataFrame with expected column structure (at least column 0) # to match the structure when file is read with header=None. 
logger.warning( diff --git a/openfold3/tests/test_colabfold_msa.py b/openfold3/tests/test_colabfold_msa.py index db85cc5a..b039cada 100644 --- a/openfold3/tests/test_colabfold_msa.py +++ b/openfold3/tests/test_colabfold_msa.py @@ -139,7 +139,7 @@ def _make_dummy_template_file(path: Path): @staticmethod def _make_empty_template_file(path: Path): - """Create an empty pdb70.m8 file to simulate ColabFold returning empty template.""" + """Create an empty pdb70.m8 file to simulate ColabFold empty templates.""" raw_main_dir = path / "raw" / "main" raw_main_dir.mkdir(parents=True, exist_ok=True) # Create an empty file (0 bytes) From 34c39bac9f773da31e59e6dfed1186cba2063a0c Mon Sep 17 00:00:00 2001 From: jnwei Date: Fri, 21 Nov 2025 15:05:49 +0700 Subject: [PATCH 6/6] update hacks.py to more robust version --- openfold3/hacks.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/openfold3/hacks.py b/openfold3/hacks.py index 046f38e9..db606c24 100644 --- a/openfold3/hacks.py +++ b/openfold3/hacks.py @@ -1,18 +1,27 @@ +import importlib import os from pathlib import Path -PLACEHOLDER_PATH = "placeholder" - def prep_deepspeed(): # deepspeed requires the envvar set, but doesn't care about value - if not os.environ["CUTLASS_PATH"]: - os.environ["CUTLASS_PATH"] = os.environ.get("CUTLASS_PATH", PLACEHOLDER_PATH) + os.environ["CUTLASS_PATH"] = os.environ.get("CUTLASS_PATH", "placeholder") def prep_cutlass(): - # Skip this step if cutlass package itself is available - if os.environ["CUTLASS_PATH"] != PLACEHOLDER_PATH: + cutlass_lib_is_installed = importlib.util.find_spec("cutlass_library") is not None + cutlass_path = Path(os.environ.get("CUTLASS_PATH", "placeholder")) + + # This workaround is used when the conda environment is created with the + # environments/production.yml + installation of cutlass repo + if not cutlass_lib_is_installed: + if not cutlass_path.exists(): + raise OSError( + "CUTLASS_PATH environment variable is not set to a valid 
path, " + "and cutlass_library is not installed. Please install nvidia-cutlass " + "via pip or set CUTLASS_PATH to the root of a local cutlass clone." + ) + return # otherwise, apparently need to set the headers for cutlass