Skip to content

Commit

Permalink
Catch PDBBind analysis errors
Browse files Browse the repository at this point in the history
  • Loading branch information
amorehead committed Aug 11, 2024
1 parent 2e128f0 commit ef89b5b
Showing 1 changed file with 30 additions and 24 deletions.
54 changes: 30 additions & 24 deletions notebooks/dataset_interaction_analysis_plotting.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -221,35 +221,41 @@
" for protein_filepath, ligand_filepath in tqdm(\n",
" pdbbind_protein_ligand_filepath_pairs, desc=\"Processing PDBBind 2020 set\"\n",
" ):\n",
" temp_protein_filepath = create_temp_pdb_with_only_molecule_type_residues(\n",
" protein_filepath, molecule_type=\"protein\"\n",
" )\n",
" ligand_mol = None\n",
" try:\n",
" ligand_mol = Chem.MolFromMolFile(ligand_filepath)\n",
" except Exception as e:\n",
" ligand_mol = Chem.MolFromMolFile(ligand_filepath, sanitize=False)\n",
" if ligand_mol is None:\n",
" print(\n",
" f\"Using the `.mol2` file for PDBBind 2020 target {ligand_filepath} failed. We found a `.sdf` file instead and are trying to use that. Be aware that the `.sdf` files from PDBBind 2020 are missing chirality tags, although we will do our best to impute such information automatically using RDKit. Reference: https://www.blopig.com/blog/2021/09/watch-out-when-using-pdbbind.\"\n",
" temp_protein_filepath = create_temp_pdb_with_only_molecule_type_residues(\n",
" protein_filepath, molecule_type=\"protein\"\n",
" )\n",
" ligand_mol = None\n",
" try:\n",
" ligand_mol = Chem.MolFromMolFile(ligand_filepath.replace(\".mol2\", \".sdf\"))\n",
" except Exception as e:\n",
" ligand_mol = Chem.MolFromMolFile(\n",
" ligand_filepath.replace(\".mol2\", \".sdf\"), sanitize=False\n",
" )\n",
" try:\n",
" Chem.rdmolops.AssignAtomChiralTagsFromStructure(ligand_mol)\n",
" ligand_mol = Chem.MolFromMolFile(ligand_filepath)\n",
" except Exception as e:\n",
" ligand_mol = Chem.MolFromMolFile(ligand_filepath, sanitize=False)\n",
" if ligand_mol is None:\n",
" print(\n",
" f\"Could not assign chirality tags to the atoms in the PDBBind ligand molecule from {ligand_filepath}.\"\n",
" f\"Using the `.mol2` file for PDBBind 2020 target {ligand_filepath} failed. We found a `.sdf` file instead and are trying to use that. Be aware that the `.sdf` files from PDBBind 2020 are missing chirality tags, although we will do our best to impute such information automatically using RDKit. Reference: https://www.blopig.com/blog/2021/09/watch-out-when-using-pdbbind.\"\n",
" )\n",
" if ligand_mol is None:\n",
" raise ValueError(f\"Could not load PDBBind 2020 ligand from {ligand_filepath}.\")\n",
" pc.load_protein_from_pdb(temp_protein_filepath)\n",
" pc.load_ligands_from_mols([ligand_mol])\n",
" pdbbind_protein_ligand_interaction_dfs.append(pc.calculate_interactions())\n",
" try:\n",
" ligand_mol = Chem.MolFromMolFile(ligand_filepath.replace(\".mol2\", \".sdf\"))\n",
" except Exception as e:\n",
" ligand_mol = Chem.MolFromMolFile(\n",
" ligand_filepath.replace(\".mol2\", \".sdf\"), sanitize=False\n",
" )\n",
" try:\n",
" Chem.rdmolops.AssignAtomChiralTagsFromStructure(ligand_mol)\n",
" except Exception as e:\n",
" print(\n",
" f\"Could not assign chirality tags to the atoms in the PDBBind ligand molecule from {ligand_filepath}.\"\n",
" )\n",
" if ligand_mol is None:\n",
" raise ValueError(f\"Could not load PDBBind 2020 ligand from {ligand_filepath}.\")\n",
" pc.load_protein_from_pdb(temp_protein_filepath)\n",
" pc.load_ligands_from_mols([ligand_mol])\n",
" pdbbind_protein_ligand_interaction_dfs.append(pc.calculate_interactions())\n",
" except Exception as e:\n",
" print(\n",
" f\"Error processing PDBBind filepaths {temp_protein_filepath} and {ligand_filepath} due to: {e}. Skipping...\"\n",
" )\n",
" continue\n",
"\n",
" # NOTE: we iteratively save the interaction dataframes to an HDF5 file\n",
" with pd.HDFStore(\n",
Expand Down Expand Up @@ -397,7 +403,7 @@
" dg_protein_ligand_interaction_dfs.append(pc.calculate_interactions())\n",
" except Exception as e:\n",
" print(\n",
" f\"Error processing Dockgen filepath pari {temp_protein_filepath} and {ligand_filepath} due to: {e}. Skipping...\"\n",
" f\"Error processing Dockgen filepaths {temp_protein_filepath} and {ligand_filepath} due to: {e}. Skipping...\"\n",
" )\n",
" continue\n",
"\n",
Expand Down

0 comments on commit ef89b5b

Please sign in to comment.