diff --git a/configs/model/dynamicbind_inference.yaml b/configs/model/dynamicbind_inference.yaml index 5be5219..c289e49 100644 --- a/configs/model/dynamicbind_inference.yaml +++ b/configs/model/dynamicbind_inference.yaml @@ -8,6 +8,7 @@ samples_per_complex: 40 # the number of samples to generate per complex savings_per_complex: 1 # the (top-N) number of sample visualizations to save per complex inference_steps: 20 # the number of inference steps to run for each complex batch_size: 5 # the batch size to use for inference +cache_path: ${oc.env:PROJECT_ROOT}/data/dynamicbind_cache/cache # the cache directory to use for storing intermediate data files header: ${dataset} # name of the results directory to create num_workers: 1 # the number of workers to use for native relaxation during inference skip_existing: true # whether to skip existing predictions diff --git a/docs/source/_static/PoseBench.png b/docs/source/_static/PoseBench.png index fbafb87..95e73af 100644 Binary files a/docs/source/_static/PoseBench.png and b/docs/source/_static/PoseBench.png differ diff --git a/forks/DynamicBind/run_single_protein_inference.py b/forks/DynamicBind/run_single_protein_inference.py index 105979c..4516a26 100755 --- a/forks/DynamicBind/run_single_protein_inference.py +++ b/forks/DynamicBind/run_single_protein_inference.py @@ -22,6 +22,7 @@ parser.add_argument('--savings_per_complex', type=int, default=1, help='num of samples data saved for movie generation.') parser.add_argument('--inference_steps', type=int, default=20, help='num of coordinate updates. (movie frames)') parser.add_argument('--batch_size', type=int, default=5, help='chunk size for inference batches.') +parser.add_argument('--cache_path', type=str, default='data/cache', help='Folder from where to load/restore cached dataset') parser.add_argument('--header', type=str, default='test', help='informative name used to name result folder') parser.add_argument('--results', type=str, default='results', help='result folder.') parser.add_argument('--device', type=int, default=0, help='CUDA_VISIBLE_DEVICES') @@ -302,7 +303,7 @@ def ref_filename_sort_key(filepath): do(cmd) cmd = f"CUDA_VISIBLE_DEVICES={args.device} {python} {script_folder}/esm/scripts/extract.py esm2_t33_650M_UR50D {os.path.join(outputs_dir, f'prepared_for_esm_{header}.fasta')} {os.path.join(outputs_dir, 'esm2_output' + unique_id)} --repr_layers 33 --include per_tok --truncation_seq_length 10000 --model_dir {script_folder}/esm_models" do(cmd) - cmd = f"{python} {script_folder}/inference.py --seed {args.seed} --ckpt {ckpt} {protein_dynamic}" + cmd = f"{python} {script_folder}/inference.py --cache_path {args.cache_path} --seed {args.seed} --ckpt {ckpt} {protein_dynamic}" cmd += f" --save_visualisation --model_dir {model_workdir} --protein_ligand_csv {ligandFile_with_protein_path} " cmd += f" --esm_embeddings_path {os.path.join(outputs_dir, 'esm2_output' + unique_id)} --out_dir {args.results}/{header} --inference_steps {args.inference_steps} --samples_per_complex {args.samples_per_complex} --savings_per_complex {args.savings_per_complex} --batch_size {args.batch_size} --actual_steps {args.inference_steps} --no_final_step_noise" os.environ['CUDA_VISIBLE_DEVICES'] = str(args.device) @@ -391,7 +392,7 @@ def ref_filename_sort_key(filepath): do(cmd) cmd = f"CUDA_VISIBLE_DEVICES={args.device} {python} {script_folder}/esm/scripts/extract.py esm2_t33_650M_UR50D {os.path.join(outputs_dir, f'prepared_for_esm_{header}.fasta')} {os.path.join(outputs_dir, 'esm2_output' + unique_id)} --repr_layers 33 --include per_tok --truncation_seq_length 10000 --model_dir {script_folder}/esm_models" do(cmd) - cmd = f"CUDA_VISIBLE_DEVICES={args.device} {python} {script_folder}/inference.py --seed {args.seed} --ckpt {ckpt} {protein_dynamic}" + cmd = f"CUDA_VISIBLE_DEVICES={args.device} {python} {script_folder}/inference.py --cache_path {args.cache_path} --seed {args.seed} --ckpt {ckpt} {protein_dynamic}" cmd += f" --save_visualisation --model_dir {model_workdir} --protein_ligand_csv {ligandFile_with_protein_path} " cmd += f" --esm_embeddings_path {os.path.join(outputs_dir, 'esm2_output' + unique_id)} --out_dir {args.results}/{header} --inference_steps {args.inference_steps} --samples_per_complex {args.samples_per_complex} --savings_per_complex {args.savings_per_complex} --batch_size {args.batch_size} --actual_steps {args.inference_steps} --no_final_step_noise" do(cmd) diff --git a/img/PoseBench.png b/img/PoseBench.png index fbafb87..95e73af 100644 Binary files a/img/PoseBench.png and b/img/PoseBench.png differ diff --git a/notebooks/casp15_all_multi_ligand_relaxed_lddt_pli_violin_plot.png b/notebooks/casp15_all_multi_ligand_relaxed_lddt_pli_violin_plot.png index 3b84381..47c0bca 100644 Binary files a/notebooks/casp15_all_multi_ligand_relaxed_lddt_pli_violin_plot.png and b/notebooks/casp15_all_multi_ligand_relaxed_lddt_pli_violin_plot.png differ diff --git a/notebooks/casp15_all_multi_ligand_relaxed_pb_valid_bar_chart.png b/notebooks/casp15_all_multi_ligand_relaxed_pb_valid_bar_chart.png index e278a86..30c2429 100644 Binary files a/notebooks/casp15_all_multi_ligand_relaxed_pb_valid_bar_chart.png and b/notebooks/casp15_all_multi_ligand_relaxed_pb_valid_bar_chart.png differ diff --git a/notebooks/casp15_all_multi_ligand_relaxed_rmsd_lt2_bar_chart.png b/notebooks/casp15_all_multi_ligand_relaxed_rmsd_lt2_bar_chart.png index 44f2c6e..4a4f569 100644 Binary files a/notebooks/casp15_all_multi_ligand_relaxed_rmsd_lt2_bar_chart.png and b/notebooks/casp15_all_multi_ligand_relaxed_rmsd_lt2_bar_chart.png differ diff --git a/notebooks/casp15_all_multi_ligand_relaxed_rmsd_violin_plot.png b/notebooks/casp15_all_multi_ligand_relaxed_rmsd_violin_plot.png index 7dbce00..7ec3386 100644 Binary files a/notebooks/casp15_all_multi_ligand_relaxed_rmsd_violin_plot.png and b/notebooks/casp15_all_multi_ligand_relaxed_rmsd_violin_plot.png differ diff --git a/notebooks/casp15_all_single_ligand_relaxed_lddt_pli_violin_plot.png b/notebooks/casp15_all_single_ligand_relaxed_lddt_pli_violin_plot.png index 2aad945..8895c59 100644 Binary files a/notebooks/casp15_all_single_ligand_relaxed_lddt_pli_violin_plot.png and b/notebooks/casp15_all_single_ligand_relaxed_lddt_pli_violin_plot.png differ diff --git a/notebooks/casp15_all_single_ligand_relaxed_pb_valid_bar_chart.png b/notebooks/casp15_all_single_ligand_relaxed_pb_valid_bar_chart.png index 103dd84..315e4c6 100644 Binary files a/notebooks/casp15_all_single_ligand_relaxed_pb_valid_bar_chart.png and b/notebooks/casp15_all_single_ligand_relaxed_pb_valid_bar_chart.png differ diff --git a/notebooks/casp15_all_single_ligand_relaxed_rmsd_lt2_bar_chart.png b/notebooks/casp15_all_single_ligand_relaxed_rmsd_lt2_bar_chart.png index 8522317..a3d63b6 100644 Binary files a/notebooks/casp15_all_single_ligand_relaxed_rmsd_lt2_bar_chart.png and b/notebooks/casp15_all_single_ligand_relaxed_rmsd_lt2_bar_chart.png differ diff --git a/notebooks/casp15_all_single_ligand_relaxed_rmsd_violin_plot.png b/notebooks/casp15_all_single_ligand_relaxed_rmsd_violin_plot.png index 6a96a41..c08f7d5 100644 Binary files a/notebooks/casp15_all_single_ligand_relaxed_rmsd_violin_plot.png and b/notebooks/casp15_all_single_ligand_relaxed_rmsd_violin_plot.png differ diff --git a/notebooks/casp15_inference_results_plotting.ipynb b/notebooks/casp15_inference_results_plotting.ipynb index 6ffc900..845dd59 100644 --- a/notebooks/casp15_inference_results_plotting.ipynb +++ b/notebooks/casp15_inference_results_plotting.ipynb @@ -87,7 +87,7 @@ " \"dynamicbind\": \"DynamicBind\",\n", " \"neuralplexer\": \"NeuralPLexer\",\n", " \"neuralplexer_no_ilcl\": \"NeuralPLexer w/o ILCL\",\n", - " \"rfaa\": \"RoseTTAFold-All-Atom\",\n", + " \"rfaa\": \"RoseTTAFold-AA\",\n", " \"tulip\": \"TULIP\",\n", " \"vina_diffdock\": \"DiffDock-L-Vina\",\n", " \"vina_p2rank\": \"P2Rank-Vina\",\n", @@ -133,11 +133,11 @@ " \"casp15\",\n", " f\"top_{method}{'' if 'ensemble' in method else '_ensemble'}_predictions_{repeat_index}\",\n", " )\n", - " globals()[\n", - " f\"{method}{config}_scoring_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " globals()[f\"{method}_output_dir_{repeat_index}\"] + config,\n", - " \"scoring_results.csv\",\n", + " globals()[f\"{method}{config}_scoring_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[f\"{method}_output_dir_{repeat_index}\"] + config,\n", + " \"scoring_results.csv\",\n", + " )\n", " )\n", " globals()[f\"{method}{config}_bust_results_csv_filepath_{repeat_index}\"] = os.path.join(\n", " globals()[f\"{method}_output_dir_{repeat_index}\"] + config,\n", @@ -160,12 +160,6 @@ " .groupby([\"target\", \"mdl\"])[\"pose\"]\n", " .transform(\"count\")\n", " )\n", - " grouped_num_target_ligands = (\n", - " globals()[f\"{method}{config}_scoring_results_table_{repeat_index}\"]\n", - " .groupby([\"target\", \"mdl\"])[\"num_target_ligands\"]\n", - " .first()\n", - " )\n", - " num_ligands_per_complex = grouped_num_target_ligands.loc[(slice(None), 1)].tolist()\n", " globals()[f\"{method}{config}_bust_results_table_{repeat_index}\"] = (\n", " pd.read_csv(\n", " globals()[f\"{method}{config}_bust_results_csv_filepath_{repeat_index}\"]\n", @@ -204,6 +198,13 @@ " <= 2\n", " )\n", "\n", + " grouped_num_target_ligands = (\n", + " globals()[f\"{method}{config}_scoring_results_table_{repeat_index}\"]\n", + " .groupby([\"target\", \"mdl\"])[\"num_target_ligands\"]\n", + " .first()\n", + " )\n", + " num_ligands_per_complex = grouped_num_target_ligands.loc[(slice(None), 1)].tolist()\n", + "\n", " print(\n", " f\"{method_title}{config}_{repeat_index} CASP15 set average `lddt_pli`: {globals()[f'{method}{config}_scoring_results_table_{repeat_index}']['lddt_pli'].mean()}\"\n", " )\n", @@ -229,6 +230,16 @@ " globals()[f\"{method}{config}_bust_results_table_{repeat_index}\"].loc[\n", " :, \"dataset\"\n", " ] = \"casp15\"\n", + " # filter bust results to only those for targets that were scoreable using the CASP scoring pipeline\n", + " globals()[f\"{method}{config}_bust_results_table_{repeat_index}\"] = globals()[\n", + " f\"{method}{config}_bust_results_table_{repeat_index}\"\n", + " ][\n", + " globals()[f\"{method}{config}_bust_results_table_{repeat_index}\"].target.isin(\n", + " globals()[\n", + " f\"{method}{config}_scoring_results_table_{repeat_index}\"\n", + " ].target.unique()\n", + " )\n", + " ]\n", " globals()[f\"{method}{config}_bust_results_table_{repeat_index}\"].loc[\n", " :, \"num_target_ligands\"\n", " ] = num_ligands_per_complex\n", @@ -357,7 +368,7 @@ " )\n", " ]\n", " combined_data_list.append(pd.concat([casp15_results_table, casp15_relaxed_results_table]))\n", - "combined_data = pd.concat(combined_data_list)\n", + "combined_data = pd.concat(combined_data_list).sort_values(\"method_assignment_index\")\n", "\n", "for complex_type in [\"single\", \"multi\"]:\n", " for complex_license in [\"all\", \"public\"]:\n", @@ -439,7 +450,7 @@ " )\n", " ]\n", " combined_data_list.append(pd.concat([casp15_results_table, casp15_relaxed_results_table]))\n", - "combined_data = pd.concat(combined_data_list)\n", + "combined_data = pd.concat(combined_data_list).sort_values(\"method_assignment_index\")\n", "\n", "for complex_type in [\"single\", \"multi\"]:\n", " for complex_license in [\"all\", \"public\"]:\n", @@ -453,7 +464,7 @@ " hue=\"post-processing\",\n", " data=combined_data[\n", " # ignore outliers\n", - " (combined_data[\"rmsd\"] < 50)\n", + " (combined_data[\"rmsd\"] < 150)\n", " & (\n", " # filter the data based on the complex type and license\n", " combined_data[\"target\"].isin(\n", @@ -724,7 +735,7 @@ " \"NeuralPLexer\",\n", " \"DL-based blind\",\n", " \"NeuralPLexer w/o ILCL\",\n", - " \"RoseTTAFold-All-Atom\",\n", + " \"RoseTTAFold-AA\",\n", " \"TULIP\",\n", " \"DiffDock-L-Vina\",\n", " \"Conventional blind\",\n", @@ -1042,7 +1053,7 @@ " \"NeuralPLexer\",\n", " \"DL-based blind\",\n", " \"NeuralPLexer w/o ILCL\",\n", - " \"RoseTTAFold-All-Atom\",\n", + " \"RoseTTAFold-AA\",\n", " \"TULIP\",\n", " \"DiffDock-L-Vina\",\n", " \"Conventional blind\",\n", @@ -1108,7 +1119,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.17" + "version": "3.10.14" } }, "nbformat": 4, diff --git a/notebooks/casp15_method_interaction_analysis.png b/notebooks/casp15_method_interaction_analysis.png index 4adfa12..f51aaea 100644 Binary files a/notebooks/casp15_method_interaction_analysis.png and b/notebooks/casp15_method_interaction_analysis.png differ diff --git a/notebooks/casp15_method_interaction_analysis_plotting.ipynb b/notebooks/casp15_method_interaction_analysis_plotting.ipynb index bec2d6a..ea21b0f 100644 --- a/notebooks/casp15_method_interaction_analysis_plotting.ipynb +++ b/notebooks/casp15_method_interaction_analysis_plotting.ipynb @@ -98,7 +98,7 @@ " \"diffdock\": \"DiffDock-L\",\n", " \"dynamicbind\": \"DynamicBind\",\n", " \"neuralplexer\": \"NeuralPLexer\",\n", - " \"rfaa\": \"RoseTTAFold-All-Atom\",\n", + " \"rfaa\": \"RoseTTAFold-AA\",\n", " \"tulip\": \"TULIP\",\n", " \"vina_diffdock\": \"DiffDock-L-Vina\",\n", " \"vina_p2rank\": \"P2Rank-Vina\",\n", @@ -482,7 +482,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.17" + "version": "3.10.14" } }, "nbformat": 4, diff --git a/notebooks/casp15_public_multi_ligand_relaxed_lddt_pli_violin_plot.png b/notebooks/casp15_public_multi_ligand_relaxed_lddt_pli_violin_plot.png index 5c80289..d21b2a0 100644 Binary files a/notebooks/casp15_public_multi_ligand_relaxed_lddt_pli_violin_plot.png and b/notebooks/casp15_public_multi_ligand_relaxed_lddt_pli_violin_plot.png differ diff --git a/notebooks/casp15_public_multi_ligand_relaxed_pb_valid_bar_chart.png b/notebooks/casp15_public_multi_ligand_relaxed_pb_valid_bar_chart.png index d6a6fac..5964f46 100644 Binary files a/notebooks/casp15_public_multi_ligand_relaxed_pb_valid_bar_chart.png and b/notebooks/casp15_public_multi_ligand_relaxed_pb_valid_bar_chart.png differ diff --git a/notebooks/casp15_public_multi_ligand_relaxed_rmsd_lt2_bar_chart.png b/notebooks/casp15_public_multi_ligand_relaxed_rmsd_lt2_bar_chart.png index fce782b..0f8a29c 100644 Binary files a/notebooks/casp15_public_multi_ligand_relaxed_rmsd_lt2_bar_chart.png and b/notebooks/casp15_public_multi_ligand_relaxed_rmsd_lt2_bar_chart.png differ diff --git a/notebooks/casp15_public_multi_ligand_relaxed_rmsd_violin_plot.png b/notebooks/casp15_public_multi_ligand_relaxed_rmsd_violin_plot.png index b236d66..543933f 100644 Binary files a/notebooks/casp15_public_multi_ligand_relaxed_rmsd_violin_plot.png and b/notebooks/casp15_public_multi_ligand_relaxed_rmsd_violin_plot.png differ diff --git a/notebooks/casp15_public_single_ligand_relaxed_lddt_pli_violin_plot.png b/notebooks/casp15_public_single_ligand_relaxed_lddt_pli_violin_plot.png index cb494af..27af449 100644 Binary files a/notebooks/casp15_public_single_ligand_relaxed_lddt_pli_violin_plot.png and b/notebooks/casp15_public_single_ligand_relaxed_lddt_pli_violin_plot.png differ diff --git a/notebooks/casp15_public_single_ligand_relaxed_pb_valid_bar_chart.png b/notebooks/casp15_public_single_ligand_relaxed_pb_valid_bar_chart.png index 92ffaad..3cfe771 100644 Binary files a/notebooks/casp15_public_single_ligand_relaxed_pb_valid_bar_chart.png and b/notebooks/casp15_public_single_ligand_relaxed_pb_valid_bar_chart.png differ diff --git a/notebooks/casp15_public_single_ligand_relaxed_rmsd_lt2_bar_chart.png b/notebooks/casp15_public_single_ligand_relaxed_rmsd_lt2_bar_chart.png index 8aa1e43..7f0592c 100644 Binary files a/notebooks/casp15_public_single_ligand_relaxed_rmsd_lt2_bar_chart.png and b/notebooks/casp15_public_single_ligand_relaxed_rmsd_lt2_bar_chart.png differ diff --git a/notebooks/casp15_public_single_ligand_relaxed_rmsd_violin_plot.png b/notebooks/casp15_public_single_ligand_relaxed_rmsd_violin_plot.png index bfd6db0..7f0b1a7 100644 Binary files a/notebooks/casp15_public_single_ligand_relaxed_rmsd_violin_plot.png and b/notebooks/casp15_public_single_ligand_relaxed_rmsd_violin_plot.png differ diff --git a/notebooks/dockgen_inference_results_plotting.ipynb b/notebooks/dockgen_inference_results_plotting.ipynb index dd1fee0..211e89b 100644 --- a/notebooks/dockgen_inference_results_plotting.ipynb +++ b/notebooks/dockgen_inference_results_plotting.ipynb @@ -96,12 +96,12 @@ " )\n", "\n", " # DiffDock (relaxed-protein) results\n", - " globals()[\n", - " f\"diffdock_relaxed_protein_dockgen_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " globals()[\"diffdock_output_dir\"],\n", - " f\"diffdock_dockgen_output_{repeat_index}\",\n", - " \"bust_results.csv\",\n", + " globals()[f\"diffdock_relaxed_protein_dockgen_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[\"diffdock_output_dir\"],\n", + " f\"diffdock_dockgen_output_{repeat_index}\",\n", + " \"bust_results.csv\",\n", + " )\n", " )\n", " globals()[\n", " f\"diffdock_relaxed_protein_dockgen_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", @@ -129,12 +129,12 @@ " f\"dockgen_{repeat_index}\",\n", " \"bust_results.csv\",\n", " )\n", - " globals()[\n", - " f\"dynamicbind_dockgen_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " globals()[\"dynamicbind_output_dir\"],\n", - " f\"dockgen_{repeat_index}_relaxed\",\n", - " \"bust_results.csv\",\n", + " globals()[f\"dynamicbind_dockgen_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[\"dynamicbind_output_dir\"],\n", + " f\"dockgen_{repeat_index}_relaxed\",\n", + " \"bust_results.csv\",\n", + " )\n", " )\n", "\n", " # NeuralPLexer results\n", @@ -143,12 +143,12 @@ " f\"neuralplexer_dockgen_outputs_{repeat_index}\",\n", " \"bust_results.csv\",\n", " )\n", - " globals()[\n", - " f\"neuralplexer_dockgen_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " globals()[\"neuralplexer_output_dir\"],\n", - " f\"neuralplexer_dockgen_outputs_{repeat_index}_relaxed\",\n", - " \"bust_results.csv\",\n", + " globals()[f\"neuralplexer_dockgen_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[\"neuralplexer_output_dir\"],\n", + " f\"neuralplexer_dockgen_outputs_{repeat_index}_relaxed\",\n", + " \"bust_results.csv\",\n", + " )\n", " )\n", "\n", " # RoseTTAFold-All-Atom results\n", @@ -169,12 +169,12 @@ " f\"vina_diffdock_dockgen_outputs_{repeat_index}\",\n", " \"bust_results.csv\",\n", " )\n", - " globals()[\n", - " f\"vina_diffdock_dockgen_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " globals()[\"vina_output_dir\"],\n", - " f\"vina_diffdock_dockgen_outputs_{repeat_index}_relaxed\",\n", - " \"bust_results.csv\",\n", + " globals()[f\"vina_diffdock_dockgen_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[\"vina_output_dir\"],\n", + " f\"vina_diffdock_dockgen_outputs_{repeat_index}_relaxed\",\n", + " \"bust_results.csv\",\n", + " )\n", " )\n", "\n", " # P2Rank-Vina results\n", @@ -183,38 +183,38 @@ " f\"vina_p2rank_dockgen_outputs_{repeat_index}\",\n", " \"bust_results.csv\",\n", " )\n", - " globals()[\n", - " f\"vina_p2rank_dockgen_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " globals()[\"vina_output_dir\"],\n", - " f\"vina_p2rank_dockgen_outputs_{repeat_index}_relaxed\",\n", - " \"bust_results.csv\",\n", + " globals()[f\"vina_p2rank_dockgen_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[\"vina_output_dir\"],\n", + " f\"vina_p2rank_dockgen_outputs_{repeat_index}_relaxed\",\n", + " \"bust_results.csv\",\n", + " )\n", " )\n", "\n", " # Consensus ensemble results\n", - " globals()[\n", - " f\"consensus_ensemble_dockgen_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " os.path.join(\"..\", \"data\", \"test_cases\", \"dockgen\"),\n", - " f\"top_consensus_ensemble_predictions_{repeat_index}\",\n", - " \"bust_results.csv\",\n", + " globals()[f\"consensus_ensemble_dockgen_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " os.path.join(\"..\", \"data\", \"test_cases\", \"dockgen\"),\n", + " f\"top_consensus_ensemble_predictions_{repeat_index}\",\n", + " \"bust_results.csv\",\n", + " )\n", " )\n", - " globals()[\n", - " f\"consensus_ensemble_dockgen_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " os.path.join(\"..\", \"data\", \"test_cases\", \"dockgen\"),\n", - " f\"top_consensus_ensemble_predictions_{repeat_index}_relaxed\",\n", - " \"bust_results.csv\",\n", + " globals()[f\"consensus_ensemble_dockgen_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " os.path.join(\"..\", \"data\", \"test_cases\", \"dockgen\"),\n", + " f\"top_consensus_ensemble_predictions_{repeat_index}_relaxed\",\n", + " \"bust_results.csv\",\n", + " )\n", " )\n", "\n", "# Mappings\n", "method_mapping = {\n", " \"diffdock\": \"DiffDock-L\",\n", - " \"diffdock_relaxed_protein\": \"DiffDock-L (Relaxed-Protein)\",\n", + " \"diffdock_relaxed_protein\": \"DiffDock-L-Relax-Prot\",\n", " \"fabind\": \"FABind\",\n", " \"dynamicbind\": \"DynamicBind\",\n", " \"neuralplexer\": \"NeuralPLexer\",\n", - " \"rfaa\": \"RoseTTAFold-All-Atom\",\n", + " \"rfaa\": \"RoseTTAFold-AA\",\n", " \"vina_diffdock\": \"DiffDock-L-Vina\",\n", " \"vina_p2rank\": \"P2Rank-Vina\",\n", " \"consensus_ensemble\": \"Ensemble (Con)\",\n", @@ -429,7 +429,7 @@ " x=\"method\",\n", " y=\"rmsd\",\n", " hue=\"post-processing\",\n", - " data=combined_relaxed_data[combined_relaxed_data[\"rmsd\"] < 50],\n", + " data=combined_relaxed_data[combined_relaxed_data[\"rmsd\"] < 150], # ignore outliers\n", " split=True,\n", " inner=\"quartile\",\n", " palette=colors,\n", @@ -724,12 +724,12 @@ "axis.set_xticklabels(\n", " [\n", " \"DiffDock-L\",\n", - " \"DiffDock-L (Relax-P)\",\n", + " \"DiffDock-L-Relax-Prot\",\n", " \"FABind\",\n", " \"DL-based blind\",\n", " \"DynamicBind\",\n", " \"NeuralPLexer\",\n", - " \"RoseTTAFold-All-Atom\",\n", + " \"RoseTTAFold-AA\",\n", " \"DiffDock-L-Vina\",\n", " \"Conventional blind\",\n", " \"P2Rank-Vina\",\n", @@ -791,7 +791,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.17" + "version": "3.10.14" } }, "nbformat": 4, diff --git a/notebooks/dockgen_single_ligand_relaxed_bar_chart.png b/notebooks/dockgen_single_ligand_relaxed_bar_chart.png index c743056..bcd7209 100644 Binary files a/notebooks/dockgen_single_ligand_relaxed_bar_chart.png and b/notebooks/dockgen_single_ligand_relaxed_bar_chart.png differ diff --git a/notebooks/dockgen_single_ligand_relaxed_rmsd_violin_plot.png b/notebooks/dockgen_single_ligand_relaxed_rmsd_violin_plot.png index ce3397a..fb10d7c 100644 Binary files a/notebooks/dockgen_single_ligand_relaxed_rmsd_violin_plot.png and b/notebooks/dockgen_single_ligand_relaxed_rmsd_violin_plot.png differ diff --git a/notebooks/posebusters_astex_inference_results_plotting.ipynb b/notebooks/posebusters_astex_inference_results_plotting.ipynb index 6809022..f869a53 100644 --- a/notebooks/posebusters_astex_inference_results_plotting.ipynb +++ b/notebooks/posebusters_astex_inference_results_plotting.ipynb @@ -100,12 +100,12 @@ " f\"diffdock_posebusters_benchmark_output_{repeat_index}\",\n", " \"bust_results.csv\",\n", " )\n", - " globals()[\n", - " f\"diffdock_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " globals()[\"diffdock_output_dir\"],\n", - " f\"diffdock_posebusters_benchmark_output_{repeat_index}_relaxed\",\n", - " \"bust_results.csv\",\n", + " globals()[f\"diffdock_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[\"diffdock_output_dir\"],\n", + " f\"diffdock_posebusters_benchmark_output_{repeat_index}_relaxed\",\n", + " \"bust_results.csv\",\n", + " )\n", " )\n", "\n", " # FABind results\n", @@ -124,36 +124,36 @@ " f\"fabind_posebusters_benchmark_output_{repeat_index}\",\n", " \"bust_results.csv\",\n", " )\n", - " globals()[\n", - " f\"fabind_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " globals()[\"fabind_output_dir\"],\n", - " f\"fabind_posebusters_benchmark_output_{repeat_index}_relaxed\",\n", - " \"bust_results.csv\",\n", + " globals()[f\"fabind_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[\"fabind_output_dir\"],\n", + " f\"fabind_posebusters_benchmark_output_{repeat_index}_relaxed\",\n", + " \"bust_results.csv\",\n", + " )\n", " )\n", "\n", " # DynamicBind results\n", " globals()[f\"dynamicbind_astex_bust_results_csv_filepath_{repeat_index}\"] = os.path.join(\n", " globals()[\"dynamicbind_output_dir\"], f\"astex_diverse_{repeat_index}\", \"bust_results.csv\"\n", " )\n", - " globals()[\n", - " f\"dynamicbind_astex_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " globals()[\"dynamicbind_output_dir\"],\n", - " f\"astex_diverse_{repeat_index}_relaxed\",\n", - " \"bust_results.csv\",\n", + " globals()[f\"dynamicbind_astex_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[\"dynamicbind_output_dir\"],\n", + " f\"astex_diverse_{repeat_index}_relaxed\",\n", + " \"bust_results.csv\",\n", + " )\n", " )\n", " globals()[f\"dynamicbind_posebusters_bust_results_csv_filepath_{repeat_index}\"] = os.path.join(\n", " globals()[\"dynamicbind_output_dir\"],\n", " f\"posebusters_benchmark_{repeat_index}\",\n", " \"bust_results.csv\",\n", " )\n", - " globals()[\n", - " f\"dynamicbind_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " globals()[\"dynamicbind_output_dir\"],\n", - " f\"posebusters_benchmark_{repeat_index}_relaxed\",\n", - " \"bust_results.csv\",\n", + " globals()[f\"dynamicbind_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[\"dynamicbind_output_dir\"],\n", + " f\"posebusters_benchmark_{repeat_index}_relaxed\",\n", + " \"bust_results.csv\",\n", + " )\n", " )\n", "\n", " # NeuralPLexer results\n", @@ -162,24 +162,24 @@ " f\"neuralplexer_astex_diverse_outputs_{repeat_index}\",\n", " \"bust_results.csv\",\n", " )\n", - " globals()[\n", - " f\"neuralplexer_astex_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " globals()[\"neuralplexer_output_dir\"],\n", - " f\"neuralplexer_astex_diverse_outputs_{repeat_index}_relaxed\",\n", - " \"bust_results.csv\",\n", + " globals()[f\"neuralplexer_astex_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[\"neuralplexer_output_dir\"],\n", + " f\"neuralplexer_astex_diverse_outputs_{repeat_index}_relaxed\",\n", + " \"bust_results.csv\",\n", + " )\n", " )\n", " globals()[f\"neuralplexer_posebusters_bust_results_csv_filepath_{repeat_index}\"] = os.path.join(\n", " globals()[\"neuralplexer_output_dir\"],\n", " f\"neuralplexer_posebusters_benchmark_outputs_{repeat_index}\",\n", " \"bust_results.csv\",\n", " )\n", - " globals()[\n", - " f\"neuralplexer_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " globals()[\"neuralplexer_output_dir\"],\n", - " f\"neuralplexer_posebusters_benchmark_outputs_{repeat_index}_relaxed\",\n", - " \"bust_results.csv\",\n", + " globals()[f\"neuralplexer_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[\"neuralplexer_output_dir\"],\n", + " f\"neuralplexer_posebusters_benchmark_outputs_{repeat_index}_relaxed\",\n", + " \"bust_results.csv\",\n", + " )\n", " )\n", "\n", " # RoseTTAFold-All-Atom results\n", @@ -220,12 +220,12 @@ " f\"tulip_posebusters_benchmark_outputs_{repeat_index}\",\n", " \"bust_results.csv\",\n", " )\n", - " globals()[\n", - " f\"tulip_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " globals()[\"tulip_output_dir\"],\n", - " f\"tulip_posebusters_benchmark_outputs_{repeat_index}_relaxed\",\n", - " \"bust_results.csv\",\n", + " globals()[f\"tulip_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[\"tulip_output_dir\"],\n", + " f\"tulip_posebusters_benchmark_outputs_{repeat_index}_relaxed\",\n", + " \"bust_results.csv\",\n", + " )\n", " )\n", "\n", " # DiffDock-L-Vina results\n", @@ -234,26 +234,26 @@ " f\"vina_diffdock_astex_diverse_outputs_{repeat_index}\",\n", " \"bust_results.csv\",\n", " )\n", - " globals()[\n", - " f\"vina_diffdock_astex_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " globals()[\"vina_output_dir\"],\n", - " f\"vina_diffdock_astex_diverse_outputs_{repeat_index}_relaxed\",\n", - " \"bust_results.csv\",\n", + " globals()[f\"vina_diffdock_astex_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[\"vina_output_dir\"],\n", + " f\"vina_diffdock_astex_diverse_outputs_{repeat_index}_relaxed\",\n", + " \"bust_results.csv\",\n", + " )\n", " )\n", - " globals()[\n", - " f\"vina_diffdock_posebusters_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " globals()[\"vina_output_dir\"],\n", - " f\"vina_diffdock_posebusters_benchmark_outputs_{repeat_index}\",\n", - " \"bust_results.csv\",\n", + " globals()[f\"vina_diffdock_posebusters_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[\"vina_output_dir\"],\n", + " f\"vina_diffdock_posebusters_benchmark_outputs_{repeat_index}\",\n", + " \"bust_results.csv\",\n", + " )\n", " )\n", - " globals()[\n", - " f\"vina_diffdock_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " globals()[\"vina_output_dir\"],\n", - " f\"vina_diffdock_posebusters_benchmark_outputs_{repeat_index}_relaxed\",\n", - " \"bust_results.csv\",\n", + " globals()[f\"vina_diffdock_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[\"vina_output_dir\"],\n", + " f\"vina_diffdock_posebusters_benchmark_outputs_{repeat_index}_relaxed\",\n", + " \"bust_results.csv\",\n", + " )\n", " )\n", "\n", " # P2Rank-Vina results\n", @@ -262,24 +262,24 @@ " f\"vina_p2rank_astex_diverse_outputs_{repeat_index}\",\n", " \"bust_results.csv\",\n", " )\n", - " globals()[\n", - " f\"vina_p2rank_astex_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " globals()[\"vina_output_dir\"],\n", - " f\"vina_p2rank_astex_diverse_outputs_{repeat_index}_relaxed\",\n", - " \"bust_results.csv\",\n", + " globals()[f\"vina_p2rank_astex_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[\"vina_output_dir\"],\n", + " f\"vina_p2rank_astex_diverse_outputs_{repeat_index}_relaxed\",\n", + " \"bust_results.csv\",\n", + " )\n", " )\n", " globals()[f\"vina_p2rank_posebusters_bust_results_csv_filepath_{repeat_index}\"] = os.path.join(\n", " globals()[\"vina_output_dir\"],\n", " f\"vina_p2rank_posebusters_benchmark_outputs_{repeat_index}\",\n", " \"bust_results.csv\",\n", " )\n", - " globals()[\n", - " f\"vina_p2rank_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " globals()[\"vina_output_dir\"],\n", - " f\"vina_p2rank_posebusters_benchmark_outputs_{repeat_index}_relaxed\",\n", - " \"bust_results.csv\",\n", + " globals()[f\"vina_p2rank_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[\"vina_output_dir\"],\n", + " f\"vina_p2rank_posebusters_benchmark_outputs_{repeat_index}_relaxed\",\n", + " \"bust_results.csv\",\n", + " )\n", " )\n", "\n", " # Ensemble (consensus) results\n", @@ -288,19 +288,19 @@ " f\"top_consensus_ensemble_predictions_{repeat_index}\",\n", " \"bust_results.csv\",\n", " )\n", - " globals()[\n", - " f\"consensus_ensemble_astex_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " os.path.join(\"..\", \"data\", \"test_cases\", \"astex_diverse\"),\n", - " f\"top_consensus_ensemble_predictions_{repeat_index}_relaxed\",\n", - " \"bust_results.csv\",\n", + " globals()[f\"consensus_ensemble_astex_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " os.path.join(\"..\", \"data\", \"test_cases\", \"astex_diverse\"),\n", + " f\"top_consensus_ensemble_predictions_{repeat_index}_relaxed\",\n", + " \"bust_results.csv\",\n", + " )\n", " )\n", - " globals()[\n", - " f\"consensus_ensemble_posebusters_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " os.path.join(\"..\", \"data\", \"test_cases\", \"posebusters_benchmark\"),\n", - " f\"top_consensus_ensemble_predictions_{repeat_index}\",\n", - " \"bust_results.csv\",\n", + " globals()[f\"consensus_ensemble_posebusters_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " os.path.join(\"..\", \"data\", \"test_cases\", \"posebusters_benchmark\"),\n", + " f\"top_consensus_ensemble_predictions_{repeat_index}\",\n", + " \"bust_results.csv\",\n", + " )\n", " )\n", " globals()[\n", " f\"consensus_ensemble_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", @@ -316,7 +316,7 @@ " \"fabind\": \"FABind\",\n", " \"dynamicbind\": \"DynamicBind\",\n", " \"neuralplexer\": \"NeuralPLexer\",\n", - " \"rfaa\": \"RoseTTAFold-All-Atom\",\n", + " \"rfaa\": \"RoseTTAFold-AA\",\n", " \"tulip\": \"TULIP\",\n", " \"vina_diffdock\": \"DiffDock-L-Vina\",\n", " \"vina_p2rank\": \"P2Rank-Vina\",\n", @@ -566,7 +566,7 @@ " & (globals()[f\"results_table_{repeat_index}\"][\"post-processing\"] == \"none\")\n", " ]\n", " combined_data_list.append(pd.concat([pb_results_table, ad_results_table]))\n", - "combined_data = pd.concat(combined_data_list)\n", + "combined_data = pd.concat(combined_data_list).sort_values(\"method_assignment_index\")\n", "\n", "# set the size of the figure\n", "plt.figure(figsize=(12, 6))\n", @@ -576,7 +576,7 @@ " x=\"method\",\n", " y=\"rmsd\",\n", " hue=\"dataset\",\n", - " data=combined_data[combined_data[\"rmsd\"] < 100], # ignore outliers\n", + " data=combined_data[combined_data[\"rmsd\"] < 150], # ignore outliers\n", " split=True,\n", " inner=\"quartile\",\n", " palette=colors,\n", @@ -622,7 +622,7 @@ " & (globals()[f\"results_table_{repeat_index}\"][\"post-processing\"] == \"none\")\n", " ]\n", " combined_data_list.append(pd.concat([pb_relaxed_results_table, pb_unrelaxed_results_table]))\n", - "combined_relaxed_data = pd.concat(combined_data_list)\n", + "combined_relaxed_data = pd.concat(combined_data_list).sort_values(\"method_assignment_index\")\n", "\n", "# set the size of the figure\n", "plt.figure(figsize=(12, 6))\n", @@ -632,7 +632,7 @@ " x=\"method\",\n", " y=\"rmsd\",\n", " hue=\"post-processing\",\n", - " data=combined_relaxed_data,\n", + " data=combined_relaxed_data[combined_relaxed_data[\"rmsd\"] < 150], # ignore outliers\n", " split=True,\n", " inner=\"quartile\",\n", " palette=colors,\n", @@ -928,7 +928,7 @@ " \"DynamicBind\",\n", " \"DL-based blind\",\n", " \"NeuralPLexer\",\n", - " \"RoseTTAFold-All-Atom\",\n", + " \"RoseTTAFold-AA\",\n", " \"TULIP\",\n", " \"DiffDock-L-Vina\",\n", " \"Conventional blind\",\n", @@ -1258,7 +1258,7 @@ " \"DynamicBind\",\n", " \"DL-based blind\",\n", " \"NeuralPLexer\",\n", - " \"RoseTTAFold-All-Atom\",\n", + " \"RoseTTAFold-AA\",\n", " \"TULIP\",\n", " \"DiffDock-L-Vina\",\n", " \"Conventional blind\",\n", @@ -1321,7 +1321,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.17" + "version": "3.10.14" } }, "nbformat": 4, diff --git a/notebooks/posebusters_astex_single_ligand_bar_chart.png b/notebooks/posebusters_astex_single_ligand_bar_chart.png index ef31d88..7d2d3c4 100644 Binary files a/notebooks/posebusters_astex_single_ligand_bar_chart.png and b/notebooks/posebusters_astex_single_ligand_bar_chart.png differ diff --git a/notebooks/posebusters_astex_single_ligand_rmsd_violin_plot.png b/notebooks/posebusters_astex_single_ligand_rmsd_violin_plot.png index b016d93..b99ae4d 100644 Binary files a/notebooks/posebusters_astex_single_ligand_rmsd_violin_plot.png and b/notebooks/posebusters_astex_single_ligand_rmsd_violin_plot.png differ diff --git a/notebooks/posebusters_pocket_only_inference_results_plotting.ipynb b/notebooks/posebusters_pocket_only_inference_results_plotting.ipynb index f646826..817bd6a 100644 --- a/notebooks/posebusters_pocket_only_inference_results_plotting.ipynb +++ b/notebooks/posebusters_pocket_only_inference_results_plotting.ipynb @@ -77,7 +77,7 @@ "globals()[\"diffdock_output_dir\"] = os.path.join(\"..\", \"forks\", \"DiffDock\", \"inference\")\n", "globals()[\"fabind_output_dir\"] = os.path.join(\"..\", \"forks\", \"FABind\", \"inference\")\n", "globals()[\"dynamicbind_output_dir\"] = os.path.join(\n", - " \"..\", \"forks\", \"DynamicBind\", \"inference\", \"outputs\", \"results_pocket_only\"\n", + " \"..\", \"forks\", \"DynamicBind\", \"inference\", \"outputs\", \"results\"\n", ")\n", "globals()[\"neuralplexer_output_dir\"] = os.path.join(\"..\", \"forks\", \"NeuralPLexer\", \"inference\")\n", "globals()[\"rfaa_output_dir\"] = os.path.join(\"..\", \"forks\", \"RoseTTAFold-All-Atom\", \"inference\")\n", @@ -89,12 +89,12 @@ " f\"diffdock{pocket_suffix}_posebusters_benchmark_output_{repeat_index}\",\n", " \"bust_results.csv\",\n", " )\n", - " globals()[\n", - " f\"diffdock_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " globals()[\"diffdock_output_dir\"],\n", - " f\"diffdock{pocket_suffix}_posebusters_benchmark_output_{repeat_index}_relaxed\",\n", - " \"bust_results.csv\",\n", + " globals()[f\"diffdock_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[\"diffdock_output_dir\"],\n", + " f\"diffdock{pocket_suffix}_posebusters_benchmark_output_{repeat_index}_relaxed\",\n", + " \"bust_results.csv\",\n", + " )\n", " )\n", "\n", " # FABind results\n", @@ -103,12 +103,12 @@ " f\"fabind{pocket_suffix}_posebusters_benchmark_output_{repeat_index}\",\n", " \"bust_results.csv\",\n", " )\n", - " globals()[\n", - " f\"fabind_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " globals()[\"fabind_output_dir\"],\n", - " f\"fabind{pocket_suffix}_posebusters_benchmark_output_{repeat_index}_relaxed\",\n", - " \"bust_results.csv\",\n", + " globals()[f\"fabind_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[\"fabind_output_dir\"],\n", + " f\"fabind{pocket_suffix}_posebusters_benchmark_output_{repeat_index}_relaxed\",\n", + " \"bust_results.csv\",\n", + " )\n", " )\n", "\n", " # DynamicBind results\n", @@ -117,12 +117,12 @@ " f\"posebusters_benchmark{pocket_suffix}_{repeat_index}\",\n", " \"bust_results.csv\",\n", " )\n", - " globals()[\n", - " f\"dynamicbind_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " globals()[\"dynamicbind_output_dir\"],\n", - " f\"posebusters_benchmark{pocket_suffix}_{repeat_index}_relaxed\",\n", - " \"bust_results.csv\",\n", + " globals()[f\"dynamicbind_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[\"dynamicbind_output_dir\"],\n", + " f\"posebusters_benchmark{pocket_suffix}_{repeat_index}_relaxed\",\n", + " \"bust_results.csv\",\n", + " )\n", " )\n", "\n", " # NeuralPLexer results\n", @@ -131,12 +131,12 @@ " f\"neuralplexer{pocket_suffix}_posebusters_benchmark_outputs_{repeat_index}\",\n", " \"bust_results.csv\",\n", " )\n", - " globals()[\n", - " f\"neuralplexer_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " globals()[\"neuralplexer_output_dir\"],\n", - " f\"neuralplexer{pocket_suffix}_posebusters_benchmark_outputs_{repeat_index}_relaxed\",\n", - " \"bust_results.csv\",\n", + " globals()[f\"neuralplexer_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[\"neuralplexer_output_dir\"],\n", + " f\"neuralplexer{pocket_suffix}_posebusters_benchmark_outputs_{repeat_index}_relaxed\",\n", + " \"bust_results.csv\",\n", + " )\n", " )\n", "\n", " # RoseTTAFold-All-Atom results\n", @@ -152,19 +152,19 @@ " )\n", "\n", " # DiffDock-L-Vina results\n", - " globals()[\n", - " f\"vina_diffdock_posebusters_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " globals()[\"vina_output_dir\"],\n", - " f\"vina{pocket_suffix}_diffdock_posebusters_benchmark_outputs_{repeat_index}\",\n", - " \"bust_results.csv\",\n", + " globals()[f\"vina_diffdock_posebusters_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[\"vina_output_dir\"],\n", + " f\"vina{pocket_suffix}_diffdock_posebusters_benchmark_outputs_{repeat_index}\",\n", + " \"bust_results.csv\",\n", + " )\n", " )\n", - " globals()[\n", - " f\"vina_diffdock_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " globals()[\"vina_output_dir\"],\n", - " f\"vina{pocket_suffix}_diffdock_posebusters_benchmark_outputs_{repeat_index}_relaxed\",\n", - " \"bust_results.csv\",\n", + " globals()[f\"vina_diffdock_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[\"vina_output_dir\"],\n", + " f\"vina{pocket_suffix}_diffdock_posebusters_benchmark_outputs_{repeat_index}_relaxed\",\n", + " \"bust_results.csv\",\n", + " )\n", " )\n", "\n", " # P2Rank-Vina results\n", @@ -173,21 +173,21 @@ " f\"vina{pocket_suffix}_p2rank_posebusters_benchmark_outputs_{repeat_index}\",\n", " \"bust_results.csv\",\n", " )\n", - " globals()[\n", - " f\"vina_p2rank_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " globals()[\"vina_output_dir\"],\n", - " f\"vina{pocket_suffix}_p2rank_posebusters_benchmark_outputs_{repeat_index}_relaxed\",\n", - " \"bust_results.csv\",\n", + " globals()[f\"vina_p2rank_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[\"vina_output_dir\"],\n", + " f\"vina{pocket_suffix}_p2rank_posebusters_benchmark_outputs_{repeat_index}_relaxed\",\n", + " \"bust_results.csv\",\n", + " )\n", " )\n", "\n", " # Consensus ensemble results\n", - " globals()[\n", - " f\"consensus_ensemble_posebusters_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " os.path.join(\"..\", \"data\", \"test_cases\", \"posebusters_benchmark\"),\n", - " f\"top_consensus{pocket_suffix}_ensemble_predictions_{repeat_index}\",\n", - " \"bust_results.csv\",\n", + " globals()[f\"consensus_ensemble_posebusters_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " os.path.join(\"..\", \"data\", \"test_cases\", \"posebusters_benchmark\"),\n", + " f\"top_consensus{pocket_suffix}_ensemble_predictions_{repeat_index}\",\n", + " \"bust_results.csv\",\n", + " )\n", " )\n", " globals()[\n", " f\"consensus_ensemble_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", @@ -203,7 +203,7 @@ " \"fabind\": \"FABind\",\n", " \"dynamicbind\": \"DynamicBind\",\n", " \"neuralplexer\": \"NeuralPLexer\",\n", - " \"rfaa\": \"RoseTTAFold-All-Atom\",\n", + " \"rfaa\": \"RoseTTAFold-AA\",\n", " \"vina_diffdock\": \"DiffDock-L-Vina\",\n", " \"vina_p2rank\": \"P2Rank-Vina\",\n", " \"consensus_ensemble\": \"Ensemble (Con)\",\n", @@ -419,7 +419,7 @@ " x=\"method\",\n", " y=\"rmsd\",\n", " hue=\"post-processing\",\n", - " data=combined_relaxed_data[combined_relaxed_data[\"rmsd\"] < 50],\n", + " data=combined_relaxed_data[combined_relaxed_data[\"rmsd\"] < 150], # ignore outliers\n", " split=True,\n", " inner=\"quartile\",\n", " palette=colors,\n", @@ -726,7 +726,7 @@ " \"DynamicBind\",\n", " \"DL-based blind\",\n", " \"NeuralPLexer\",\n", - " \"RoseTTAFold-All-Atom\",\n", + " \"RoseTTAFold-AA\",\n", " \"DiffDock-L-Vina\",\n", " \"Conventional blind\",\n", " \"P2Rank-Vina\",\n", diff --git a/notebooks/posebusters_single_ligand_pocket_only_relaxed_bar_chart.png b/notebooks/posebusters_single_ligand_pocket_only_relaxed_bar_chart.png index 8f90928..8e0ee88 100644 Binary files a/notebooks/posebusters_single_ligand_pocket_only_relaxed_bar_chart.png and b/notebooks/posebusters_single_ligand_pocket_only_relaxed_bar_chart.png differ diff --git a/notebooks/posebusters_single_ligand_pocket_only_relaxed_rmsd_violin_plot.png b/notebooks/posebusters_single_ligand_pocket_only_relaxed_rmsd_violin_plot.png index b52ab66..c54e5d0 100644 Binary files a/notebooks/posebusters_single_ligand_pocket_only_relaxed_rmsd_violin_plot.png and b/notebooks/posebusters_single_ligand_pocket_only_relaxed_rmsd_violin_plot.png differ diff --git a/notebooks/posebusters_single_ligand_relaxed_bar_chart.png b/notebooks/posebusters_single_ligand_relaxed_bar_chart.png index 3938eea..e1a0dfa 100644 Binary files a/notebooks/posebusters_single_ligand_relaxed_bar_chart.png and b/notebooks/posebusters_single_ligand_relaxed_bar_chart.png differ diff --git a/notebooks/posebusters_single_ligand_relaxed_rmsd_violin_plot.png b/notebooks/posebusters_single_ligand_relaxed_rmsd_violin_plot.png index 2386f37..baebca6 100644 Binary files a/notebooks/posebusters_single_ligand_relaxed_rmsd_violin_plot.png and b/notebooks/posebusters_single_ligand_relaxed_rmsd_violin_plot.png differ diff --git a/posebench/models/dynamicbind_inference.py b/posebench/models/dynamicbind_inference.py index b154697..75371ce 100644 --- a/posebench/models/dynamicbind_inference.py +++ b/posebench/models/dynamicbind_inference.py @@ -6,6 +6,7 @@ import logging import os import subprocess # nosec +import uuid from pathlib import Path import hydra @@ -116,6 +117,11 @@ def main(cfg: DictConfig): f"Skipping inference for completed protein `{protein_filepath}` and ligand `{ligand_filepath}`." ) continue + unique_cache_id = uuid.uuid4() + unique_cache_path = ( + str(cfg.cache_path) + + f"_{cfg.dataset}{pocket_only_suffix}_{ligand_filepath.stem}_{cfg.repeat_index}_{unique_cache_id}" + ) try: subprocess.run( [ @@ -131,6 +137,8 @@ def main(cfg: DictConfig): str(cfg.inference_steps), "--batch_size", str(cfg.batch_size), + "--cache_path", + unique_cache_path, "--header", str(cfg.header) + f"{pocket_only_suffix}_{ligand_filepath.stem}"