From 3cd27f13fdb8dbc00b7456784d9925af5b30b63d Mon Sep 17 00:00:00 2001 From: "Maarten L. Hekkelman" Date: Tue, 5 Mar 2024 13:02:31 +0100 Subject: [PATCH] calculate formula_weight when missing --- src/pdb/pdb2cif.cpp | 41 ------------------------- src/pdb/reconstruct.cpp | 67 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+), 41 deletions(-) diff --git a/src/pdb/pdb2cif.cpp b/src/pdb/pdb2cif.cpp index 3c446db..c914a24 100644 --- a/src/pdb/pdb2cif.cpp +++ b/src/pdb/pdb2cif.cpp @@ -4669,47 +4669,6 @@ void PDBFileParser::ConstructEntities() } } } - - // Finish by calculating the formula_weight for each entity - for (auto entity : *getCategory("entity")) - { - auto entity_id = entity["id"].as(); - float formula_weight = 0; - - if (entity["type"] == "polymer") - { - int n = 0; - - for (std::string comp_id : getCategory("pdbx_poly_seq_scheme")->find(cif::key("entity_id") == entity_id, "mon_id")) - { - auto compound = cif::compound_factory::instance().create(comp_id); - assert(compound); - if (not compound) - throw std::runtime_error("missing information for compound " + comp_id); - formula_weight += compound->formula_weight(); - ++n; - } - - formula_weight -= (n - 1) * 18.015; - } - else if (entity["type"] == "water") - formula_weight = 18.015; - else - { - auto comp_id = getCategory("pdbx_nonpoly_scheme")->find_first>(cif::key("entity_id") == entity_id, "mon_id"); - if (comp_id.has_value()) - { - auto compound = cif::compound_factory::instance().create(*comp_id); - assert(compound); - if (not compound) - throw std::runtime_error("missing information for compound " + *comp_id); - formula_weight = compound->formula_weight(); - } - } - - if (formula_weight > 0) - entity.assign({ { "formula_weight", formula_weight, 3 } }); - } } void PDBFileParser::ConstructSugarTrees(int &asymNr) diff --git a/src/pdb/reconstruct.cpp b/src/pdb/reconstruct.cpp index 6c768d1..5ac7326 100644 --- a/src/pdb/reconstruct.cpp +++ b/src/pdb/reconstruct.cpp @@ -92,6 +92,70 @@ 
condition get_condition(residue_key_type &k)
 
 // --------------------------------------------------------------------
 
+// Fill in the formula_weight of every entity for which it is null or zero.
+// Polymer and branched entities sum their components and subtract 18.015 (the
+// weight used for water) per link; non-polymers use the first listed compound.
+// Throws std::runtime_error when a compound is unknown to the compound factory.
+void checkEntities(datablock &db)
+{
+	using namespace cif::literals;
+
+	// Weight assigned to water entities and subtracted once per link.
+	constexpr double kWaterWeight = 18.015;
+	auto &cf = cif::compound_factory::instance();
+
+	// Weight of a single compound; a missing compound is a runtime error, so
+	// report it with an exception in every build type rather than an assert.
+	auto weightOf = [&cf](const std::string &comp_id) -> float
+	{
+		auto compound = cf.create(comp_id);
+		if (not compound)
+			throw std::runtime_error("missing information for compound " + comp_id);
+		return compound->formula_weight();
+	};
+
+	// Summed weight of all components in `category` (ids in `item`) belonging to
+	// `entity_id`, minus one water per link. Zero when no component is listed.
+	auto chainWeight = [&](const char *category, const char *item, const std::string &entity_id) -> float
+	{
+		float sum = 0;
+		int n = 0;
+
+		for (std::string comp_id : db[category].find<std::string>("entity_id"_key == entity_id, item))
+		{
+			sum += weightOf(comp_id);
+			++n;
+		}
+
+		// Only n > 1 has links; without this guard n == 0 yielded +18.015.
+		return n > 1 ? sum - (n - 1) * kWaterWeight : sum;
+	};
+
+	for (auto entity : db["entity"].find("formula_weight"_key == null or "formula_weight"_key == 0))
+	{
+		const auto &[entity_id, type] = entity.get<std::string, std::string>("id", "type");
+
+		float formula_weight = 0;
+
+		if (type == "polymer")
+			formula_weight = chainWeight("pdbx_poly_seq_scheme", "mon_id", entity_id);
+		else if (type == "water")
+			formula_weight = kWaterWeight;
+		else if (type == "branched")
+			formula_weight = chainWeight("pdbx_entity_branch_list", "comp_id", entity_id);
+		else if (type == "non-polymer")
+		{
+			auto comp_id = db["pdbx_nonpoly_scheme"].find_first<std::optional<std::string>>("entity_id"_key == entity_id, "mon_id");
+			if (comp_id.has_value())
+				formula_weight = weightOf(*comp_id);
+		}
+
+		// Leave the item untouched when no weight could be derived.
+		if (formula_weight > 0)
+			entity.assign({ { "formula_weight", formula_weight, 3 } });
+	}
+}
+
 void createEntityIDs(datablock &db)
 {
 	// Suppose the file does not have entity ID's. 
We have to make up some @@ -1265,6 +1329,9 @@ bool reconstruct_pdbx(file &file, std::string_view dictionary) if (db.get("entity") == nullptr) createEntity(db); + // fill in missing formula_weight, e.g. + checkEntities(db); + if (db.get("pdbx_poly_seq_scheme") == nullptr) createPdbxPolySeqScheme(db);