@@ -545,11 +545,28 @@ class local_compound_factory_impl : public compound_factory_impl
545
545
: compound_factory_impl(next)
546
546
, m_local_file(file)
547
547
{
548
+ const std::regex peptideRx (" (?:[lmp]-)?peptide" , std::regex::icase);
549
+
550
+ for (const auto &[id, name, threeLetterCode, group] :
551
+ file[" comp_list" ][" chem_comp" ].rows <std::string, std::string, std::string, std::string>(" id" , " name" , " three_letter_code" , " group" ))
552
+ {
553
+ auto &rdb = m_local_file[" comp_" + id];
554
+ if (rdb.empty ())
555
+ {
556
+ std::cerr << " Missing data in restraint file for id " + id + ' \n ' ;
557
+ continue ;
558
+ }
559
+
560
+ construct_compound (rdb, id, name, threeLetterCode, group);
561
+ }
548
562
}
549
563
550
564
compound *create (const std::string &id) override ;
551
565
552
566
private:
567
+
568
+ compound *construct_compound (const datablock &db, const std::string &id, const std::string &name, const std::string &three_letter_code, const std::string &group);
569
+
553
570
cif::file m_local_file;
554
571
};
555
572
@@ -559,30 +576,125 @@ compound *local_compound_factory_impl::create(const std::string &id)
559
576
560
577
for (auto &db : m_local_file)
561
578
{
562
- if (db.name () == id)
579
+ if (db.name () == " comp_ " + id)
563
580
{
564
- cif::datablock db_copy (db);
581
+ auto chem_comp = db.get (" chem_comp" );
582
+ if (not chem_comp)
583
+ break ;
565
584
566
585
try
567
586
{
568
- result = new compound (db_copy, 1 );
587
+ const auto &[id, name, threeLetterCode, group] =
588
+ chem_comp->front ().get <std::string, std::string, std::string, std::string>(" id" , " name" , " three_letter_code" , " group" );
589
+
590
+ result = construct_compound (db, id, name, threeLetterCode, group);
569
591
}
570
592
catch (const std::exception &ex)
571
593
{
572
594
std::throw_with_nested (std::runtime_error (" Error loading compound " + id));
573
595
}
574
596
575
-
576
- std::shared_lock lock (mMutex );
577
- m_compounds.push_back (result);
578
-
579
597
break ;
580
598
}
581
599
}
582
600
583
601
return result;
584
602
}
585
603
604
+ compound *local_compound_factory_impl::construct_compound (const datablock &rdb, const std::string &id,
605
+ const std::string &name, const std::string &three_letter_code, const std::string &group)
606
+ {
607
+ cif::datablock db (id);
608
+
609
+ float formula_weight = 0 ;
610
+ int formal_charge = 0 ;
611
+ std::map<std::string,size_t > formula_data;
612
+
613
+ for (size_t ord = 1 ; const auto &[atom_id, type_symbol, type, charge, x, y, z] :
614
+ rdb[" chem_comp_atom" ].rows <std::string, std::string, std::string, int , float , float , float >(
615
+ " atom_id" , " type_symbol" , " type" , " charge" , " x" , " y" , " z" ))
616
+ {
617
+ auto atom = cif::atom_type_traits (type_symbol);
618
+ formula_weight += atom.weight ();
619
+
620
+ formula_data[type_symbol] += 1 ;
621
+
622
+ db[" chem_comp_atom" ].emplace ({
623
+ { " comp_id" , id },
624
+ { " atom_id" , atom_id },
625
+ { " type_symbol" , type_symbol },
626
+ { " charge" , charge },
627
+ { " model_Cartn_x" , x, 3 },
628
+ { " model_Cartn_y" , y, 3 },
629
+ { " model_Cartn_z" , z, 3 },
630
+ { " pdbx_ordinal" , ord++ }
631
+ });
632
+
633
+ formal_charge += charge;
634
+ }
635
+
636
+ for (size_t ord = 1 ; const auto &[atom_id_1, atom_id_2, type, aromatic] :
637
+ rdb[" chem_comp_bond" ].rows <std::string, std::string, std::string, bool >(" atom_id_1" , " atom_id_2" , " type" , " aromatic" ))
638
+ {
639
+ std::string value_order (" SING" );
640
+
641
+ if (cif::iequals (type, " single" ) or cif::iequals (type, " sing" ))
642
+ value_order = " SING" ;
643
+ else if (cif::iequals (type, " double" ) or cif::iequals (type, " doub" ))
644
+ value_order = " DOUB" ;
645
+ else if (cif::iequals (type, " triple" ) or cif::iequals (type, " trip" ))
646
+ value_order = " TRIP" ;
647
+
648
+ db[" chem_comp_bond" ].emplace ({
649
+ { " comp_id" , id },
650
+ { " atom_id_1" , atom_id_1 },
651
+ { " atom_id_2" , atom_id_2 },
652
+ { " value_order" , value_order },
653
+ { " pdbx_aromatic_flag" , aromatic },
654
+ // TODO: fetch stereo_config info from chem_comp_chir
655
+ { " pdbx_ordinal" , ord++ }
656
+ });
657
+ }
658
+
659
+ db.emplace_back (rdb[" pdbx_chem_comp_descriptor" ]);
660
+
661
+ std::string formula;
662
+ for (bool first = true ; const auto &[symbol, count]: formula_data)
663
+ {
664
+ if (std::exchange (first, false ))
665
+ formula += ' ' ;
666
+ formula += symbol;
667
+ if (count > 1 )
668
+ formula += std::to_string (count);
669
+ }
670
+
671
+ std::string type;
672
+ if (cif::iequals (group, " peptide" ) or cif::iequals (group, " l-peptide" ) or cif::iequals (group, " l-peptide linking" ))
673
+ type = " L-PEPTIDE LINKING" ;
674
+ else if (cif::iequals (group, " dna" ))
675
+ type = " DNA LINKING" ;
676
+ else if (cif::iequals (group, " rna" ))
677
+ type = " RNA LINKING" ;
678
+ else
679
+ type = " NON-POLYMER" ;
680
+
681
+ db[" chem_comp" ].emplace ({
682
+ { " id" , id },
683
+ { " name" , name },
684
+ { " type" , type },
685
+ { " formula" , formula },
686
+ { " pdbx_formal_charge" , formal_charge },
687
+ { " formula_weight" , formula_weight },
688
+ { " three_letter_code" , three_letter_code }
689
+ });
690
+
691
+ std::shared_lock lock (mMutex );
692
+
693
+ auto result = new compound (db);
694
+ m_compounds.push_back (result);
695
+ return result;
696
+ }
697
+
586
698
// --------------------------------------------------------------------
587
699
588
700
std::unique_ptr<compound_factory> compound_factory::s_instance;
0 commit comments