Skip to content

Commit

Permalink
Ddl: category keys can be implicit
Browse files Browse the repository at this point in the history
  • Loading branch information
wojdyr committed Jan 9, 2025
1 parent de4ebab commit 832b039
Showing 4 changed files with 53 additions and 23 deletions.
2 changes: 0 additions & 2 deletions include/gemmi/ddl.hpp
Original file line number Diff line number Diff line change
@@ -61,8 +61,6 @@ struct GEMMI_DLL Ddl {
std::vector<ParentLink> parents_;
// storage for DDL2 _item_linked.child_name -> _item_linked.parent_name
std::map<std::string, std::string> item_parents_;
// counter that allows to limit the number of errors
mutable int missing_category_key_errors = 0;

cif::Block* find_rules(const std::string& name) const {
auto iter = name_index_.find(to_lower(name));
38 changes: 21 additions & 17 deletions src/ddl.cpp
Original file line number Diff line number Diff line change
@@ -263,6 +263,8 @@ void Ddl::check_mandatory_items(const cif::Block& b) const {
// make a list of items in each category in the block
std::map<std::string, std::vector<std::string>> categories;
auto add_category = [&](const std::string& tag) {
if (!find_rules(tag)) // ignore unknown tags
return;
size_t pos = tag.find('.');
if (pos != std::string::npos)
categories[to_lower(tag.substr(0, pos+1))].push_back(to_lower(tag.substr(pos+1)));
@@ -274,6 +276,7 @@ void Ddl::check_mandatory_items(const cif::Block& b) const {
for (const std::string& tag : item.loop.tags)
add_category(tag);
}

// go over categories and check if nothing is missing
for (const auto& cat : categories) {
size_t n = cat.first.size();
@@ -287,29 +290,30 @@ void Ddl::check_mandatory_items(const cif::Block& b) const {
if (use_context)
if (const std::string* ct = cat_block->find_value("_pdbx_category_context.type"))
logger.mesg(br(b), "category indicated as ", *ct, ": ", cat_name);
// check key items
for (const std::string& v : cat_block->find_values("_category_key.name")) {
std::string key = cif::as_string(v);
if (!gemmi::istarts_with(key, cat.first))
logger.level<3>("inconsistent dictionary: wrong _category_key for ", cat_name);
if (!gemmi::in_vector(to_lower(key.substr(n)), cat.second)) {
// In mmcif_pdbx_v50.dic a category key is missing so often,
// that this generates about 20,000 errors, resulting in too much text.
if (missing_category_key_errors < 20 || logger.threshold >= 7)
warn(b, "missing category key: ", key);
else if (missing_category_key_errors == 20)
logger.level<3>("(Increase verbosity to show all missing-category-key errors.)");
missing_category_key_errors++;
}
}
std::vector<std::string> implicit_items;
// check mandatory items
for (auto i = name_index_.lower_bound(cat.first);
i != name_index_.end() && gemmi::starts_with(i->first, cat.first);
++i) {
for (auto row : i->second->find("_item.", {"name", "mandatory_code"}))
if (row.str(1)[0] == 'y' && iequal(row.str(0), i->first) &&
for (auto row : i->second->find("_item.", {"name", "mandatory_code"})) {
// mandatory_code can be one of: yes, not, implicit, implicit-ordinal
char mc0 = row.str(1)[0];
if (mc0 == 'y' && iequal(row.str(0), i->first) &&
!gemmi::in_vector(i->first.substr(n), cat.second))
warn(b, "missing mandatory tag: ", i->first);
else if (mc0 == 'i')
implicit_items.push_back(gemmi::to_lower(row.str(0)));
}
}
// check key items
for (const std::string& v : cat_block->find_values("_category_key.name")) {
std::string key = gemmi::to_lower(cif::as_string(v));
if (!gemmi::starts_with(key, cat.first))
logger.level<3>("inconsistent dictionary: wrong _category_key for ", cat_name);
if (!gemmi::in_vector(key.substr(n), cat.second) &&
// check if the key item is implicit (a feature is used in mmcif_ddl.dic)
!in_vector(key, implicit_items))
warn(b, "missing category key: ", key);
}
}
}
25 changes: 25 additions & 0 deletions tests/mmcif_pdbx_v50_frag.dic
Original file line number Diff line number Diff line change
@@ -898,6 +898,31 @@ save_atom_site
;
save_

save__atom_site.id
_item_description.description
; The value of _atom_site.id must uniquely identify a record in the
ATOM_SITE list.
<snip>
;

#
loop_
_item.name
_item.category_id
_item.mandatory_code
"_atom_site.id" atom_site yes
"_atom_site_anisotrop.id" atom_site_anisotrop yes
"_geom_angle.atom_site_id_1" geom_angle yes
# <snip>
#
_item_aliases.alias_name "_atom_site_label"
_item_aliases.dictionary cif_core.dic
_item_aliases.version 2.0.1
#
# <snip>
_item_type.code code
# <snip>
save_
# <snip>

save__atom_site.attached_hydrogens
11 changes: 7 additions & 4 deletions tests/test_cif.py
Original file line number Diff line number Diff line change
@@ -401,20 +401,23 @@ def test_validation(self):
self.assertEqual(msg_list,
['[dummy_block] unknown tag _custom_tag',
'[dummy_block] unknown tag _another_one',
'[dummy_block] missing mandatory tag: _atom_site.id',
'[dummy_block] missing category key: _atom_site.id'])

# try out set_logger()
counter = 0
def incr_counter(_):
nonlocal counter
counter += 1
ddl.set_logger((incr_counter, 7))
ddl.validate_cif(doc)
self.assertEqual(counter, 3)
self.assertEqual(counter, len(msg_list))

msg_list = []
ddl.set_logger(lambda msg: msg_list.append(msg))
doc = cif.read_string("""
data_b2
_something.unexpected here
loop_
_atom_site.id
_atom_site.auth_asym_id
@@ -427,10 +430,10 @@ def incr_counter(_):
""")
ddl.validate_cif(doc)
self.assertEqual(msg_list, [
'[b2] unknown tag _atom_site.id', # 1
"string:3 [b2] _atom_site.auth_asym_id: "
'[b2] unknown tag _something.unexpected', # 1
"string:4 [b2] _atom_site.auth_asym_id: "
"'hey hey' does not match the code regex", # 2
'string:3 [b2] _atom_site.attached_hydrogens: '
'string:4 [b2] _atom_site.attached_hydrogens: '
'value out of expected range: -1', # 3
'[b2] category atom_site has 1 duplicated key:\n id=2', # 4
])

0 comments on commit 832b039

Please sign in to comment.