Skip to content

Commit e72d7b8

Browse files
committed
ddl.cpp: tweak for regexes
1 parent 832b039 commit e72d7b8

File tree

3 files changed

+7
-12
lines changed

3 files changed

+7
-12
lines changed

src/ddl.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -475,9 +475,9 @@ void Ddl::read_ddl2_block(cif::Block& block) {
475475
// mmcif_pdbx_v50.dic uses a custom flavour of regex:
476476
// character classes have unescaped \, but recognize \n, \t, etc.
477477
// Here is a quick fix:
478-
std::string::size_type pos = re_str.find("/\\{}");
479-
if (pos != std::string::npos)
480-
re_str.replace(pos, 4, "/\\\\{}");
478+
gemmi::replace_all(re_str, "/\\{}", "/\\\\{}");
479+
// in the regex for 'binary', \<newline> is apparently meant to be ignored
480+
gemmi::replace_all(re_str, "\\\n", "");
481481
auto flag = std::regex::awk | std::regex::optimize;
482482
regexes_.emplace(row.str(0), std::regex(re_str, flag));
483483
} catch (const std::regex_error& e) {

tests/mmcif_pdbx_v50_frag.dic

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -540,8 +540,8 @@ data_mmcif_pdbx.dic
540540
'[A-Za-z0-9]+(,[A-Za-z0-9]+)*'
541541
; A list of comma separated chain or asym ids.
542542
;
543-
3x4_matrices char
544-
'((([ \t]*-?(([0-9]+)[.]?|([0-9]*[.][0-9]+))([(][0-9]+[)])?([eE][+-]?[0-9]+)? +){3}?(-?(([0-9]+)[.]?|([0-9]*[.][0-9]+))([(][0-9]+[)])?([eE][+-]?[0-9]+)? *\n([\t ]*\n)*)){3})*(([ \t]*-?(([0-9]+)[.]?|([0-9]*[.][0-9]+))([(][0-9]+[)])?([eE][+-]?[0-9]+)? +){3}?(-?(([0-9]+)[.]?|([0-9]*[.][0-9]+))([(][0-9]+[)])?([eE][+-]?[0-9]+)? *\n([\t ]*\n)*)){2}(([ \t]*-?(([0-9]+)[.]?|([0-9]*[.][0-9]+))([(][0-9]+[)])?([eE][+-]?[0-9]+)? +){3}(-?(([0-9]+)[.]?|([0-9]*[.][0-9]+))([(][0-9]+[)])?([eE][+-]?[0-9]+)? *\n?([\t ]*\n)*))[ \t]*'
543+
544+
3x4_matrices char "(((([ \t]*-?(([0-9]+)[.]?|([0-9]*[.][0-9]+))([(][0-9]+[)])?([eE][+-]?[0-9]+)? +){3})?(-?(([0-9]+)[.]?|([0-9]*[.][0-9]+))([(][0-9]+[)])?([eE][+-]?[0-9]+)? *\n([\t ]*\n)*)){3})*((([ \t]*-?(([0-9]+)[.]?|([0-9]*[.][0-9]+))([(][0-9]+[)])?([eE][+-]?[0-9]+)? +){3})?(-?(([0-9]+)[.]?|([0-9]*[.][0-9]+))([(][0-9]+[)])?([eE][+-]?[0-9]+)? *\n([\t ]*\n)*)){2}((([ \t]*-?(([0-9]+)[.]?|([0-9]*[.][0-9]+))([(][0-9]+[)])?([eE][+-]?[0-9]+)? +){3})(-?(([0-9]+)[.]?|([0-9]*[.][0-9]+))([(][0-9]+[)])?([eE][+-]?[0-9]+)? *\n?([\t ]*\n)*))[ \t]*"
545545
; A set of 3x4 matrices separated by spaces and newlines on each line.
546546
Final newline optional. Optional spaces at start of lines. Blank lines accepted.
547547
;
@@ -556,7 +556,7 @@ data_mmcif_pdbx.dic
556556
author char "[A-Za-z0-9_]+(( |-|'|\. )[A-Za-z0-9_]+)*( Jr.| III)?, [A-Za-z0-9_]\.(-?[A-Za-z0-9_]+\.)*$" 'Author name in PDB format: Taylor, C.A.'
557557
orcid_id char "[0-9]{4}-[0-9]{4}-[0-9]{4}-([0-9]{3}X|[0-9]{4})" 'ORCID pattern - dddd-dddd-dddd-dddd|dddX'
558558
symmetry_operation char '[-+0-9XxYyZ/ ]+,[-+0-9XxYyZ/ ]+,[-+0-9XxYyZ/ ]+' 'Allowed characters for use in symmetry operation such as 1/2-x,y,1/2-z'
559-
sequence_dep char '[a-zA-Z0-9\t \r\n\v\f\(\)]+$' 'Deposition specific one letter code'
559+
sequence_dep char '[a-zA-Z0-9\t \r\n()]+$' 'Deposition specific one letter code'
560560
date_dep char '([1-9][0-9](([02468][048])|([13579][26]))-02-29)|[1-9][0-9][0-9][0-9]-((((0[1-9])|(1[0-2]))-((0[1-9])|(1[0-9])|(2[0-8])))|((((0[13578])|(1[02]))-31)|(((0[1,3-9])|(1[0-2]))-(29|30))))' 'Deposition specific date with better checking'
561561

562562
#####################

tests/test_cif.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -391,12 +391,7 @@ def test_validation(self):
391391
msg_list = []
392392
ddl = cif.Ddl(logger=(lambda msg: msg_list.append(msg), 6))
393393
ddl.read_ddl(cif.read(full_path('mmcif_pdbx_v50_frag.dic')))
394-
self.assertEqual(len(msg_list), 1,
395-
msg=f'Messages:\n {"\n ".join(msg_list)}')
396-
for msg in msg_list:
397-
expected = "Bad DDL2: can't parse regex for 'binary':"
398-
self.assertTrue(msg.startswith(expected), msg=msg)
399-
msg_list = []
394+
self.assertEqual(msg_list, [])
400395
ddl.validate_cif(doc)
401396
self.assertEqual(msg_list,
402397
['[dummy_block] unknown tag _custom_tag',

0 commit comments

Comments
 (0)