32
32
class Query (metaclass = abc .ABCMeta ):
33
33
"""
34
34
A representation of a JSON query for the RCSB search API.
35
-
35
+
36
36
This is the abstract base class for all queries.
37
37
"""
38
38
@abc .abstractmethod
@@ -62,7 +62,7 @@ def __or__(self, query):
62
62
class SingleQuery (Query , metaclass = abc .ABCMeta ):
63
63
"""
64
64
A terminal query node for the RCSB search API.
65
-
65
+
66
66
Multiple :class:`SingleQuery` objects can be combined to
67
67
:class:`CompositeQuery` objects using the ``|`` and ``&`` operators.
68
68
@@ -77,7 +77,7 @@ def get_content(self):
77
77
class CompositeQuery (Query ):
78
78
"""
79
79
A group query node for the RCSB search API.
80
-
80
+
81
81
A composite query is a combination of other queries, combined
82
82
either with the `'and'` or `'or'` operator.
83
83
Usually, a :class:`CompositeQuery` will not be created by calling
@@ -98,11 +98,11 @@ def __init__(self, queries, operator):
98
98
f"Operator must be 'or' or 'and', not '{ operator } '"
99
99
)
100
100
self ._operator = operator
101
-
101
+
102
102
def get_content (self ):
103
103
"""
104
104
A dictionary representation of the query.
105
- This dictionary is the content of the ``'query'`` key in the
105
+ This dictionary is the content of the ``'query'`` key in the
106
106
JSON query.
107
107
108
108
Returns
@@ -133,13 +133,13 @@ class BasicQuery(SingleQuery):
133
133
The matching is not case-sensitive.
134
134
Logic combinations of terms are described
135
135
`here <https://search.rcsb.org/#basic-queries>`_.
136
-
136
+
137
137
Examples
138
138
--------
139
-
139
+
140
140
>>> query = BasicQuery("tc5b")
141
- >>> print(search(query))
142
- ['1L2Y', '8ANM ', '8ANH', '8ANG ', '8ANI ']
141
+ >>> print(sorted( search(query) ))
142
+ ['1L2Y', '8ANG ', '8ANH', '8ANI ', '8ANM ']
143
143
"""
144
144
def __init__ (self , term ):
145
145
super ().__init__ ()
@@ -207,7 +207,7 @@ class FieldQuery(SingleQuery):
207
207
208
208
Examples
209
209
--------
210
-
210
+
211
211
>>> query = FieldQuery("reflns.d_resolution_high", less_or_equal=0.6)
212
212
>>> print(sorted(search(query)))
213
213
['1EJG', '1I0T', '3NIR', '3P4J', '4JLJ', '5D8V', '5NW3', '7ATG', '7R0H']
@@ -218,7 +218,7 @@ def __init__(self, field, molecular_definition=False, case_sensitive=False, **kw
218
218
self ._field = field
219
219
self ._mol_definition = molecular_definition
220
220
self ._case_sensitive = case_sensitive
221
-
221
+
222
222
if len (kwargs ) > 1 :
223
223
raise TypeError ("Only one operator must be given" )
224
224
elif len (kwargs ) == 1 :
@@ -228,7 +228,7 @@ def __init__(self, field, molecular_definition=False, case_sensitive=False, **kw
228
228
# No operator is given
229
229
self ._operator = "exists"
230
230
self ._value = None
231
-
231
+
232
232
if self ._operator not in [
233
233
"exact_match" ,
234
234
"contains_words" , "contains_phrase" ,
@@ -241,7 +241,7 @@ def __init__(self, field, molecular_definition=False, case_sensitive=False, **kw
241
241
f"Constructor got an unexpected keyword argument "
242
242
f"'{ self ._operator } '"
243
243
)
244
-
244
+
245
245
# Convert dates into ISO 8601
246
246
if isinstance (self ._value , datetime ):
247
247
self ._value = _to_isoformat (self ._value )
@@ -250,7 +250,7 @@ def __init__(self, field, molecular_definition=False, case_sensitive=False, **kw
250
250
_to_isoformat (val ) if isinstance (val , datetime ) else val
251
251
for val in self ._value
252
252
]
253
-
253
+
254
254
# Create dictionary for 'range' operator
255
255
if self ._operator == "range" :
256
256
self ._value = {
@@ -266,7 +266,7 @@ def __init__(self, field, molecular_definition=False, case_sensitive=False, **kw
266
266
"to" : self ._value [1 ],
267
267
"include_upper" : True
268
268
}
269
-
269
+
270
270
# Rename operators to names used in API
271
271
if self ._operator == "is_in" :
272
272
# 'in' is not an available parameter name in Python
@@ -326,7 +326,7 @@ class SequenceQuery(SingleQuery):
326
326
327
327
Examples
328
328
--------
329
-
329
+
330
330
>>> sequence = "NLYIQWLKDGGPSSGRPPPS"
331
331
>>> query = SequenceQuery(sequence, scope="protein", min_identity=0.8)
332
332
>>> print(sorted(search(query)))
@@ -338,12 +338,12 @@ def __init__(self, sequence, scope,
338
338
self ._target = _scope_to_target .get (scope .lower ())
339
339
if self ._target is None :
340
340
raise ValueError (f"'{ scope } ' is an invalid scope" )
341
-
341
+
342
342
if isinstance (sequence , NucleotideSequence ) and scope .lower () == "rna" :
343
343
self ._sequence = str (sequence ).replace ("T" , "U" )
344
344
else :
345
345
self ._sequence = str (sequence )
346
-
346
+
347
347
self ._min_identity = min_identity
348
348
self ._max_expect_value = max_expect_value
349
349
@@ -371,10 +371,10 @@ class MotifQuery(SingleQuery):
371
371
The type of the pattern.
372
372
scope : {'protein', 'dna', 'rna'}
373
373
The type of molecule to find.
374
-
374
+
375
375
Examples
376
376
--------
377
-
377
+
378
378
>>> query = MotifQuery(
379
379
... "C-x(2,4)-C-x(3)-[LIVMFYWC]-x(8)-H-x(3,5)-H.",
380
380
... "prosite",
@@ -416,7 +416,7 @@ class StructureQuery(SingleQuery):
416
416
strict : bool, optional
417
417
If true, structure comparison is strict, otherwise it is
418
418
relaxed.
419
-
419
+
420
420
Examples
421
421
--------
422
422
@@ -442,7 +442,7 @@ def __init__(self, pdb_id, chain=None, assembly=None, strict=True):
442
442
"entry_id" : pdb_id ,
443
443
"asym_id" : chain
444
444
}
445
-
445
+
446
446
self ._operator = "strict_shape_match" if strict \
447
447
else "relaxed_shape_match"
448
448
@@ -462,15 +462,15 @@ class Sorting:
462
462
def __init__ (self , field , descending = True ):
463
463
self ._field = field
464
464
self ._descending = descending
465
-
465
+
466
466
@property
467
467
def field (self ):
468
468
return self ._field
469
469
470
470
@property
471
471
def descending (self ):
472
472
return self ._descending
473
-
473
+
474
474
def get_content (self ):
475
475
"""
476
476
Get the sorting content, i.e. the data belonging to the
@@ -497,7 +497,7 @@ def get_content(self):
497
497
498
498
class Grouping (metaclass = abc .ABCMeta ):
499
499
"""
500
- A representation of the JSON grouping options of the RCSB search
500
+ A representation of the JSON grouping options of the RCSB search
501
501
API.
502
502
503
503
Parameters
@@ -521,7 +521,7 @@ def __init__(self, sort_by=None):
521
521
self ._sorting = sort_by
522
522
else :
523
523
self ._sorting = Sorting (sort_by )
524
-
524
+
525
525
@abc .abstractmethod
526
526
def get_content (self ):
527
527
"""
@@ -542,7 +542,7 @@ def get_content(self):
542
542
return {"ranking_criteria_type" : self ._sorting .get_content ()}
543
543
else :
544
544
return {}
545
-
545
+
546
546
@abc .abstractmethod
547
547
def is_compatible_return_type (self , return_type ):
548
548
"""
@@ -555,7 +555,7 @@ def is_compatible_return_type(self, return_type):
555
555
----------
556
556
return_type : str
557
557
The ``return_type`` attribute to be checked.
558
-
558
+
559
559
Returns
560
560
-------
561
561
is_compatible : bool
@@ -593,7 +593,7 @@ def get_content(self):
593
593
content = super ().get_content ()
594
594
content ["aggregation_method" ] = "matching_deposit_group_id"
595
595
return content
596
-
596
+
597
597
def is_compatible_return_type (self , return_type ):
598
598
return return_type == "entry"
599
599
@@ -640,7 +640,7 @@ def get_content(self):
640
640
content ["aggregation_method" ] = "sequence_identity"
641
641
content ["similarity_cutoff" ] = self ._similarity_cutoff
642
642
return content
643
-
643
+
644
644
def is_compatible_return_type (self , return_type ):
645
645
return return_type == "polymer_entity"
646
646
@@ -672,7 +672,7 @@ def get_content(self):
672
672
content = super ().get_content ()
673
673
content ["aggregation_method" ] = "matching_uniprot_accession"
674
674
return content
675
-
675
+
676
676
def is_compatible_return_type (self , return_type ):
677
677
return return_type == "polymer_entity"
678
678
@@ -685,9 +685,9 @@ def count(query, return_type="entry", group_by=None,
685
685
"""
686
686
Count PDB entries that meet the given query requirements,
687
687
via the RCSB search API.
688
-
688
+
689
689
This function requires an internet connection.
690
-
690
+
691
691
Parameters
692
692
----------
693
693
query : Query
@@ -719,17 +719,17 @@ def count(query, return_type="entry", group_by=None,
719
719
count : int
720
720
The total number of PDB IDs (or groups) that would be returned
721
721
by calling :func:`search()` using the same parameters.
722
-
722
+
723
723
Notes
724
724
-----
725
725
If `group_by` is set, the number of results may be lower than in an
726
726
ungrouped query, as grouping is not applicable to all structures.
727
727
For example a DNA structure has no associated *Uniprot* accession
728
728
and hence is omitted by :class:`UniprotGrouping`.
729
-
729
+
730
730
Examples
731
731
--------
732
-
732
+
733
733
>>> query = FieldQuery("reflns.d_resolution_high", less_or_equal=0.6)
734
734
>>> print(count(query))
735
735
9
@@ -742,9 +742,9 @@ def count(query, return_type="entry", group_by=None,
742
742
)
743
743
744
744
query_dict ["request_options" ]["return_counts" ] = True
745
-
745
+
746
746
r = requests .get (_search_url , params = {"json" : json .dumps (query_dict )})
747
-
747
+
748
748
if r .status_code == 200 :
749
749
if group_by is None :
750
750
return r .json ()["total_count" ]
@@ -766,9 +766,9 @@ def search(query, return_type="entry", range=None, sort_by=None, group_by=None,
766
766
"""
767
767
Get all PDB IDs that meet the given query requirements,
768
768
via the RCSB search API.
769
-
769
+
770
770
This function requires an internet connection.
771
-
771
+
772
772
Parameters
773
773
----------
774
774
query : Query
@@ -786,7 +786,7 @@ def search(query, return_type="entry", range=None, sort_by=None, group_by=None,
786
786
of non-polymeric entities is returned (e.g. ``'XXXX_1'``).
787
787
- ``'polymer_instance'``: The PDB ID appended with chain ID
788
788
(more exactly ``'asym_id'``) is returned (e.g. ``'XXXX.A'``).
789
-
789
+
790
790
range : tuple(int, int), optional
791
791
If this parameter is specified, only PDB IDs in this range
792
792
are selected from all matching PDB IDs and returned
@@ -832,7 +832,7 @@ def search(query, return_type="entry", range=None, sort_by=None, group_by=None,
832
832
returned.
833
833
This dictionary maps group identifiers to a list of all PDB IDs
834
834
belonging to this group.
835
-
835
+
836
836
Notes
837
837
-----
838
838
If `group_by` is set, the number of results may be lower than in an
@@ -846,7 +846,7 @@ def search(query, return_type="entry", range=None, sort_by=None, group_by=None,
846
846
847
847
Examples
848
848
--------
849
-
849
+
850
850
>>> query = FieldQuery("reflns.d_resolution_high", less_or_equal=0.6)
851
851
>>> print(sorted(search(query)))
852
852
['1EJG', '1I0T', '3NIR', '3P4J', '4JLJ', '5D8V', '5NW3', '7ATG', '7R0H']
@@ -894,7 +894,7 @@ def search(query, return_type="entry", range=None, sort_by=None, group_by=None,
894
894
}
895
895
896
896
r = requests .get (_search_url , params = {"json" : json .dumps (query_dict )})
897
-
897
+
898
898
if r .status_code == 200 :
899
899
if group_by is None or not return_groups :
900
900
return [result ["identifier" ] for result in r .json ()["result_set" ]]
@@ -926,9 +926,9 @@ def _initialize_query_dict(query, return_type, group_by, content_types):
926
926
"polymer_entity" , "non_polymer_entity" ,
927
927
]:
928
928
raise ValueError (f"'{ return_type } ' is an invalid return type" )
929
-
929
+
930
930
request_options = {}
931
-
931
+
932
932
if len (content_types ) == 0 :
933
933
raise ValueError ("At least one content type must be specified" )
934
934
for content_type in content_types :
0 commit comments