-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathuniprot-examples.sh
118 lines (88 loc) · 3.82 KB
/
uniprot-examples.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# --------------------
# UNIPROT EXAMPLES
# --------------------
# Every example is one `pq-up` invocation. The first quoted argument is the
# query; some examples pass a second quoted argument (e.g. "x(A,D,Seq)").
# NOTE(review): `pq-up` is not defined in this file; the second argument is
# presumably the term that selects which variables are reported -- confirm.
# ---
# human proteins with disease and interpro
# ---
pq-up "in_human(P),annotation(P,A),disease_annotation(A),interpro(P,X)"
# ---
# diseases for a specific protein (Q15465) and their annotations,
# together with the substitution (Seq) attached to each annotation A
# ---
pq-up "protein_natural_variant_disease(uniprot:'Q15465',A,D),substitution(A,Seq)" "x(A,D,Seq)"
# link from a specific protein to its variant to associated disease and the dbsnp id for the variant
pq-up "protein_natural_variant_disease_dbsnp(uniprot:'Q15465',A,D,X)"
# same protein -> variant -> disease link, but returning all xrefs (X)
pq-up "protein_natural_variant_disease_xref(uniprot:'Q15465',A,D,X)"
# all annotations and their types on a specific protein
# (T is whatever rdf:type is asserted on each annotation A)
pq-up "annotation(uniprot:'Q15465',A),rdf(A,rdf:type,T)"
# all sequence annotation types and their parent classification;
# rdf_is_iri(Parent) keeps only parents that are IRIs
pq-up "rdfs_subclass_of(T,up:'Sequence_Annotation'),subClassOf(T,Parent),rdf_is_iri(Parent)"
# GO, KW annotations (everything protein B5MCD5 is classified with)
pq-up "classified_with(uniprot:'B5MCD5',T)"
# rhea annotations, queried in both directions:
#   first from a fixed protein to its reactions (X),
#   then from a fixed reaction (rhea:'17641') to its proteins (P)
pq-up "protein_has_catalyzed_reaction(uniprot:'A0A1H0K750',X)"
pq-up "protein_has_catalyzed_reaction(P,rhea:'17641')"
# ---
# FEDERATION
# ---
#
# We show federated queries in 2 directions, alternating between the two
# SPARQL endpoints as base.
#
# In the examples below the command name follows the base endpoint:
#   pq-up-rhea  is used where uniprot is the base,
#   pq-rhea-up  is used where rhea is the base.
# The part of the query meant for the other endpoint is wrapped in
# service(<endpoint>, Goal).
#
# 1a. EC->rhea->Protein, uniprot as base
pq-up-rhea "service(rhea,rhea_ec(R,enzyme:'4.1.1.19')),protein_has_catalyzed_reaction(P,R)"
# 1b. EC->rhea->Protein, rhea as base
pq-rhea-up "rhea_ec(X,enzyme:'4.1.1.19'),service(uniprot,protein_has_catalyzed_reaction(P,X))"
# 2a. Protein->Rhea->EC, rhea as base
pq-rhea-up "service(uniprot,protein_has_catalyzed_reaction(uniprot:'A0A1H0K750',X)),rhea_ec(X,Y)"
# 2b. Protein->Rhea->EC, uniprot as base
pq-up-rhea "protein_has_catalyzed_reaction(uniprot:'A0A1H0K750',X),service(rhea,rhea_ec(X,Y))"
# Same logic but federating via sparqlprog: each goal is written as
# (Endpoint ?? Goal) instead of being wrapped in service(...).
# NOTE(review): the meaning of the -e flag is not visible in this file -- confirm
# against the pq-up-rhea wrapper.
pq-up-rhea -e "(uniprot ?? protein_has_catalyzed_reaction(uniprot:'A0A1H0K750',X)),(rhea ?? rhea_ec(X,Y))"
pq-up-rhea -e "(rhea ?? rhea_ec(X,enzyme:'4.1.1.19')), (uniprot ?? protein_has_catalyzed_reaction(P,X))"
# human RHEA annotations: reference-proteome proteins in taxon 9606 that have a
# catalyzed reaction. Rev (the up:reviewed value) is bound by the query but is
# not part of the second argument, row(P,R).
pq-up "in_reference_proteome(P),rdf(P,up:reviewed,Rev),in_taxon(P,uptaxon:'9606'),protein_has_catalyzed_reaction(P,R)" "row(P,R)"
# proteins associated with >1 disease: two variant/disease links on the same
# protein P whose diseases differ (D1\=D2). Inside double quotes the shell
# leaves the backslash before '=' in place, so the query receives \= unchanged.
# NOTE(review): the -l and -L pref_label options are not explained in this
# file; presumably they control label output -- confirm.
pq-up -l -L pref_label "protein_natural_variant_disease(P,V1,D1),protein_natural_variant_disease(P,V2,D2),D1\=D2"
# count of all proteins
# The command must start at `pq-up`: a leading "$ " copied from an interactive
# prompt would make the shell look for a command literally named "$" and fail
# with "command not found" instead of running the query.
pq-up "aggregate(count(P),protein(P),N)" "count(P)"
# 262095770
# NOTE(review): the number above is presumably the result recorded when this
# example was written; expect a different value on later runs.
# ---
# distinct databases used in xrefs
# (--distinct is passed to pq-up; only D is kept in the second argument, x(D))
# ---
pq-up --distinct "database(X,D)" "x(D)"
# ---
# count of xrefs broken down by database
# (count(X) is grouped by the list [D])
# ---
pq-up "aggregate_group(count(X),[D],database(X,D),N)"
# ---
# all MGI xrefs
# ---
pq-up "xref(P,X),database(X,updb:'MGI')"
# reviewed subset: same query with reviewed(P) added
pq-up "xref(P,X),database(X,updb:'MGI'),reviewed(P)"
# xrefs to OMIM (this will be both proteins and diseases);
# note the database identifier used in the query is updb:'MIM'
# NOTE(review): the -l option is not explained in this file -- confirm its meaning.
pq-up -l "xref(P,X),database(X,updb:'MIM')"
# ---
# describe a specific mouse protein
# (here the arguments are the word `describe` and the protein IRI, not a quoted query)
# ---
pq-up describe http://purl.uniprot.org/uniprot/Q78E61
# ---
# all in a taxon (7955, the taxon the zebrafish example below also uses)
# ---
pq-up "protein(P),in_taxon(P,uptaxon:'7955')"
# ---
# all reference proteins in a genome
# ---
pq-up "in_reference_proteome(P),in_taxon(P,uptaxon:'7955')"
# ---
# zebrafish reference proteins with no ZFIN mapping
# The "\+" in front of xref_in(...) expresses the "no ZFIN mapping" condition.
# Inside double quotes the shell keeps the backslash before '+', so the query
# receives \+ unchanged.
# ---
pq-up "in_reference_proteome(P),rdf(P,up:reviewed,Rev),in_taxon(P,uptaxon:'7955'),\+xref_in(P,X,'ZFIN')" "row(P,Rev)"
# wormbase genes, two formulations:
#   1. restrict to taxon 6239 and ask for a 'WormBase' xref via xref_in
#   2. no taxon restriction; keep xrefs whose database D satisfies is_wormbase(D)
pq-up "in_reference_proteome(P),rdf(P,up:reviewed,Rev),in_taxon(P,uptaxon:'6239'),xref_in(P,X,'WormBase')"
pq-up "in_reference_proteome(P),rdf(P,up:reviewed,Rev),xref(P,X),database(X,D),is_wormbase(D)" "row(P,X)"
# all araport (the taxon is left as the unbound variable Tax rather than fixed)
pq-up "in_reference_proteome(P),rdf(P,up:reviewed,Rev),in_taxon(P,Tax),xref_in(P,X,'Araport')"
# https://douroucouli.wordpress.com/2020/08/05/what-is-the-sars-cov-2-molecular-parts-list/#comments
# peptides with descriptions: peptide annotations that carry an rdfs:comment (D),
# joined to the protein P they annotate
# NOTE(review): the link above is about SARS-CoV-2, but this query is restricted
# to uptaxon:'9606' (the taxon used for the "human" example earlier in this
# file) -- confirm that the taxon is the intended one.
pq-up "peptide_annotation(Pep),rdf(Pep,rdfs:comment,D),annotation(P,Pep),in_taxon(P,uptaxon:'9606')"