Skip to content

Commit 63d143e

Browse files
committed
feat: hgnc_xlink.tsv is expected now in hpo folder (#170)
1 parent b89fc26 commit 63d143e

21 files changed

+182
-106
lines changed

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ utoipa-swagger-ui = { version = "7.1.0", features = ["actix-web"] }
4545
utoipa = { version = "4.2", features = ["actix_extras", "chrono", "indexmap", "preserve_order", "yaml"] }
4646

4747
[build-dependencies]
48+
anyhow = "1.0"
4849
prost-build = "0.12"
4950

5051
[dev-dependencies]

README.md

Lines changed: 29 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -34,14 +34,29 @@ We fix ourselves to the release from 2023-06-06.
3434
```
3535
# RELEASE=2023-06-06
3636
# URL=https://github.com/obophenotype/human-phenotype-ontology/releases/download
37-
# NAMES="hp.obo phenotype.hpoa phenotype_to_genes.txt genes_to_phenotype.txt"
37+
# NAMES="hp-base.obo phenotype.hpoa phenotype_to_genes.txt genes_to_phenotype.txt"
3838
3939
# mkdir -p /tmp/data/hpo
4040
# for name in $NAMES; do \
4141
wget \
4242
-O /tmp/data/hpo/$name \
4343
$URL/v$RELEASE/$name;
4444
done
45+
# mv /tmp/data/hpo/hp-base.obo /tmp/data/hpo/hp.obo
46+
# sed -i -e 's|/hp-base.owl||' /tmp/data/hpo/hp.obo
47+
```
48+
49+
Next, generate the cross-link file between different gene identifiers.
50+
51+
```
52+
# wget -O /tmp/hgnc_complete_set.json \
53+
https://ftp.ebi.ac.uk/pub/databases/genenames/hgnc/json/hgnc_complete_set.json
54+
# echo -e "hgnc_id\tensembl_gene_id\tentrez_id\tgene_symbol" \
55+
> /tmp/data/hpo/hgnc_xlink.tsv
56+
# jq -r '.response.docs[] | select(.entrez_id != null) | [.hgnc_id, .ensembl_gene_id, .entrez_id, .symbol] | @tsv' \
57+
/tmp/hgnc_complete_set.json \
58+
| LC_ALL=C sort -t $'\t' -k3,3n \
59+
>> /tmp/data/hpo/hgnc_xlink.tsv
4560
```
4661

4762
You can now convert the downloaded text HPO files to a binary format, which will improve the performance of loading data.
@@ -57,46 +72,29 @@ You can now conver the downloaded text HPO files to a binary format which will i
5772
After having the precomputed data, you can startup the server as follows:
5873

5974
```
60-
# viguno run-server \
75+
# viguno server run \
6176
--path-hpo-dir tests/data/hpo
6277
INFO args_common = Args { verbose: Verbosity { verbose: 0, quiet: 0, phantom: PhantomData<clap_verbosity_flag::InfoLevel> } }
6378
INFO args = Args { path_hpo_dir: "tests/data/hpo", suppress_hints: false, listen_host: "127.0.0.1", listen_port: 8080 }
6479
INFO Loading HPO...
65-
INFO ...done loading HPO in 8.180012599s
66-
INFO Opening RocksDB for reading...
67-
INFO ...done opening RocksDB in 19.027133ms
80+
INFO attempting to load binary HPO file from tests/data/hpo
81+
INFO ...done loading HPO in 4.788750172s
82+
INFO Loading HGNC xlink...
83+
INFO ... done loading HGNC xlink in 156.362034ms
84+
INFO Loading HPO OBO...
85+
INFO ... done loading HPO OBO in 1.90213703s
86+
INFO Indexing OBO...
87+
INFO ... done indexing OBO in 835.558794ms
6888
INFO Launching server main on http://127.0.0.1:8080 ...
69-
INFO try: http://127.0.0.1:8080/hpo/genes?gene_symbol=TGDS
70-
INFO try: http://127.0.0.1:8080/hpo/genes?gene_id=23483&hpo_terms=true
71-
INFO try: http://127.0.0.1:8080/hpo/omims?omim_id=616145&hpo_terms=true
72-
INFO try: http://127.0.0.1:8080/hpo/terms?term_id=HP:0000023&genes=true
73-
INFO try: http://127.0.0.1:8080/hpo/sim/term-term?lhs=HP:0001166,HP:0040069&rhs=HP:0005918,HP:0004188
74-
INFO try: http://127.0.0.1:8080/hpo/sim/term-gene?terms=HP:0001166,HP:0000098&gene_symbols=FBN1,TGDS,TTN
75-
INFO starting 4 workers
89+
INFO SEE SWAGGER UI FOR INTERACTIVE DOCS: http://127.0.0.1:8080/swagger-ui/
90+
INFO starting 8 workers
7691
INFO Actix runtime found; starting in Actix runtime
7792
```
7893

7994
Now the server is running and you can stop it with `Ctrl-C`.
8095

81-
In another terminal, you then now do as suggested above.
82-
Note that we truncate the output JSON.
83-
84-
```
85-
# curl 'http://127.0.0.1:8080/hpo/genes?gene_symbol=TGDS'
86-
[{"gene_ncbi_id":23483,"gene_symbol":"TGDS"}]
87-
88-
# curl 'http://127.0.0.1:8080/hpo/genes?gene_id=23483&hpo_terms=true'
89-
[{"gene_ncbi_id":23483,"gene_symbol":"TGDS","hpo_terms":[{"term_...
90-
91-
# curl 'http://127.0.0.1:8080/hpo/omims?omim_id=616145&hpo_terms=true'
92-
[{"omim_id":"OMIM:616145","name":"Catel-Manzke syndrome","hpo_te...
93-
94-
# curl 'http://127.0.0.1:8080/hpo/terms?term_id=HP:0000023&genes=true'
95-
[{"term_id":"HP:0000023","name":"Inguinal hernia","genes":[{"gen...
96-
97-
# curl 'http://127.0.0.1:8080/hpo/sim/term-term?lhs=HP:0001166,HP:0040069&rhs=HP:0005918,HP:0004188'
98-
[{"lhs":"HP:0001166","rhs":"HP:0005918","score":1.4280319,"sim":...
99-
```
96+
You can go to http://127.0.0.1:8080/swagger-ui/ to see the automatically generated interactive API documentation.
97+
You can find the OpenAPI YAML file for the `main` branch [here on GitHub](https://raw.githubusercontent.com/varfish-org/viguno/main/openapi.yaml) and, e.g., open it [here in the public Swagger editor](https://editor.swagger.io?url=https://raw.githubusercontent.com/varfish-org/viguno/main/openapi.yaml).
10098

10199
# Developer Documentation
102100

build.rs

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,28 @@
1-
// The custom build script, needed as we use protocolbuffers.
1+
use std::{env, path::PathBuf};
2+
3+
fn main() -> Result<(), anyhow::Error> {
4+
let root = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("protos");
5+
let proto_files = vec!["viguno/v1/simulation.proto"]
6+
.iter()
7+
.map(|f| root.join(f))
8+
.collect::<Vec<_>>();
9+
10+
// Tell cargo to recompile if any of these proto files are changed
11+
for proto_file in &proto_files {
12+
println!("cargo:rerun-if-changed={}", proto_file.display());
13+
}
14+
15+
let descriptor_path = PathBuf::from(env::var("OUT_DIR").unwrap()).join("proto_descriptor.bin");
216

3-
fn main() {
417
prost_build::Config::new()
5-
.protoc_arg("-Isrc/proto")
18+
// Save descriptors to file
19+
.file_descriptor_set_path(&descriptor_path)
620
// Add serde serialization and deserialization to the generated code.
721
.type_attribute(".", "#[derive(serde::Serialize, serde::Deserialize)]")
822
// Skip serializing `None` values.
923
.type_attribute(".", "#[serde_with::skip_serializing_none]")
1024
// Define the protobuf files to compile.
11-
.compile_protos(&["viguno/v1/simulation.proto"], &["src/"])
12-
.unwrap();
25+
.compile_protos(&proto_files, &[root])?;
26+
27+
Ok(())
1328
}

openapi.yaml

Lines changed: 98 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ paths:
1414
tags:
1515
- hpo_genes
1616
summary: Query for genes in the HPO database.
17-
operationId: handle
17+
operationId: hpo_genes
1818
parameters:
1919
- name: gene_id
2020
in: query
@@ -63,7 +63,7 @@ paths:
6363
tags:
6464
- hpo_omims
6565
summary: Query for OMIM diseases in the HPO database.
66-
operationId: handle
66+
operationId: hpo_omims
6767
parameters:
6868
- name: omim_id
6969
in: query
@@ -107,6 +107,101 @@ paths:
107107
application/json:
108108
schema:
109109
$ref: '#/components/schemas/Result'
110+
/hpo/sim/term-gene:
111+
get:
112+
tags:
113+
- hpo_sim::term_gene
114+
summary: Query for similarity between a set of terms to each entry in a
115+
description: list of genes.
116+
operationId: hpo_sim_term_gene
117+
parameters:
118+
- name: terms
119+
in: query
120+
description: Set of terms to use as query.
121+
required: true
122+
schema:
123+
type: array
124+
items:
125+
type: string
126+
- name: gene_ids
127+
in: query
128+
description: The set of ids for genes to use as "database".
129+
required: false
130+
schema:
131+
type: array
132+
items:
133+
type: string
134+
nullable: true
135+
- name: gene_symbols
136+
in: query
137+
description: The set of symbols for genes to use as "database".
138+
required: false
139+
schema:
140+
type: array
141+
items:
142+
type: string
143+
nullable: true
144+
responses:
145+
'200':
146+
description: The query was successful.
147+
content:
148+
application/json:
149+
schema:
150+
$ref: '#/components/schemas/Result'
151+
/hpo/sim/term-term:
152+
get:
153+
tags:
154+
- hpo_sim::term_term
155+
summary: Query for pairwise term similarity.
156+
description: |-
157+
In the case of Resnik, this corresponds to `IC(MICA(t_1, t_2))`.
158+
159+
# Errors
160+
161+
In the case that there is an error running the server.
162+
operationId: hpo_sim_term_term
163+
parameters:
164+
- name: lhs
165+
in: query
166+
description: The one set of HPO terms to compute similarity for.
167+
required: true
168+
schema:
169+
type: array
170+
items:
171+
type: string
172+
- name: rhs
173+
in: query
174+
description: The second set of HPO terms to compute similarity for.
175+
required: true
176+
schema:
177+
type: array
178+
items:
179+
type: string
180+
- name: ic_base
181+
in: query
182+
description: What should information content be based on.
183+
required: false
184+
schema:
185+
$ref: '#/components/schemas/IcBasedOn'
186+
- name: similarity
187+
in: query
188+
description: The similarity method to use.
189+
required: false
190+
schema:
191+
$ref: '#/components/schemas/SimilarityMethod'
192+
- name: combiner
193+
in: query
194+
description: The score combiner.
195+
required: false
196+
schema:
197+
$ref: '#/components/schemas/ScoreCombiner'
198+
responses:
199+
'200':
200+
description: The query was successful.
201+
content:
202+
application/json:
203+
schema:
204+
$ref: '#/components/schemas/Result'
110205
/hpo/terms:
111206
get:
112207
tags:
@@ -116,7 +211,7 @@ paths:
116211
# Errors
117212
118213
In the case that there is an error running the server.
119-
operationId: handle
214+
operationId: hpo_terms
120215
parameters:
121216
- name: term_id
122217
in: query

src/query/mod.rs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,6 @@ pub struct Args {
1818
/// Path to the directory with the HPO files.
1919
#[arg(long, required = true)]
2020
pub path_hpo_dir: String,
21-
/// Path to the TSV file with the HGNC xlink data.
22-
#[arg(long, required = true)]
23-
pub path_hgnc_xlink: String,
2421

2522
/// Path to JSON file with the genes to rank.
2623
#[arg(long)]
@@ -318,7 +315,8 @@ pub fn run(args_common: &crate::common::Args, args: &Args) -> Result<(), anyhow:
318315

319316
tracing::info!("Loading HGNC xlink...");
320317
let before_load_xlink = Instant::now();
321-
let ncbi_to_hgnc = crate::common::hgnc_xlink::load_ncbi_to_hgnc(&args.path_hgnc_xlink)?;
318+
let path_hgnc_xlink = format!("{}/hgnc_xlink.tsv", args.path_hpo_dir);
319+
let ncbi_to_hgnc = crate::common::hgnc_xlink::load_ncbi_to_hgnc(&path_hgnc_xlink)?;
322320
tracing::info!(
323321
"... done loading HGNC xlink in {:?}",
324322
before_load_xlink.elapsed()

src/server/run/hpo_genes.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ pub struct Result {
132132
/// Query for genes in the HPO database.
133133
#[allow(clippy::unused_async)]
134134
#[utoipa::path(
135+
operation_id = "hpo_genes",
135136
params(Query),
136137
responses(
137138
(status = 200, description = "The query was successful.", body = Result),

src/server/run/hpo_omims.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,7 @@ pub struct Result {
145145
/// Query for OMIM diseases in the HPO database.
146146
#[allow(clippy::unused_async)]
147147
#[utoipa::path(
148+
operation_id = "hpo_omims",
148149
params(Query),
149150
responses(
150151
(status = 200, description = "The query was successful.", body = Result),

src/server/run/hpo_sim/term_gene.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ pub struct Query {
4949
/// list of genes.
5050
#[allow(clippy::unused_async)]
5151
#[utoipa::path(
52+
operation_id = "hpo_sim_term_gene",
5253
params(Query),
5354
responses(
5455
(status = 200, description = "The query was successful.", body = Result),

src/server/run/hpo_sim/term_term.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ pub struct ResultEntry {
105105
/// In the case that there is an error running the server.
106106
#[allow(clippy::unused_async)]
107107
#[utoipa::path(
108+
operation_id = "hpo_sim_term_term",
108109
params(RequestQuery),
109110
responses(
110111
(status = 200, description = "The query was successful.", body = Result),

src/server/run/hpo_terms.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,7 @@ pub struct Result {
208208
#[allow(clippy::unused_async)]
209209
#[allow(clippy::too_many_lines)]
210210
#[utoipa::path(
211+
operation_id = "hpo_terms",
211212
params(Query),
212213
responses(
213214
(status = 200, description = "The query was successful.", body = Result),

0 commit comments

Comments
 (0)