Skip to content

Commit

Permalink
Merge branch 'develop' into develop_ehds2_cql
Browse files Browse the repository at this point in the history
  • Loading branch information
DavidCroftDKFZ committed Nov 22, 2024
2 parents 28a88b1 + 49be488 commit f77cece
Show file tree
Hide file tree
Showing 20 changed files with 430 additions and 41 deletions.
17 changes: 17 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,20 @@
# Samply.Focus v0.8.0 2024-11-04

In this release, we are supporting 4 types of SQL queries for Exliquid and Organoids

## Major changes
* Allowlist of SQL queries


# Samply.Focus v0.7.0 2024-09-24

In this release, we are extending the supported data backends beyond CQL-enabled FHIR stores. We now support PostgreSQL as well. Usage instructions are included in the Readme.

## Major changes
* PostgreSQL support added



# Focus -- 2023-02-08

This is the initial release of Focus, a task distribution application designed for working with Samply.Beam. Currently, only Samply.Blaze is supported as an endpoint, but other endpoints can easily be integrated.
23 changes: 13 additions & 10 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "focus"
version = "0.6.0"
version = "0.9.0"
edition = "2021"
license = "Apache-2.0"

Expand All @@ -11,34 +11,37 @@ base64 = "0.22.1"
reqwest = { version = "0.12", default-features = false, features = ["json", "default-tls"] }
serde = { version = "1.0.152", features = ["serde_derive"] }
serde_json = "1.0"
thiserror = "1.0.38"
thiserror = "2.0.3"
chrono = "0.4.31"
indexmap = "2.1.0"
tokio = { version = "1.25.0", default_features = false, features = ["signal", "rt-multi-thread", "macros"] }
tokio = { version = "1.25.0", default-features = false, features = ["signal", "rt-multi-thread", "macros"] }
beam-lib = { git = "https://github.com/samply/beam", branch = "develop", features = ["http-util"] }
laplace_rs = {git = "https://github.com/samply/laplace-rs.git", tag = "v0.3.0" }
laplace_rs = {git = "https://github.com/samply/laplace-rs.git", tag = "v0.4.0" }
uuid = "1.8.0"
rand = { default-features = false, version = "0.8.5" }
futures-util = { version = "0.3", default-features = false, features = ["std"] }
sqlx = { version = "0.7.4", features = [ "runtime-tokio", "postgres", "macros", "chrono"] }
sqlx-pgrow-serde = "0.2.0"
tryhard = "0.5"

# Logging
tracing = { version = "0.1.37", default_features = false }
tracing-subscriber = { version = "0.3.11", default_features = false, features = ["env-filter", "ansi"] }
tracing = { version = "0.1.37", default-features = false }
tracing-subscriber = { version = "0.3.11", default-features = false, features = ["env-filter", "ansi"] }

# Global variables
once_cell = "1.18"

# Command Line Interface
clap = { version = "4", default_features = false, features = ["std", "env", "derive", "help", "color"] }
clap = { version = "4", default-features = false, features = ["std", "env", "derive", "help", "color"] }

# Query via SQL
sqlx = { version = "0.8.2", features = [ "runtime-tokio", "postgres", "macros", "chrono", "rust_decimal", "uuid"], optional = true }
kurtbuilds_sqlx_serde = { version = "0.3.2", features = [ "json", "decimal", "chrono", "uuid"], optional = true }


[features]
default = []
bbmri = []
dktk = []
dktk = ["query-sql"]
query-sql = ["dep:sqlx", "dep:kurtbuilds_sqlx_serde"]

[dev-dependencies]
pretty_assertions = "1.4.0"
Expand Down
15 changes: 12 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,19 @@ PROJECTS_NO_OBFUSCATION = "exliquid;dktk_supervisors;exporter;ehds2" # Projects
QUERIES_TO_CACHE = "queries_to_cache.conf" # The path to a file containing base64 encoded queries whose results are to be cached. If not set, no results are cached
PROVIDER = "name" #EUCAIM provider name
PROVIDER_ICON = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABAQMAAAAl21bKAAAAA1BMVEUAAACnej3aAAAAAXRSTlMAQObYZgAAAApJREFUCNdjYAAAAAIAAeIhvDMAAAAASUVORK5CYII=" # Base64 encoded EUCAIM provider icon
AUTH_HEADER = "ApiKey XXXX" #Authorization header
AUTH_HEADER = "ApiKey XXXX" #Authorization header; if the endpoint type is Blaze or BlazeAndSql, this header is used for the Exporter target application, and the syntax is AUTH_HEADER = "XXXX" where "XXXX" is the API key
```

In order to use Postgres querying, a Docker image built with the feature "dktk" needs to be used and this optional variable set:
```bash
POSTGRES_CONNECTION_STRING = "postgresql://postgres:Test.123@localhost:5432/postgres" # Postgres connection string
```

Additionally when using Postgres this optional variable can be set:
```bash
MAX_DB_ATTEMPTS = "8" # Max number of attempts to connect to the database; default value: 8
```

Obfuscating zero counts is by default switched off. To enable obfuscating zero counts, set the env. variable `OBFUSCATE_ZERO`.

Optionally, you can provide the `TLS_CA_CERTIFICATES_DIR` environment variable to add additional trusted certificates, e.g., if you have a TLS-terminating proxy server in place. The application respects the `HTTP_PROXY`, `HTTPS_PROXY`, `ALL_PROXY`, `NO_PROXY`, and their respective lowercase equivalents.
Expand All @@ -81,9 +90,9 @@ Creating a sample task containing an abstract syntax tree (AST) query using curl
curl -v -X POST -H "Content-Type: application/json" --data '{"id":"7fffefff-ffef-fcff-feef-feffffffffff","from":"app1.proxy1.broker","to":["app1.proxy1.broker"],"ttl":"10s","failure_strategy":{"retry":{"backoff_millisecs":1000,"max_tries":5}},"metadata":{"project":"bbmri"},"body":"eyJsYW5nIjoiYXN0IiwicGF5bG9hZCI6ImV5SmhjM1FpT25zaWIzQmxjbUZ1WkNJNklrOVNJaXdpWTJocGJHUnlaVzRpT2x0N0ltOXdaWEpoYm1RaU9pSkJUa1FpTENKamFHbHNaSEpsYmlJNlczc2liM0JsY21GdVpDSTZJazlTSWl3aVkyaHBiR1J5Wlc0aU9sdDdJbXRsZVNJNkltZGxibVJsY2lJc0luUjVjR1VpT2lKRlVWVkJURk1pTENKemVYTjBaVzBpT2lJaUxDSjJZV3gxWlNJNkltMWhiR1VpZlN4N0ltdGxlU0k2SW1kbGJtUmxjaUlzSW5SNWNHVWlPaUpGVVZWQlRGTWlMQ0p6ZVhOMFpXMGlPaUlpTENKMllXeDFaU0k2SW1abGJXRnNaU0o5WFgxZGZWMTlMQ0pwWkNJNkltRTJaakZqWTJZekxXVmlaakV0TkRJMFppMDVaRFk1TFRSbE5XUXhNelZtTWpNME1DSjkifQ=="}' -H "Authorization: ApiKey app1.proxy1.broker App1Secret" http://localhost:8081/v1/tasks
```

Creating a sample SQL task for a `SELECT_TABLES` query using curl:
Creating a sample SQL task for a `SELECT_TEST` query using curl:
```bash
curl -v -X POST -H "Content-Type: application/json" --data '{"id":"7fffefff-ffef-fcff-feef-feffffffffff","from":"app1.proxy1.broker","to":["app1.proxy1.broker"],"ttl":"10s","failure_strategy":{"retry":{"backoff_millisecs":1000,"max_tries":5}},"metadata":{"project":"exliquid"},"body":"eyJwYXlsb2FkIjoiU0VMRUNUX1RBQkxFUyJ9"}' -H "Authorization: ApiKey app1.proxy1.broker App1Secret" http://localhost:8081/v1/tasks
curl -v -X POST -H "Content-Type: application/json" --data '{"id":"7fffefff-ffef-fcff-feef-feffffffffff","from":"app1.proxy1.broker","to":["app1.proxy1.broker"],"ttl":"10s","failure_strategy":{"retry":{"backoff_millisecs":1000,"max_tries":5}},"metadata":{"project":"exliquid"},"body":"eyJwYXlsb2FkIjoiU0VMRUNUX1RFU1QifQ=="}' -H "Authorization: ApiKey app1.proxy1.broker App1Secret" http://localhost:8081/v1/tasks
```

Creating a sample [Exporter](https://github.com/samply/exporter) "execute" task containing an Exporter query using curl:
Expand Down
5 changes: 5 additions & 0 deletions resources/cql/DHKI_STRAT_ENCOUNTER_STRATIFIER
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
define Encounter:
if InInitialPopulation then [Encounter] else {} as List<Encounter>

define function Departments(encounter FHIR.Encounter):
encounter.identifier.where(system = 'http://dktk.dkfz.de/fhir/sid/hki-department').value.first()
5 changes: 5 additions & 0 deletions resources/cql/DHKI_STRAT_MEDICATION_STRATIFIER
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
define MedicationStatement:
if InInitialPopulation then [MedicationStatement] else {} as List <MedicationStatement>

define function AppliedMedications(medication FHIR.MedicationStatement):
medication.medication.coding.code.last()
8 changes: 8 additions & 0 deletions resources/cql/DHKI_STRAT_SPECIMEN_STRATIFIER
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
define Specimen:
if InInitialPopulation then [Specimen] else {} as List<Specimen>

define function SampleType(specimen FHIR.Specimen):
specimen.type.coding.where(system = 'https://fhir.bbmri.de/CodeSystem/SampleMaterialType').code.first()

define function SampleSubtype(specimen FHIR.Specimen):
specimen.type.text.first()
9 changes: 8 additions & 1 deletion resources/cql/DKTK_STRAT_AGE_STRATIFIER
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,12 @@ from [Condition] C
where C.extension.where(url='http://hl7.org/fhir/StructureDefinition/condition-related').empty() and C.onset is not null
sort by date from onset asc)

define FirstDiagnosis:
First(
from [Condition] C
sort by date from onset asc)

define AgeClass:
if (PrimaryDiagnosis.onset is null) then 'unknown' else ToString((AgeInYearsAt(FHIRHelpers.ToDateTime(PrimaryDiagnosis.onset)) div 10) * 10)
if (PrimaryDiagnosis.onset is null)
then ToString((AgeInYearsAt(FHIRHelpers.ToDateTime(FirstDiagnosis.onset)) div 10) * 10)
else ToString((AgeInYearsAt(FHIRHelpers.ToDateTime(PrimaryDiagnosis.onset)) div 10) * 10)
5 changes: 5 additions & 0 deletions resources/cql/DKTK_STRAT_GENETIC_VARIANT
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
define GeneticVariantCount:
if InInitialPopulation then [Observation: Code '69548-6' from loinc] else {} as List <Observation>

define GeneticVariantCode:
First (from [Observation: Code '69548-6' from loinc] O return O.component.where(code.coding contains Code '48018-6' from loinc).value.coding.code.first())
43 changes: 43 additions & 0 deletions resources/sql/EXLIQUID_SAMPLE_3LEVELS
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/*
Exliquid query for sites with 'legacy' exliquid specimen documentation (3 level hierarchy versus 'virtual' mother sample).
For current expected documentation see: https://wiki.verbis.dkfz.de/pages/viewpage.action?pageId=294716167.
*/
with t as (
select
(s.resource ->> 'id')::text s_id,
(s_coding ->> 'code')::text sample_type
from specimen s, jsonb_array_elements(s.resource -> 'type' -> 'coding') as s_coding
where s_coding ->> 'system' = 'https://fhir.bbmri.de/CodeSystem/SampleMaterialType'
),
t2 as (
SELECT
s_ali.resource ->> 'id' s_ali_id,
sample_type_ali.sample_type as s_ali_type,
(s_ali.resource -> 'container' -> 0 -> 'specimenQuantity' ->> 'value')::float s_ali_amountrest,
s_ali_grp.resource ->> 'id' s_ali_grp_id,
sample_type_ali_grp.sample_type as s_ali_grp_type,
(s_ali_grp.resource -> 'container' -> 0 -> 'specimenQuantity' ->> 'value')::float s_ali_grp_amountrest,
s_mother.resource ->> 'id' s_mother_id,
sample_type_mother.sample_type as s_mother_type,
(s_mother.resource -> 'container' -> 0 -> 'specimenQuantity' ->> 'value')::float s_mother_amountrest,
s_mother.resource -> 'subject' ->> 'reference' as patient_id
FROM specimen s_ali
JOIN specimen s_ali_grp ON (s_ali.resource->'parent'->0->>'reference')::text = (s_ali_grp.resource->>'resourceType')::text || '/' || (s_ali_grp.resource->>'id')::text
JOIN specimen s_mother ON (s_ali_grp.resource->'parent'->0->>'reference')::text = (s_mother.resource->>'resourceType')::text || '/' || (s_mother.resource->>'id')::text
join t as sample_type_ali on s_ali.resource ->> 'id' = sample_type_ali.s_id
join t as sample_type_ali_grp on s_ali_grp.resource ->> 'id' = sample_type_ali_grp.s_id
join t as sample_type_mother on s_mother.resource ->> 'id' = sample_type_mother.s_id
where (s_ali.resource -> 'container' -> 0 -> 'specimenQuantity' ->> 'value')::float > 0
),
t3 as (
select distinct
t2.patient_id,
c.resource -> 'code' -> 'coding' -> 0 ->> 'code' icd10_code,
c.resource -> 'code' ->> 'text' diag_desc,
t2.s_mother_type
from t2
join condition c on t2.patient_id = c.resource -> 'subject' ->> 'reference'
)
select icd10_code, diag_desc, count(distinct patient_id) patient_count, s_mother_type, count(s_mother_type) sample_count
from t3
group by icd10_code, diag_desc, patient_id, s_mother_type;
1 change: 0 additions & 1 deletion resources/sql/SELECT_TABLES

This file was deleted.

1 change: 1 addition & 0 deletions resources/sql/SELECT_TEST
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
SELECT 10 AS VALUE, quote_literal('Hello Rustaceans') AS GREETING, 4.7 as FLOATY, CURRENT_DATE AS TODAY;
78 changes: 78 additions & 0 deletions resources/sql/SIORGP_PUBLIC_MAIN
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/*
SIorgP MetPredict project
The approach chosen here is to minimize the number of tasks generated and thus network traffic via Beam
=> one large query that returns the most necessary fields over multiple smaller queries
*/
with t as (
select
o.resource->'subject'->>'reference' as pat_ref,
o.resource->'code'->'coding'->0->>'code' as crf,
component->'code'->'coding'->0->>'code' AS code,
COALESCE(
component->'valueCodeableConcept'->'coding'->0->>'code',
component->>'valueDateTime',
component->'valueQuantity'->>'value',
component->>'valueString'
) AS value
FROM
observation o ,
jsonb_array_elements(o.resource->'component') AS component
where o.resource->'code'->'coding'->0->>'code' like 'SIOrgP%'
),
t2 AS (
select t.value as pat_pseudonym,
-- t.crf,
p.resource->>'gender' as gender,
p.resource->>'birthDate' as birth_date,
t5.value as organoid_id,
t2.value as location_primary_tumor,
t7.value as location_primary_tumor_precise,
t3.value as therapy,
t4.value as metastases_therapy,
t6.value::integer as age_at_enrollment
from t
left join t t2 on t.pat_ref = t2.pat_ref and t2.code='SIOP_LOCALISATION_PRIMARY_TUMOR'
left join t t3 on t.pat_ref = t3.pat_ref and t3.code='SIOP_NEOADJ_T_RECTAL_CARCINOMA'
left join t t4 on t.pat_ref = t4.pat_ref and t4.code='SIOP_NEOADJ_CTX_MET'
left join t t5 on t.pat_ref = t5.pat_ref and t5.code like 'SIOP_SAMPLE_M0%_PSEUDONYM'
left join t t6 on t.pat_ref = t6.pat_ref and t6.code='SIOP_AGE_STUDY_ENROLLMENT'
left join t t7 on t.pat_ref = t7.pat_ref and t7.code='SIOP_LOCALISATION_PRIMARY_TUMOR_COLON'
left join patient p on t.pat_ref = 'Patient/' || (p.resource->>'id')::text
where t.crf like 'SIOrgP - MetPredict - Visite 1%' and t.code = 'SIOP_PATIENT_PSEUDONYM'
),
t8 as (
select pat_pseudonym, count(distinct organoid_id) n_organoids
from t2
group by pat_pseudonym
)
-- patients having <= 3 organoids
select 'MetPredict' as project, 'pat_pdos_leq_3' as field, (select count(distinct pat_pseudonym) from t8 where n_organoids <= 3) as value
union
-- patients having 4 organoids
select 'MetPredict' as project, 'pat_pdos_4' as field, (select count(distinct pat_pseudonym) from t8 where n_organoids = 4) as value
union
-- patients having 5 organoids
select 'MetPredict' as project, 'pat_pdos_5' as field, (select count(distinct pat_pseudonym) from t8 where n_organoids = 5) as value
union
-- patients having > 5 organoids
select 'MetPredict' as project, 'pat_pdos_gt_5' as field, (select count(distinct pat_pseudonym) from t8 where n_organoids > 5) as value
union
-- the total number of patients
select 'MetPredict' as project, 'n_patients' as field, (select count(distinct pat_pseudonym) from t2) as value
union
-- the total number of organoids
select 'MetPredict' as project, 'n_organoids' as field, (select count(distinct organoid_id) from t2) as value
union
select 'MetPredict' as project, 'gender_male' as field, (select count(distinct pat_pseudonym) from t2 where gender = 'male') as value
union
select 'MetPredict' as project, 'gender_female' as field, (select count(distinct pat_pseudonym) from t2 where gender = 'female') as value
union
select 'MetPredict' as project, '<=30' as field, (select count(distinct pat_pseudonym) from t2 where age_at_enrollment <= 30) as value
union
select 'MetPredict' as project, '31-40' as field, (select count(distinct pat_pseudonym) from t2 where age_at_enrollment >= 31 and age_at_enrollment <= 40) as value
union
select 'MetPredict' as project, '41-50' as field, (select count(distinct pat_pseudonym) from t2 where age_at_enrollment >= 41 and age_at_enrollment <= 50) as value
union
select 'MetPredict' as project, '51-60' as field, (select count(distinct pat_pseudonym) from t2 where age_at_enrollment >= 51 and age_at_enrollment <= 60) as value
union
select 'MetPredict' as project, '>=61' as field, (select count(distinct pat_pseudonym) from t2 where age_at_enrollment >= 61) as value;
5 changes: 5 additions & 0 deletions resources/sql/SIORGP_PUBLIC_NPAT
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-- Test query. Number of patients that have a documented visit 1
select count(distinct p.resource)
from observation o
join patient p on o.resource->'subject'->>'reference' = 'Patient/' || (p.resource->>'id')::text
where o.resource->'code'->'coding'->0->>'code' like 'SIOrgP - MetPredict - Visite 1%';
4 changes: 4 additions & 0 deletions resources/sql/SIORGP_PUBLIC_NVISIT2B
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
-- Test query: Number of observations for visit 2b as a lower bound for the number of expected organoids
select count(o)
from observation o
where o.resource->'code'->'coding'->0->>'code' like 'SIOrgP - MetPredict - Visite 2b%';
Loading

0 comments on commit f77cece

Please sign in to comment.