diff --git a/src/main/java/org/ohdsi/circe/cohortdefinition/CohortExpressionQueryBuilder.java b/src/main/java/org/ohdsi/circe/cohortdefinition/CohortExpressionQueryBuilder.java index bc0c4380..8431ef5d 100644 --- a/src/main/java/org/ohdsi/circe/cohortdefinition/CohortExpressionQueryBuilder.java +++ b/src/main/java/org/ohdsi/circe/cohortdefinition/CohortExpressionQueryBuilder.java @@ -24,6 +24,7 @@ import java.util.Arrays; import java.util.List; import java.util.stream.Collectors; +import java.util.stream.IntStream; import org.apache.commons.lang3.StringUtils; import org.ohdsi.circe.cohortdefinition.builders.BuilderOptions; import org.ohdsi.circe.cohortdefinition.builders.CriteriaSqlBuilder; @@ -71,6 +72,7 @@ public class CohortExpressionQueryBuilder implements IGetCriteriaSqlDispatcher, private final static String PRIMARY_CRITERIA_EVENTS_TABLE = "primary_events"; private final static String INCLUSION_RULE_QUERY_TEMPLATE = ResourceHelper.GetResourceAsString("/resources/cohortdefinition/sql/inclusionrule.sql"); + private final static String INCLUSION_RULE_TEMP_TABLE_TEMPLATE = ResourceHelper.GetResourceAsString("/resources/cohortdefinition/sql/inclusionRuleTempTable.sql"); private final static String CENSORING_QUERY_TEMPLATE = ResourceHelper.GetResourceAsString("/resources/cohortdefinition/sql/censoringInsert.sql"); private final static String EVENT_TABLE_EXPRESSION_TEMPLATE = ResourceHelper.GetResourceAsString("/resources/cohortdefinition/sql/eventTableExpression.sql"); @@ -255,6 +257,17 @@ public String getFinalCohortQuery(Period censorWindow) { return query; } + private String getInclusionRuleTableSql(CohortExpression expression) { + String EMPTY_TABLE = "CREATE TABLE #inclusion_rules (rule_sequence int);"; + if (expression.inclusionRules.size() == 0 ) return EMPTY_TABLE; + + String UNION_TEMPLATE = "SELECT CAST(%d as int) as rule_sequence"; + List unionList = IntStream.range(0,expression.inclusionRules.size()) + .mapToObj(i -> (String)String.format(UNION_TEMPLATE, i)) + .collect(Collectors.toList()); + + return StringUtils.replace(INCLUSION_RULE_TEMP_TABLE_TEMPLATE, "@inclusionRuleUnions", StringUtils.join(unionList, " UNION ALL ")); + } private String getInclusionAnalysisQuery(String eventTable, int modeId) { String resultSql = COHORT_INCLUSION_ANALYSIS_TEMPALTE; resultSql = StringUtils.replace(resultSql, "@inclusionImpactMode", Integer.toString(modeId)); @@ -353,6 +366,7 @@ public String buildExpressionQuery(CohortExpression expression, BuildExpressionQ resultSql = StringUtils.replace(resultSql, "@eraconstructorpad", Integer.toString(expression.collapseSettings.eraPad)); + resultSql = StringUtils.replace(resultSql, "@inclusionRuleTable", getInclusionRuleTableSql(expression)); resultSql = StringUtils.replace(resultSql, "@inclusionImpactAnalysisByEventQuery", getInclusionAnalysisQuery("#qualified_events", 0)); resultSql = StringUtils.replace(resultSql, "@inclusionImpactAnalysisByPersonQuery", getInclusionAnalysisQuery("#best_events", 1)); diff --git a/src/main/resources/resources/cohortdefinition/sql/cohortCensoredStats.sql b/src/main/resources/resources/cohortdefinition/sql/cohortCensoredStats.sql index 8d9f9710..870e1672 100644 --- a/src/main/resources/resources/cohortdefinition/sql/cohortCensoredStats.sql +++ b/src/main/resources/resources/cohortdefinition/sql/cohortCensoredStats.sql @@ -1,5 +1,4 @@ -- calculate censored -delete from @results_database_schema.cohort_censor_stats where @cohort_id_field_name = @target_cohort_id; insert into @results_database_schema.cohort_censor_stats (@cohort_id_field_name, lost_count) select @target_cohort_id as @cohort_id_field_name, coalesce(FCC.total_people - TC.total, 0) as lost_count FROM diff --git a/src/main/resources/resources/cohortdefinition/sql/cohortInclusionAnalysis.sql b/src/main/resources/resources/cohortdefinition/sql/cohortInclusionAnalysis.sql index 85a1d081..8ce52def 100644 --- a/src/main/resources/resources/cohortdefinition/sql/cohortInclusionAnalysis.sql +++ b/src/main/resources/resources/cohortdefinition/sql/cohortInclusionAnalysis.sql @@ -15,8 +15,8 @@ group by inclusion_rule_mask -- calculate gain counts delete from @results_database_schema.cohort_inclusion_stats where @cohort_id_field_name = @target_cohort_id and mode_id = @inclusionImpactMode; insert into @results_database_schema.cohort_inclusion_stats (@cohort_id_field_name, rule_sequence, person_count, gain_count, person_total, mode_id) -select ir.@cohort_id_field_name, ir.rule_sequence, coalesce(T.person_count, 0) as person_count, coalesce(SR.person_count, 0) gain_count, EventTotal.total, @inclusionImpactMode as mode_id -from @results_database_schema.cohort_inclusion ir +select @target_cohort_id as @cohort_id_field_name, ir.rule_sequence, coalesce(T.person_count, 0) as person_count, coalesce(SR.person_count, 0) gain_count, EventTotal.total, @inclusionImpactMode as mode_id +from #inclusion_rules ir left join ( select i.inclusion_rule_id, count_big(i.event_id) as person_count @@ -24,10 +24,9 @@ left join JOIN #inclusion_events i on Q.person_id = I.person_id and Q.event_id = i.event_id group by i.inclusion_rule_id ) T on ir.rule_sequence = T.inclusion_rule_id -CROSS JOIN (select count(*) as total_rules from @results_database_schema.cohort_inclusion where @cohort_id_field_name = @target_cohort_id) RuleTotal +CROSS JOIN (select count(*) as total_rules from #inclusion_rules) RuleTotal CROSS JOIN (select count_big(event_id) as total from @eventTable) EventTotal LEFT JOIN @results_database_schema.cohort_inclusion_result SR on SR.mode_id = @inclusionImpactMode AND SR.@cohort_id_field_name = @target_cohort_id AND (POWER(cast(2 as bigint),RuleTotal.total_rules) - POWER(cast(2 as bigint),ir.rule_sequence) - 1) = SR.inclusion_rule_mask -- POWER(2,rule count) - POWER(2,rule sequence) - 1 is the mask for 'all except this rule' -WHERE ir.@cohort_id_field_name = @target_cohort_id ; -- calculate totals @@ -38,7 +37,7 @@ FROM (select count_big(event_id) as total from @eventTable) PC, (select sum(sr.person_count) as total from @results_database_schema.cohort_inclusion_result sr - CROSS JOIN (select count(*) as total_rules from @results_database_schema.cohort_inclusion where @cohort_id_field_name = @target_cohort_id) RuleTotal + CROSS JOIN (select count(*) as total_rules from #inclusion_rules) RuleTotal where sr.mode_id = @inclusionImpactMode and sr.@cohort_id_field_name = @target_cohort_id and sr.inclusion_rule_mask = POWER(cast(2 as bigint),RuleTotal.total_rules)-1 ) FC ; diff --git a/src/main/resources/resources/cohortdefinition/sql/generateCohort.sql b/src/main/resources/resources/cohortdefinition/sql/generateCohort.sql index 33454cff..081c619c 100644 --- a/src/main/resources/resources/cohortdefinition/sql/generateCohort.sql +++ b/src/main/resources/resources/cohortdefinition/sql/generateCohort.sql @@ -118,7 +118,16 @@ INSERT INTO @target_database_schema.@target_cohort_table (@cohort_id_field_name, @finalCohortQuery ; -{@generateStats != 0}?{ +-- BEGIN: Censored Stats + +delete from @results_database_schema.cohort_censor_stats where @cohort_id_field_name = @target_cohort_id; +@cohortCensoredStatsQuery +-- END: Censored Stats + +{@generateStats != 0 & @ruleTotal != 0}?{ + +@inclusionRuleTable + -- Find the event that is the 'best match' per person. -- the 'best match' is defined as the event that satisfies the most inclusion rules. -- ties are solved by choosing the event that matches the earliest inclusion rule, and then earliest. @@ -151,13 +160,11 @@ WHERE ranked.rank_value = 1 @inclusionImpactAnalysisByPersonQuery -- END: Inclusion Impact Analysis - person --- BEGIN: Censored Stats -@cohortCensoredStatsQuery --- END: Censored Stats - TRUNCATE TABLE #best_events; DROP TABLE #best_events; +TRUNCATE TABLE #inclusion_rules; +DROP TABLE #inclusion_rules; } @strategy_ends_cleanup diff --git a/src/main/resources/resources/cohortdefinition/sql/inclusionRuleTempTable.sql b/src/main/resources/resources/cohortdefinition/sql/inclusionRuleTempTable.sql new file mode 100644 index 00000000..e0465bfb --- /dev/null +++ b/src/main/resources/resources/cohortdefinition/sql/inclusionRuleTempTable.sql @@ -0,0 +1,7 @@ +-- Create a temp table of inclusion rule rows for joining in the inclusion rule impact analysis + +select cast(rule_sequence as int) as rule_sequence +into #inclusion_rules +from ( + @inclusionRuleUnions +) IR; diff --git a/src/test/java/org/ohdsi/circe/cohortdefinition/CohortGeneration_5_0_0_Test.java b/src/test/java/org/ohdsi/circe/cohortdefinition/CohortGeneration_5_0_0_Test.java index 9607b358..cafd0490 100644 --- a/src/test/java/org/ohdsi/circe/cohortdefinition/CohortGeneration_5_0_0_Test.java +++ b/src/test/java/org/ohdsi/circe/cohortdefinition/CohortGeneration_5_0_0_Test.java @@ -421,6 +421,49 @@ public void testLimits() throws Exception { * Inclusion rule tests */ + @Test + public void testSimpleInclusionRule() throws Exception { + final String RESULTS_SCHEMA = "simpleInclusionRule"; + final String[] testDataSetsPrep = new String[] { + "/datasets/vocabulary.json", + "/cohortgeneration/inclusionRules/simpleInclusionRule_PREP.json" + }; + final IDatabaseConnection dbUnitCon = getConnection(); + + // prepare results schema for the specified options.resultSchema + prepareSchema(RESULTS_SCHEMA, RESULTS_DDL_PATH); + + // load test data into DB. + final IDataSet dsPrep = DataSetFactory.createDataSet(testDataSetsPrep); + DatabaseOperation.CLEAN_INSERT.execute(dbUnitCon, dsPrep); // clean load of the DB. Careful, clean means "delete the old stuff" + + CohortExpressionQueryBuilder.BuildExpressionQueryOptions options; + CohortExpression expression; + String cohortSql; + + // load the default expression, which looks for the event with exactly 0 conceptSet = 1 between all days beore and 0 days before index + // cohort 1 will use the default expression from JSON. + expression = CohortExpression.fromJson(ResourceHelper.GetResourceAsString("/cohortgeneration/inclusionRules/simpleInclusionRule.json")); + options = buildExpressionQueryOptions(1, RESULTS_SCHEMA); + cohortSql = buildExpressionSql(expression, options); + // execute on database, expect no errors + jdbcTemplate.batchUpdate(SqlSplit.splitSql(cohortSql)); + + // Validate results + // Load actual records from cohort table + final ITable cohortTable = dbUnitCon.createQueryTable(RESULTS_SCHEMA + ".cohort", String.format("SELECT * from %s ORDER BY cohort_definition_id, subject_id, cohort_start_date", RESULTS_SCHEMA + ".cohort")); + final ITable censorStatsTable = dbUnitCon.createQueryTable(RESULTS_SCHEMA + ".cohort_inclusion_result", String.format("SELECT * from %s ORDER BY cohort_definition_id, mode_id, inclusion_rule_mask", RESULTS_SCHEMA + ".cohort_inclusion_result")); + final IDataSet actualDataSet = new CompositeDataSet(new ITable[] {cohortTable, censorStatsTable}); + + // Load expected data from an XML dataset + final String[] testDataSetsVerify = new String[] {"/cohortgeneration/inclusionRules/simpleInclusionRule_VERIFY.json"}; + final IDataSet expectedDataSet = DataSetFactory.createDataSet(testDataSetsVerify); + + // Assert actual database table match expected table + Assertion.assertEquals(expectedDataSet, actualDataSet); + + } + /** * Exit strategies and censoring events */ diff --git a/src/test/java/org/ohdsi/circe/cohortdefinition/CohortOptionsTest.java b/src/test/java/org/ohdsi/circe/cohortdefinition/CohortOptionsTest.java index 23c5e474..1a5ab366 100644 --- a/src/test/java/org/ohdsi/circe/cohortdefinition/CohortOptionsTest.java +++ b/src/test/java/org/ohdsi/circe/cohortdefinition/CohortOptionsTest.java @@ -32,8 +32,7 @@ private void checkCohortFieldSql(String expressionSql, String cohortFieldName) { // check inserts for cohort_inclusion_stats, mode = 0 assertThat(expressionSql, containsString(format("delete from @results_database_schema.cohort_inclusion_stats where %s = @target_cohort_id and mode_id = 0;", cohortFieldName))); assertThat(expressionSql, matchesPattern(buildPattern(format(".*insert into @results_database_schema\\.cohort_inclusion_stats \\(%s,.*" - + "select ir\\.%s,.*0 as mode_id.*" - + "CROSS JOIN \\(.+where %s = @target_cohort_id\\).*WHERE ir.%s = @target_cohort_id.+", cohortFieldName, cohortFieldName, cohortFieldName, cohortFieldName)))); + + "select .*0 as mode_id.+", cohortFieldName)))); // check inserts for cohort_summary_stats, mode = 0 assertThat(expressionSql, containsString(format("delete from @results_database_schema.cohort_summary_stats where %s = @target_cohort_id and mode_id = 0;", cohortFieldName))); @@ -48,8 +47,7 @@ private void checkCohortFieldSql(String expressionSql, String cohortFieldName) { // check inserts for cohort_inclusion_stats, mode = 1 assertThat(expressionSql, containsString(format("delete from @results_database_schema.cohort_inclusion_stats where %s = @target_cohort_id and mode_id = 1;", cohortFieldName))); assertThat(expressionSql, matchesPattern(buildPattern(format(".*insert into @results_database_schema\\.cohort_inclusion_stats \\(%s,.+" - + "select ir\\.%s,.+, 1 as mode_id.+" - + "CROSS JOIN \\(.+where %s = @target_cohort_id.*\\).+WHERE ir.%s = @target_cohort_id.+", cohortFieldName, cohortFieldName, cohortFieldName, cohortFieldName)))); + + "select .*1 as mode_id.+", cohortFieldName)))); // check inserts for cohort_summary_stats, mode = 1 assertThat(expressionSql, containsString(format("delete from @results_database_schema.cohort_summary_stats where %s = @target_cohort_id and mode_id = 1;", cohortFieldName))); @@ -70,6 +68,9 @@ public void checkCohortFieldName() { expression.conceptSets = new ConceptSet[0]; expression.primaryCriteria = new PrimaryCriteria(); expression.primaryCriteria.observationWindow = new ObservationFilter(); + InclusionRule inclusionRule = new InclusionRule(); + inclusionRule.expression = new CriteriaGroup(); + expression.inclusionRules.add(inclusionRule); CohortExpressionQueryBuilder builder = new CohortExpressionQueryBuilder(); diff --git a/src/test/resources/cohortgeneration/inclusionRules/simpleInclusionRule.json b/src/test/resources/cohortgeneration/inclusionRules/simpleInclusionRule.json new file mode 100644 index 00000000..dc86948e --- /dev/null +++ b/src/test/resources/cohortgeneration/inclusionRules/simpleInclusionRule.json @@ -0,0 +1,111 @@ +{ + "ConceptSets": [ + { + "id": 0, + "name": "Entry Concept Set", + "expression": { + "items": [ + { + "concept": { + "CONCEPT_CLASS_ID": "Clinical Finding", + "CONCEPT_CODE": "C1P1", + "CONCEPT_ID": 2, + "CONCEPT_NAME": "Child 1 [Parent: 1]", + "DOMAIN_ID": "CONDITION", + "INVALID_REASON": "V", + "INVALID_REASON_CAPTION": "Valid", + "STANDARD_CONCEPT": "S", + "STANDARD_CONCEPT_CAPTION": "Standard", + "VOCABULARY_ID": "TestVocab" + }, + "includeDescendants": true + } + ] + } + }, + { + "id": 1, + "name": "Inclusion Concept Set", + "expression": { + "items": [ + { + "concept": { + "CONCEPT_CLASS_ID": "Clinical Finding", + "CONCEPT_CODE": "C2P1", + "CONCEPT_ID": 3, + "CONCEPT_NAME": "Child 2 [Parent: 1]", + "DOMAIN_ID": "CONDITION", + "INVALID_REASON": "V", + "INVALID_REASON_CAPTION": "Valid", + "STANDARD_CONCEPT": "S", + "STANDARD_CONCEPT_CAPTION": "Standard", + "VOCABULARY_ID": "TestVocab" + }, + "includeDescendants": true + } + ] + } + } + ], + "PrimaryCriteria": { + "CriteriaList": [ + { + "ConditionOccurrence": { + "CodesetId": 0 + } + } + ], + "ObservationWindow": { + "PriorDays": 0, + "PostDays": 0 + }, + "PrimaryCriteriaLimit": { + "Type": "First" + } + }, + "QualifiedLimit": { + "Type": "First" + }, + "ExpressionLimit": { + "Type": "First" + }, + "InclusionRules": [ + { + "name": "Prior Condition", + "expression": { + "Type": "ALL", + "CriteriaList": [ + { + "Criteria": { + "ConditionOccurrence": { + "CodesetId": 1 + } + }, + "StartWindow": { + "Start": { + "Coeff": -1 + }, + "End": { + "Days": 1, + "Coeff": -1 + }, + "UseEventEnd": false + }, + "Occurrence": { + "Type": 2, + "Count": 1 + } + } + ], + "DemographicCriteriaList": [], + "Groups": [] + } + } + ], + "CensoringCriteria": [], + "CollapseSettings": { + "CollapseType": "ERA", + "EraPad": 0 + }, + "CensorWindow": {} +} \ No newline at end of file diff --git a/src/test/resources/cohortgeneration/inclusionRules/simpleInclusionRule_PREP.json b/src/test/resources/cohortgeneration/inclusionRules/simpleInclusionRule_PREP.json new file mode 100644 index 00000000..7a2bf8d8 --- /dev/null +++ b/src/test/resources/cohortgeneration/inclusionRules/simpleInclusionRule_PREP.json @@ -0,0 +1,57 @@ +{ + "cdm.person": [ + { + "person_id":1, + "gender_concept_id":0, + "year_of_birth":0, + "race_concept_id":0, + "ethnicity_concept_id":0 + }, + { + "person_id":2, + "gender_concept_id":0, + "year_of_birth":0, + "race_concept_id":0, + "ethnicity_concept_id":0 + } + ], + "cdm.condition_occurrence": [ + { + "condition_occurrence_id": 1, + "person_id":1, + "condition_concept_id":2, + "condition_start_date":"2000-06-01", + "condition_type_concept_id":0 + }, + { + "condition_occurrence_id": 2, + "person_id":1, + "condition_concept_id":3, + "condition_start_date":"2000-01-01", + "condition_type_concept_id":0 + }, + { + "condition_occurrence_id": 3, + "person_id":2, + "condition_concept_id":2, + "condition_start_date":"2000-01-01", + "condition_type_concept_id":0 + } + ], + "cdm.observation_period" : [ + { + "observation_period_id": 1, + "person_id":1, + "observation_period_start_date":"2000-01-01", + "observation_period_end_date":"2001-01-01", + "period_type_concept_id": 0 + }, + { + "observation_period_id": 2, + "person_id":2, + "observation_period_start_date":"2000-01-01", + "observation_period_end_date":"2001-01-01", + "period_type_concept_id": 0 + } + ] +} \ No newline at end of file diff --git a/src/test/resources/cohortgeneration/inclusionRules/simpleInclusionRule_VERIFY.json b/src/test/resources/cohortgeneration/inclusionRules/simpleInclusionRule_VERIFY.json new file mode 100644 index 00000000..f9b57230 --- /dev/null +++ b/src/test/resources/cohortgeneration/inclusionRules/simpleInclusionRule_VERIFY.json @@ -0,0 +1,36 @@ +{ + "simpleInclusionRule.cohort": [ + { + "cohort_definition_id":1, + "subject_id":1, + "cohort_start_date":"2000-06-01", + "cohort_end_date":"2001-01-01" + } + ], + "simpleInclusionRule.cohort_inclusion_result": [ + { + "cohort_definition_id":1, + "inclusion_rule_mask":0, + "person_count":1, + "mode_id": 0 + }, + { + "cohort_definition_id":1, + "inclusion_rule_mask":1, + "person_count":1, + "mode_id": 0 + }, + { + "cohort_definition_id":1, + "inclusion_rule_mask":0, + "person_count":1, + "mode_id": 1 + }, + { + "cohort_definition_id":1, + "inclusion_rule_mask":1, + "person_count":1, + "mode_id": 1 + } + ] +} \ No newline at end of file