diff --git a/src/main/java/org/cbioportal/persistence/helper/StudyViewFilterHelper.java b/src/main/java/org/cbioportal/persistence/helper/StudyViewFilterHelper.java index 2ee580f133f..d0c87d2eabb 100644 --- a/src/main/java/org/cbioportal/persistence/helper/StudyViewFilterHelper.java +++ b/src/main/java/org/cbioportal/persistence/helper/StudyViewFilterHelper.java @@ -135,42 +135,54 @@ public boolean isCategoricalClinicalDataFilter(ClinicalDataFilter clinicalDataFi * Merge the range of numerical bins in DataFilters to reduce the number of scans that runs on the database when filtering. */ public static List mergeDataFilters(List filters) { - boolean isNonNumericalOnly = true; + // this should throw error or move to all binning endpoints in the future for input validation + if (!areValidFilters(filters)) { + return filters; + } + + boolean hasNumericalValue = false; List mergedDataFilters = new ArrayList<>(); for (T filter : filters) { List mergedValues = new ArrayList<>(); List nonNumericalValues = new ArrayList<>(); + // record the start and end of current merging range BigDecimal mergedStart = null; BigDecimal mergedEnd = null; + // for each value for (DataFilterValue dataFilterValue : filter.getValues()) { - // leave non-numerical values as they are + // if it is non-numerical, leave it as is if (dataFilterValue.getValue() != null) { nonNumericalValues.add(dataFilterValue); + continue; + } + // else it is numerical so start merging process + hasNumericalValue = true; + BigDecimal start = dataFilterValue.getStart(); + BigDecimal end = dataFilterValue.getEnd(); + + // if current merging range is null, we take current bin's range + if (mergedStart == null && mergedEnd == null) { + mergedStart = start; + mergedEnd = end; + } + // else we already has a merging range, we check if this one is consecutive of our range + else if (mergedEnd.equals(start)) { + // if true, we expand our range + mergedEnd = end; } - // merge adjacent numerical bins else { - isNonNumericalOnly = false; - BigDecimal start = dataFilterValue.getStart(); - BigDecimal end = dataFilterValue.getEnd(); - - if (mergedStart == null && mergedEnd == null) { - mergedStart = start; - mergedEnd = end; - } - else if (mergedEnd.equals(start)) { - mergedEnd = end; - } else { - mergedValues.add(new DataFilterValue(mergedStart, mergedEnd, null)); - mergedStart = start; - mergedEnd = end; - } + // otherwise it's a gap, so we save our current range first, and then use current bin to start the next range + mergedValues.add(new DataFilterValue(mergedStart, mergedEnd)); + mergedStart = start; + mergedEnd = end; } } - if (!isNonNumericalOnly) { - mergedValues.add(new DataFilterValue(mergedStart, mergedEnd, null)); + // in the end we need to save the final range, but if everything is non-numerical then no need to + if (hasNumericalValue) { + mergedValues.add(new DataFilterValue(mergedStart, mergedEnd)); } mergedValues.addAll(nonNumericalValues); filter.setValues(mergedValues); @@ -179,4 +191,63 @@ else if (mergedEnd.equals(start)) { return mergedDataFilters; } + + public static boolean areValidFilters(List filters) { + if (filters == null || filters.isEmpty()) { + return false; + } + + for (T filter : filters) { + if (!isValidFilter(filter)) { + return false; + } + } + return true; + } + + private static boolean isValidFilter(T filter) { + if (filter == null || filter.getValues() == null || filter.getValues().isEmpty()) { + return false; + } + + BigDecimal start = null; + BigDecimal end = null; + for (DataFilterValue value : filter.getValues()) { + if (!validateDataFilterValue(value, start, end)) { + return false; + } + // update start and end values to check next bin range + if (value.getStart() != null) { + start = value.getStart(); + } + if (value.getEnd() != null) { + end = value.getEnd(); + } + } + return true; + } + + private static boolean validateDataFilterValue(DataFilterValue value, BigDecimal lastStart, BigDecimal lastEnd) { + // non-numerical value should not have numerical value + if (value.getValue() != null) { + return value.getStart() == null && value.getEnd() == null; + } + + // check if start < end + if (value.getStart() != null && value.getEnd() != null + && value.getStart().compareTo(value.getEnd()) >= 0) { + return false; + } + + // check if start stays increasing and no overlapping + if (value.getStart() != null + && ((lastStart != null && lastStart.compareTo(value.getStart()) >= 0) + || (lastEnd != null && value.getStart().compareTo(lastEnd) < 0))) { + return false; + } + + // check if end stays increasing + return value.getEnd() == null || lastEnd == null + || lastEnd.compareTo(value.getEnd()) < 0; + } } diff --git a/src/main/java/org/cbioportal/web/parameter/DataFilterValue.java b/src/main/java/org/cbioportal/web/parameter/DataFilterValue.java index af434cfa55d..47fdb0937e9 100644 --- a/src/main/java/org/cbioportal/web/parameter/DataFilterValue.java +++ b/src/main/java/org/cbioportal/web/parameter/DataFilterValue.java @@ -11,9 +11,12 @@ public class DataFilterValue implements Serializable { public DataFilterValue() {} - public DataFilterValue(BigDecimal start, BigDecimal end, String value) { + public DataFilterValue(BigDecimal start, BigDecimal end) { this.start = start; this.end = end; + } + + public DataFilterValue(String value) { this.value = value; } diff --git a/src/test/java/org/cbioportal/persistence/helper/StudyViewFilterHelperTest.java b/src/test/java/org/cbioportal/persistence/helper/StudyViewFilterHelperTest.java index f9044f09d37..1f0e10225a1 100644 --- a/src/test/java/org/cbioportal/persistence/helper/StudyViewFilterHelperTest.java +++ b/src/test/java/org/cbioportal/persistence/helper/StudyViewFilterHelperTest.java @@ -18,9 +18,9 @@ public class StudyViewFilterHelperTest { public void testMergeDataFilterNumericalContinuousValues() { List genomicDataFilters = new ArrayList<>(); List values = new ArrayList<>(); - values.add(new DataFilterValue(BigDecimal.valueOf(-5), BigDecimal.valueOf(-1), null)); - values.add(new DataFilterValue(BigDecimal.valueOf(-1), BigDecimal.valueOf(3), null)); - values.add(new DataFilterValue(BigDecimal.valueOf(3), BigDecimal.valueOf(7), null)); + values.add(new DataFilterValue(BigDecimal.valueOf(-5), BigDecimal.valueOf(-1))); + values.add(new DataFilterValue(BigDecimal.valueOf(-1), BigDecimal.valueOf(3))); + values.add(new DataFilterValue(BigDecimal.valueOf(3), BigDecimal.valueOf(7))); genomicDataFilters.add(new GenomicDataFilter(null, null, values)); List mergedGenomicDataFilters = StudyViewFilterHelper.mergeDataFilters(genomicDataFilters); @@ -36,10 +36,10 @@ public void testMergeDataFilterNumericalContinuousValues() { public void testMergeDataFilterNumericalDiscontinuousValues() { List genomicDataFilters = new ArrayList<>(); List values = new ArrayList<>(); - values.add(new DataFilterValue(BigDecimal.valueOf(-2.5), BigDecimal.valueOf(-2.25), null)); - values.add(new DataFilterValue(BigDecimal.valueOf(-2.25), BigDecimal.valueOf(-2), null)); - values.add(new DataFilterValue(BigDecimal.valueOf(-1.75), BigDecimal.valueOf(-1.5), null)); - values.add(new DataFilterValue(BigDecimal.valueOf(-1.5), BigDecimal.valueOf(-1.25), null)); + values.add(new DataFilterValue(BigDecimal.valueOf(-2.5), BigDecimal.valueOf(-2.25))); + values.add(new DataFilterValue(BigDecimal.valueOf(-2.25), BigDecimal.valueOf(-2))); + values.add(new DataFilterValue(BigDecimal.valueOf(-1.75), BigDecimal.valueOf(-1.5))); + values.add(new DataFilterValue(BigDecimal.valueOf(-1.5), BigDecimal.valueOf(-1.25))); genomicDataFilters.add(new GenomicDataFilter(null, null, values)); List mergedGenomicDataFilters = StudyViewFilterHelper.mergeDataFilters(genomicDataFilters); @@ -60,9 +60,9 @@ public void testMergeDataFilterNumericalDiscontinuousValues() { public void testMergeDataFilterNumericalInfiniteValues() { List genomicDataFilters = new ArrayList<>(); List values = new ArrayList<>(); - values.add(new DataFilterValue(null, BigDecimal.valueOf(-2.25), null)); - values.add(new DataFilterValue(BigDecimal.valueOf(-2.25), BigDecimal.valueOf(-2), null)); - values.add(new DataFilterValue(BigDecimal.valueOf(-2), null, null)); + values.add(new DataFilterValue(null, BigDecimal.valueOf(-2.25))); + values.add(new DataFilterValue(BigDecimal.valueOf(-2.25), BigDecimal.valueOf(-2))); + values.add(new DataFilterValue(BigDecimal.valueOf(-2), null)); genomicDataFilters.add(new GenomicDataFilter(null, null, values)); List mergedGenomicDataFilters = StudyViewFilterHelper.mergeDataFilters(genomicDataFilters); @@ -79,9 +79,9 @@ public void testMergeDataFilterNumericalInfiniteValues() { public void testMergeDataFilterNumericalNonNumericalValues() { List genomicDataFilters = new ArrayList<>(); List values = new ArrayList<>(); - values.add(new DataFilterValue(BigDecimal.valueOf(-2.5), BigDecimal.valueOf(-2.25), null)); - values.add(new DataFilterValue(BigDecimal.valueOf(-2.25), BigDecimal.valueOf(-2), null)); - values.add(new DataFilterValue(null, null, "NA")); + values.add(new DataFilterValue(BigDecimal.valueOf(-2.5), BigDecimal.valueOf(-2.25))); + values.add(new DataFilterValue(BigDecimal.valueOf(-2.25), BigDecimal.valueOf(-2))); + values.add(new DataFilterValue("NA")); genomicDataFilters.add(new GenomicDataFilter(null, null, values)); List mergedGenomicDataFilters = StudyViewFilterHelper.mergeDataFilters(genomicDataFilters); @@ -99,7 +99,7 @@ public void testMergeDataFilterNumericalNonNumericalValues() { public void testMergeDataFilterNonNumericalOnlyValues() { List genomicDataFilters = new ArrayList<>(); List values = new ArrayList<>(); - values.add(new DataFilterValue(null, null, "NA")); + values.add(new DataFilterValue("NA")); genomicDataFilters.add(new GenomicDataFilter(null, null, values)); List mergedGenomicDataFilters = StudyViewFilterHelper.mergeDataFilters(genomicDataFilters); @@ -107,4 +107,108 @@ public void testMergeDataFilterNonNumericalOnlyValues() { String value = mergedDataFilterValues.getFirst().getValue(); assertEquals("NA", value); } + + // invalid null / empty -> unprocessed null / empty + @Test + public void testMergeDataFilterEmptyValidation() { + List mergedUninstantiatedFilters = StudyViewFilterHelper.mergeDataFilters(null); + assertNull(mergedUninstantiatedFilters); + + List mergedEmptyFilters = StudyViewFilterHelper.mergeDataFilters(new ArrayList<>()); + assertEquals(0, mergedEmptyFilters.size()); + + List uninstantiatedDataFilters = new ArrayList<>(); + uninstantiatedDataFilters.add(null); + List mergedUninstantiatedDataFilters = StudyViewFilterHelper.mergeDataFilters(uninstantiatedDataFilters); + assertNull(mergedUninstantiatedDataFilters.getFirst()); + + List uninstantiatedValueFilters = new ArrayList<>(); + uninstantiatedValueFilters.add(new GenomicDataFilter(null, null, null)); + List mergedUninstantiatedValueFilters = StudyViewFilterHelper.mergeDataFilters(uninstantiatedValueFilters); + assertNull(mergedUninstantiatedValueFilters.getFirst().getValues()); + + List emptyValueFilters = new ArrayList<>(); + emptyValueFilters.add(new GenomicDataFilter(null, null, new ArrayList<>())); + List mergedEmptyValueFilters = StudyViewFilterHelper.mergeDataFilters(emptyValueFilters); + assertEquals(0, mergedEmptyValueFilters.getFirst().getValues().size()); + } + + // invalid (2, 3, "NA"] -> unprocessed (2, 3, "NA"] + @Test + public void testMergeDataFilterNonNumericalValidation() { + List genomicDataFilters = new ArrayList<>(); + List invalidValues = new ArrayList<>(); + DataFilterValue invalidValue = new DataFilterValue("NA"); + invalidValue.setStart(BigDecimal.valueOf(2)); + invalidValue.setEnd(BigDecimal.valueOf(3)); + invalidValues.add(invalidValue); + genomicDataFilters.add(new GenomicDataFilter(null, null, invalidValues)); + + List mergedGenomicDataFilters = StudyViewFilterHelper.mergeDataFilters(genomicDataFilters); + List mergedDataFilterValues = mergedGenomicDataFilters.getFirst().getValues(); + BigDecimal start = mergedDataFilterValues.getFirst().getStart(); + BigDecimal end = mergedDataFilterValues.getFirst().getEnd(); + String value = mergedDataFilterValues.getFirst().getValue(); + assertEquals(0, BigDecimal.valueOf(2).compareTo(start)); + assertEquals(0, BigDecimal.valueOf(3).compareTo(end)); + assertEquals("NA", value); + } + + // invalid (42, 6] -> unprocessed (42, 6] + @Test + public void testMergeDataFilterRangeValidation() { + List genomicDataFilters = new ArrayList<>(); + List invalidValues = new ArrayList<>(); + invalidValues.add(new DataFilterValue(BigDecimal.valueOf(42), BigDecimal.valueOf(6))); + genomicDataFilters.add(new GenomicDataFilter(null, null, invalidValues)); + + List mergedGenomicDataFilters = StudyViewFilterHelper.mergeDataFilters(genomicDataFilters); + List mergedDataFilterValues = mergedGenomicDataFilters.getFirst().getValues(); + BigDecimal start = mergedDataFilterValues.getFirst().getStart(); + BigDecimal end = mergedDataFilterValues.getFirst().getEnd(); + assertEquals(0, BigDecimal.valueOf(42).compareTo(start)); + assertEquals(0, BigDecimal.valueOf(6).compareTo(end)); + } + + // invalid (3, 5], (1, 3] -> unprocessed (3, 5], (1, 3] + @Test + public void testMergeDataFilterContinuousValidation() { + List genomicDataFilters = new ArrayList<>(); + List values = new ArrayList<>(); + values.add(new DataFilterValue(BigDecimal.valueOf(3), BigDecimal.valueOf(5))); + values.add(new DataFilterValue(BigDecimal.valueOf(1), BigDecimal.valueOf(3))); + genomicDataFilters.add(new GenomicDataFilter(null, null, values)); + + List mergedGenomicDataFilters = StudyViewFilterHelper.mergeDataFilters(genomicDataFilters); + List mergedDataFilterValues = mergedGenomicDataFilters.getFirst().getValues(); + BigDecimal firstStart = mergedDataFilterValues.getFirst().getStart(); + BigDecimal firstEnd = mergedDataFilterValues.getFirst().getEnd(); + assertEquals(0, BigDecimal.valueOf(3).compareTo(firstStart)); + assertEquals(0, BigDecimal.valueOf(5).compareTo(firstEnd)); + BigDecimal secondStart = mergedDataFilterValues.get(1).getStart(); + BigDecimal secondEnd = mergedDataFilterValues.get(1).getEnd(); + assertEquals(0, BigDecimal.valueOf(1).compareTo(secondStart)); + assertEquals(0, BigDecimal.valueOf(3).compareTo(secondEnd)); + } + + // invalid (3, 5], (2, 4] -> unprocessed (3, 5], (2, 4] + @Test + public void testMergeDataFilterNoOverlapValidation() { + List genomicDataFilters = new ArrayList<>(); + List values = new ArrayList<>(); + values.add(new DataFilterValue(BigDecimal.valueOf(3), BigDecimal.valueOf(5))); + values.add(new DataFilterValue(BigDecimal.valueOf(2), BigDecimal.valueOf(4))); + genomicDataFilters.add(new GenomicDataFilter(null, null, values)); + + List mergedGenomicDataFilters = StudyViewFilterHelper.mergeDataFilters(genomicDataFilters); + List mergedDataFilterValues = mergedGenomicDataFilters.getFirst().getValues(); + BigDecimal firstStart = mergedDataFilterValues.getFirst().getStart(); + BigDecimal firstEnd = mergedDataFilterValues.getFirst().getEnd(); + assertEquals(0, BigDecimal.valueOf(3).compareTo(firstStart)); + assertEquals(0, BigDecimal.valueOf(5).compareTo(firstEnd)); + BigDecimal secondStart = mergedDataFilterValues.get(1).getStart(); + BigDecimal secondEnd = mergedDataFilterValues.get(1).getEnd(); + assertEquals(0, BigDecimal.valueOf(2).compareTo(secondStart)); + assertEquals(0, BigDecimal.valueOf(4).compareTo(secondEnd)); + } } diff --git a/src/test/resources/clickhouse_data.sql b/src/test/resources/clickhouse_data.sql index 20b0da393dd..af1723dcf69 100644 --- a/src/test/resources/clickhouse_data.sql +++ b/src/test/resources/clickhouse_data.sql @@ -109,6 +109,7 @@ insert into genetic_profile_samples (genetic_profile_id,ordered_sample_list) val insert into genetic_profile_samples (genetic_profile_id,ordered_sample_list) values (4,'1,2,3,4,5,6,7,8,9,10,11,12,13,14,'); insert into genetic_profile_samples (genetic_profile_id,ordered_sample_list) values (5,'2,'); insert into genetic_profile_samples (genetic_profile_id,ordered_sample_list) values (11,'1,2,3,4,5,6,7,8,9,10,11,12,13,14,'); +insert into genetic_profile_samples (genetic_profile_id,ordered_sample_list) values (12,'1,2,3,4,5,6,7,8,9,10,11,12,13,14,'); insert into genetic_profile_samples (genetic_profile_id,ordered_sample_list) values (14,'1,2,3,4,5,6,7,8,9,10,11,12,'); insert into genetic_profile_samples (genetic_profile_id,ordered_sample_list) values (15,'1,2,3,4,'); insert into genetic_profile_samples (genetic_profile_id,ordered_sample_list) values(10,'1,2,3,4,5,6,7,8,9,10,11,');