diff --git a/src/otg/assets/schemas/variant_index.json b/src/otg/assets/schemas/variant_index.json index e4f41caa7..7857a46f8 100644 --- a/src/otg/assets/schemas/variant_index.json +++ b/src/otg/assets/schemas/variant_index.json @@ -76,26 +76,72 @@ "metadata": {} }, { - "name": "cadd", + "name": "inSilicoPredictors", + "nullable": false, + "metadata": {}, "type": { "type": "struct", "fields": [ { - "name": "phred", - "type": "float", + "name": "cadd", + "nullable": true, + "metadata": {}, + "type": { + "type": "struct", + "fields": [ + { + "name": "raw", + "type": "float", + "nullable": true, + "metadata": {} + }, + { + "name": "phred", + "type": "float", + "nullable": true, + "metadata": {} + } + ] + } + }, + { + "name": "revelMax", + "type": "double", "nullable": true, "metadata": {} }, { - "name": "raw", + "name": "spliceaiDsMax", "type": "float", "nullable": true, "metadata": {} + }, + { + "name": "pangolinLargestDs", + "type": "double", + "nullable": true, + "metadata": {} + }, + { + "name": "phylop", + "type": "double", + "nullable": true, + "metadata": {} + }, + { + "name": "siftMax", + "type": "double", + "nullable": true, + "metadata": {} + }, + { + "name": "polyphenMax", + "type": "double", + "nullable": true, + "metadata": {} } ] - }, - "nullable": true, - "metadata": {} + } }, { "name": "mostSevereConsequence", diff --git a/src/otg/dataset/variant_index.py b/src/otg/dataset/variant_index.py index f417b0f9e..647584f98 100644 --- a/src/otg/dataset/variant_index.py +++ b/src/otg/dataset/variant_index.py @@ -58,7 +58,7 @@ def from_variant_annotation( "positionB37", "alleleType", "alleleFrequencies", - "cadd", + "inSilicoPredictors", ] va_slimmed = variant_annotation.filter_by_variant_df( study_locus.unique_variants_in_locus() diff --git a/tests/conftest.py b/tests/conftest.py index e31255f55..89789f829 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -354,13 +354,13 @@ def mock_variant_annotation(spark: SparkSession) -> VariantAnnotation: 
expr='named_struct("cadd", named_struct("phred", cast(rand() as float), "raw_score", cast(rand() as float)), "revelMax", cast(rand() as double), "spliceaiDsMax", cast(rand() as float), "pangolinLargestDs", cast(rand() as double), "phylop", cast(rand() as double), "polyphenMax", cast(rand() as double), "siftMax", cast(rand() as double))', percentNulls=0.1, ) - ) return VariantAnnotation(_df=data_spec.build(), _schema=va_schema) + @pytest.fixture() def mock_variant_index(spark: SparkSession) -> VariantIndex: - """Mock gene index.""" + """Mock variant index.""" vi_schema = VariantIndex.get_schema() data_spec = ( @@ -383,8 +383,8 @@ def mock_variant_index(spark: SparkSession) -> VariantIndex: percentNulls=0.1, ) .withColumnSpec( - "cadd", - expr='named_struct("phred", cast(rand() AS float), "raw", cast(rand() AS float))', + "inSilicoPredictors", + expr='named_struct("cadd", named_struct("phred", cast(rand() as float), "raw_score", cast(rand() as float)), "revelMax", cast(rand() as double), "spliceaiDsMax", cast(rand() as float), "pangolinLargestDs", cast(rand() as double), "phylop", cast(rand() as double), "polyphenMax", cast(rand() as double), "siftMax", cast(rand() as double))', percentNulls=0.1, ) .withColumnSpec("rsIds", expr="array(cast(rand() AS string))", percentNulls=0.1)