Revert "chore: fix doctest syntax"

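This reverts commit 630c0c9, restoring lowercase `null` in the doctest expected-output tables and undoing that commit's formatting-only reflows of several calls.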
opentargets · project-defiant · Jan 28, 2025 · Jan 17, 2025 · Jan 17, 2025 · Jan 17, 2025
commit 18c66b1d1d9c4833e3dbfc406ba3440577a119a5
diff --git a/src/gentropy/common/spark_helpers.py b/src/gentropy/common/spark_helpers.py
@@ -119,7 +119,7 @@ def pvalue_to_zscore(pval_col: Column) -> Column:
         | t3|   0.05|  1.959964|
         | t4| 1e-300| 37.537838|
         | t5|1e-1000| 37.537838|
-        | t6|     NA|      NULL|
+        | t6|     NA|      null|
         +---+-------+----------+
         <BLANKLINE>
 
@@ -149,7 +149,7 @@ def nullify_empty_array(column: Column) -> Column:
     +---------+---------+
     |    value|      new|
     +---------+---------+
-    |       []|     NULL|
+    |       []|     null|
     |[1, 2, 3]|[1, 2, 3]|
     +---------+---------+
     <BLANKLINE>
@@ -472,8 +472,8 @@ def map_column_by_dictionary(col: Column, mapping_dict: dict[str, Any]) -> Colum
         |               label|       id|
         +--------------------+---------+
         |       consequence_1|SO:000000|
-        |unmapped_consequence|     NULL|
-        |                NULL|     NULL|
+        |unmapped_consequence|     null|
+        |                null|     null|
         +--------------------+---------+
         <BLANKLINE>
     """
@@ -604,7 +604,7 @@ def rename_all_columns(df: DataFrame, prefix: str) -> DataFrame:
         +-----------+-----------+-----------+
         |          a|        1.2|       true|
         |          b|        0.0|      false|
-        |          c|       NULL|       NULL|
+        |          c|       null|       null|
         +-----------+-----------+-----------+
         <BLANKLINE>
     """
@@ -649,7 +649,7 @@ def safe_array_union(
         |[a, b]|
         |   [c]|
         |   [d]|
-        |  NULL|
+        |  null|
         +------+
         <BLANKLINE>
         >>> schema="arr2: array<struct<b:int,a:string>>, arr: array<struct<a:string,b:int>>"
@@ -752,7 +752,7 @@ def create_empty_column_if_not_exists(
         +----+----+----+
         |col1|col2|col3|
         +----+----+----+
-        |   1|   2|NULL|
+        |   1|   2|null|
         +----+----+----+
         <BLANKLINE>
     """
@@ -782,8 +782,8 @@ def get_standard_error_from_confidence_interval(lower: Column, upper: Column) ->
         |     standard_error|
         +-------------------+
         |0.25510204081632654|
-        |               NULL|
-        |               NULL|
+        |               null|
+        |               null|
         +-------------------+
         <BLANKLINE>
     """

diff --git a/src/gentropy/common/utils.py b/src/gentropy/common/utils.py
@@ -50,7 +50,7 @@ def calculate_confidence_interval(
         |pvalue_mantissa|pvalue_exponent|beta|standard_error|betaConfidenceIntervalLower|betaConfidenceIntervalUpper|
         +---------------+---------------+----+--------------+---------------------------+---------------------------+
         |            2.5|            -10| 0.5|           0.2|        0.10799999999999998|                      0.892|
-        |            3.0|             -5| 1.0|          NULL|         0.5303663900832607|         1.4696336099167393|
+        |            3.0|             -5| 1.0|          null|         0.5303663900832607|         1.4696336099167393|
         |            1.5|             -8|-0.2|           0.1|                     -0.396|       -0.00400000000000...|
         +---------------+---------------+----+--------------+---------------------------+---------------------------+
         <BLANKLINE>
@@ -98,9 +98,9 @@ def convert_odds_ratio_to_beta(
         |beta|oddsRatio|standardError|               beta|standardError|
         +----+---------+-------------+-------------------+-------------+
         | 0.1|      1.1|          0.1|                0.1|          0.1|
-        |NULL|      1.1|          0.1|0.09531017980432493|         NULL|
-        | 0.1|     NULL|          0.1|                0.1|          0.1|
-        | 0.1|      1.1|         NULL|                0.1|         NULL|
+        |null|      1.1|          0.1|0.09531017980432493|         null|
+        | 0.1|     null|          0.1|                0.1|          0.1|
+        | 0.1|      1.1|         null|                0.1|         null|
         +----+---------+-------------+-------------------+-------------+
         <BLANKLINE>
 

diff --git a/src/gentropy/dataset/variant_index.py b/src/gentropy/dataset/variant_index.py
@@ -94,7 +94,7 @@ def hash_long_variant_ids(
             |v_short     |v_short                                     |
             |v_looooooong|OTVAR_23_23_3749d019d645894770c364992ae70a05|
             |no_chrom    |OTVAR_41acfcd7d4fd523b33600b504914ef25      |
-            |NULL        |NULL                                        |
+            |null        |null                                        |
             +------------+--------------------------------------------+
             <BLANKLINE>
         """

diff --git a/src/gentropy/datasource/ensembl/vep_parser.py b/src/gentropy/datasource/ensembl/vep_parser.py
@@ -192,7 +192,7 @@ def _colocated_variants_to_rsids(colocated_variants: Column) -> Column:
             |rsIds          |
             +---------------+
             |[rs1, rs2, rs3]|
-            |[NULL]         |
+            |[null]         |
             +---------------+
             <BLANKLINE>
         """
@@ -380,8 +380,8 @@ def _get_max_alpha_missense(transcripts: Column) -> Column:
         +-----------------------------------------------------+
         |am                                                   |
         +-----------------------------------------------------+
-        |{AlphaMissense, assessment 1, 0.4, NULL, gene1, NULL}|
-        |{AlphaMissense, NULL, NULL, NULL, gene1, NULL}       |
+        |{AlphaMissense, assessment 1, 0.4, null, gene1, null}|
+        |{AlphaMissense, null, null, null, gene1, null}       |
         +-----------------------------------------------------+
         <BLANKLINE>
         """
@@ -513,8 +513,8 @@ def _parser_amino_acid_change(amino_acids: Column, protein_end: Column) -> Colum
             |amino_acid_change|
             +-----------------+
             |              A1B|
-            |             NULL|
-            |             NULL|
+            |             null|
+            |             null|
             +-----------------+
             <BLANKLINE>
         """

diff --git a/src/gentropy/datasource/gwas_catalog/associations.py b/src/gentropy/datasource/gwas_catalog/associations.py
@@ -120,9 +120,9 @@ def _normalise_pvaluetext(p_value_text: Column) -> Column:
             |  European Ancestry|      [EA]|
             |   African ancestry|      [AA]|
             |Alzheimer’s Disease|      [AD]|
-            |      (progression)|      NULL|
-            |                   |      NULL|
-            |               NULL|      NULL|
+            |      (progression)|      null|
+            |                   |      null|
+            |               null|      null|
             +-------------------+----------+
             <BLANKLINE>
 
@@ -297,7 +297,9 @@ def _map_variants_to_gnomad_variants(
             "position",
         )
 
-        return gwas_associations.join(fully_mapped_associations, on="rowId", how="left")
+        return gwas_associations.join(
+            fully_mapped_associations, on="rowId", how="left"
+        )
 
     @staticmethod
     def _compare_rsids(gnomad: Column, gwas: Column) -> Column:
@@ -421,7 +423,7 @@ def _check_concordance(
             |         A|              T|              G|        true|
             |         A|              C|              G|       false|
             |         A|              A|              ?|        true|
-            |      NULL|           NULL|              A|        true|
+            |      null|           null|              A|        true|
             +----------+---------------+---------------+------------+
             <BLANKLINE>
 
@@ -479,7 +481,7 @@ def _get_reverse_complement(allele_col: Column) -> Column:
             |    AC|           GT|
             |GTaatc|       GATTAC|
             |     ?|            ?|
-            |  NULL|         NULL|
+            |  null|         null|
             +------+-------------+
             <BLANKLINE>
 
@@ -549,7 +551,7 @@ def _are_alleles_palindromic(
             |       AG|       AT|         false|
             |       AT|       AT|          true|
             |   CATATG|   CATATG|          true|
-            |     NULL|        -|         false|
+            |     null|        -|         false|
             +---------+---------+--------------+
             <BLANKLINE>
 
@@ -651,7 +653,7 @@ def _harmonise_odds_ratio(
         |   0.5|false|       0.5|
         |   0.5| true|       2.0|
         |   0.0|false|       0.0|
-        |   0.0| true|      NULL|
+        |   0.0| true|      null|
         +------+-----+----------+
         <BLANKLINE>
         """
@@ -691,7 +693,7 @@ def _concatenate_substudy_description(
         |association_trait|mapped_trait_uri                                                         |pvalue_text      |substudy_description                      |
         +-----------------+-------------------------------------------------------------------------+-----------------+------------------------------------------+
         |Height           |http://www.ebi.ac.uk/efo/EFO_0000001,http://www.ebi.ac.uk/efo/EFO_0000002|European Ancestry|Height|EA|EFO_0000001/EFO_0000002         |
-        |Schizophrenia    |http://www.ebi.ac.uk/efo/MONDO_0005090                                   |NULL             |Schizophrenia|no_pvalue_text|MONDO_0005090|
+        |Schizophrenia    |http://www.ebi.ac.uk/efo/MONDO_0005090                                   |null             |Schizophrenia|no_pvalue_text|MONDO_0005090|
         +-----------------+-------------------------------------------------------------------------+-----------------+------------------------------------------+
         <BLANKLINE>
         """
@@ -801,9 +803,9 @@ def _qc_genomic_location(
             +----------------------------+----------+--------+
             |qc                          |chromosome|position|
             +----------------------------+----------+--------+
-            |[Incomplete genomic mapping]|NULL      |NULL    |
-            |[Incomplete genomic mapping]|1         |NULL    |
-            |[Incomplete genomic mapping]|NULL      |1       |
+            |[Incomplete genomic mapping]|null      |null    |
+            |[Incomplete genomic mapping]|1         |null    |
+            |[Incomplete genomic mapping]|null      |1       |
             |[]                          |1         |1       |
             +----------------------------+----------+--------+
             <BLANKLINE>
@@ -865,8 +867,8 @@ def _qc_unmapped_variants(qc: Column, alternate_allele: Column) -> Column:
             +----------------+----+--------------------+
             |alternate_allele|  qc|              new_qc|
             +----------------+----+--------------------+
-            |               A|NULL|                  []|
-            |            NULL|NULL|[No mapping in Gn...|
+            |               A|null|                  []|
+            |            null|null|[No mapping in Gn...|
             +----------------+----+--------------------+
             <BLANKLINE>
 
@@ -938,7 +940,7 @@ def _get_effect_type(ci_text: Column) -> Column:
             |95% CI: [0.1-0.2]         |odds_ratio |
             |95% CI: [0.1-0.2] increase|beta       |
             |95% CI: [0.1-0.2] decrease|beta       |
-            |NULL                      |NULL       |
+            |null                      |null       |
             +--------------------------+-----------+
             <BLANKLINE>
 
@@ -992,11 +994,11 @@ def harmonise_association_effect_to_beta(
             +-------------------------+---------------+---------------+----------+--------------------+-------------------+--------------------+
             |STRONGEST SNP-RISK ALLELE|referenceAllele|alternateAllele|OR or BETA|       95% CI (TEXT)|               beta|       standardError|
             +-------------------------+---------------+---------------+----------+--------------------+-------------------+--------------------+
-            |                  rs123-T|              A|              T|       0.1|[0.08-0.12] unit ...|               NULL|                NULL|
+            |                  rs123-T|              A|              T|       0.1|[0.08-0.12] unit ...|               null|                null|
             |                  rs123-C|              G|              T|       0.1|[0.08-0.12] unit ...|               -0.1|0.010204081404574064|
             |                  rs123-T|              C|              T|       0.1|[0.08-0.12] unit ...|                0.1|0.010204081404574064|
-            |                  rs123-T|              C|              T|       0.1|         [0.08-0.12]|-2.3025850929940455|                NULL|
-            |                  rs123-C|              G|              T|       0.1|         [0.08-0.12]|  2.302585092994046|                NULL|
+            |                  rs123-T|              C|              T|       0.1|         [0.08-0.12]|-2.3025850929940455|                null|
+            |                  rs123-C|              G|              T|       0.1|         [0.08-0.12]|  2.302585092994046|                null|
             +-------------------------+---------------+---------------+----------+--------------------+-------------------+--------------------+
             <BLANKLINE>
         """
@@ -1106,8 +1108,7 @@ def from_source(
         return StudyLocusGWASCatalog(
             _df=gwas_associations.withColumn(
                 # temporary column
-                "rowId",
-                f.monotonically_increasing_id().cast(StringType()),
+                "rowId", f.monotonically_increasing_id().cast(StringType())
             )
             .transform(
                 # Map/harmonise variants to variant annotation dataset:
@@ -1139,7 +1140,9 @@ def from_source(
             # Adding study-locus id:
             .withColumn(
                 "studyLocusId",
-                StudyLocus.assign_study_locus_id(["studyId", "variantId"]),
+                StudyLocus.assign_study_locus_id(
+                    ["studyId", "variantId"]
+                ),
             )
             .select(
                 # INSIDE STUDY-LOCUS SCHEMA:

diff --git a/src/gentropy/datasource/gwas_catalog/study_index.py b/src/gentropy/datasource/gwas_catalog/study_index.py
@@ -219,7 +219,7 @@ def parse_cohorts(raw_cohort: Column) -> Column:
         |parsedCohorts                         |
         +--------------------------------------+
         |[BioME, CaPS, Estonia, FHS, UKB, GERA]|
-        |NULL                                  |
+        |null                                  |
         +--------------------------------------+
         <BLANKLINE>
         """
@@ -655,7 +655,7 @@ def add_no_sumstats_flag(self: StudyIndexGWASCatalog) -> StudyIndexGWASCatalog:
         """
         self.df = self.df.withColumn(
             "qualityControls",
-            f.array(f.lit(StudyQualityCheck.SUMSTATS_NOT_AVAILABLE.value)),
+            f.array(f.lit(StudyQualityCheck.SUMSTATS_NOT_AVAILABLE.value))
         )
         return self
 

diff --git a/src/gentropy/method/colocalisation.py b/src/gentropy/method/colocalisation.py
@@ -51,8 +51,8 @@ def get_tag_variant_source(statistics: Column) -> Column:
         |   a|   b|source|
         +----+----+------+
         |   a|   b|  both|
-        |NULL|   b| right|
-        |   a|NULL|  left|
+        |null|   b| right|
+        |   a|null|  left|
         +----+----+------+
         <BLANKLINE>
     """
@@ -181,8 +181,8 @@ def colocalise(
                 .withColumn("colocalisationMethod", f.lit(cls.METHOD_NAME))
                 .join(
                     overlapping_signals.calculate_beta_ratio(),
-                    on=["leftStudyLocusId", "rightStudyLocusId", "chromosome"],
-                    how="left",
+                    on=["leftStudyLocusId", "rightStudyLocusId","chromosome"],
+                    how="left"
                 )
             ),
             _schema=Colocalisation.get_schema(),
@@ -386,8 +386,8 @@ def colocalise(
                 .withColumn("colocalisationMethod", f.lit(cls.METHOD_NAME))
                 .join(
                     overlapping_signals.calculate_beta_ratio(),
-                    on=["leftStudyLocusId", "rightStudyLocusId", "chromosome"],
-                    how="left",
+                    on=["leftStudyLocusId", "rightStudyLocusId","chromosome"],
+                    how="left"
                 )
             ),
             _schema=Colocalisation.get_schema(),