Skip to content

Commit

Permalink
chore: comments
Browse files Browse the repository at this point in the history
  • Loading branch information
Szymon Szyszkowski committed Dec 18, 2024
1 parent faee7d8 commit 897b1f0
Show file tree
Hide file tree
Showing 2 changed files with 1 addition and 3 deletions.
3 changes: 1 addition & 2 deletions src/gentropy/dataset/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
from pyspark.sql import DataFrame
from pyspark.sql import functions as f
from pyspark.sql import types as t
from pyspark.sql.types import DoubleType
from pyspark.sql.window import Window
from typing_extensions import Self

Expand Down Expand Up @@ -263,7 +262,7 @@ def drop_infinity_values(self: Self, *cols: str) -> Self:
if len(cols) == 0:
return self
inf_strings = ("Inf", "+Inf", "-Inf", "Infinity", "+Infinity", "-Infinity")
inf_values = [f.lit(v).cast(DoubleType()) for v in inf_strings]
inf_values = [f.lit(v).cast(t.DoubleType()) for v in inf_strings]
conditions = [f.col(c).isin(inf_values) for c in cols]
# reduce the per-column filter expressions with an OR, e.g. for cols ("beta", "se"):
# col("beta").isin(inf_values) | col("se").isin(inf_values) | ...
Expand Down
1 change: 0 additions & 1 deletion tests/gentropy/dataset/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,6 @@ def test_dataset_drop_infinity_values() -> None:
rows = [(v,) for v in data]
schema = StructType([StructField("field", DoubleType())])
input_df = spark.createDataFrame(rows, schema=schema)
input_df.printSchema()

assert input_df.count() == 7
# run without specifying *cols results in no filtering
Expand Down

0 comments on commit 897b1f0

Please sign in to comment.