io: remove pandas based io for lp writing, use polars instead

PyPSA · Oct 21, 2024 · 83c43db · 83c43db
1 parent 40a27f9
commit 83c43db
Show file tree

Hide file tree

Showing 4 changed files with 97 additions and 332 deletions.
diff --git a/doc/release_notes.rst b/doc/release_notes.rst
@@ -5,6 +5,7 @@ Upcoming Version
 ----------------
 
 * When writing out an LP file, large variables and constraints are now chunked to avoid memory issues. This is especially useful for large models with constraints with many terms. The chunk size can be set with the `slice_size` argument in the `solve` function.
+* To achieve better performance, LP file writing now uses the `polars` package by default. Setting `io_api` to `lp-polars` is therefore deprecated, as the standard `io_api=lp` already uses the `polars` package. Users should see no difference from this change other than faster LP file writing. The previous `pandas`-based implementation was removed.
 
 Version 0.3.15
 --------------

diff --git a/linopy/common.py b/linopy/common.py
@@ -340,13 +340,24 @@ def check_has_nulls_polars(df: pl.DataFrame, name: str = "") -> None:
 
     Raises:
     ------
-        ValueError: If the DataFrame contains null values,
-        a ValueError is raised with a message indicating the name of the constraint and the fields containing null values.
+        ValueError: If the DataFrame contains null values or NaN values,
+        a ValueError is raised with a message indicating the name of the constraint and the fields containing nulls.
     """
-    has_nulls = df.select(pl.col("*").is_null().any())
-    null_columns = [col for col in has_nulls.columns if has_nulls[col][0]]
+    null_check = df.select(
+        [
+            (
+                pl.col(col).is_null()
+                | (pl.col(col).is_nan() if dtype == pl.Float64 else False)
+            )
+            .any()
+            .alias(col)
+            for col, dtype in zip(df.columns, df.dtypes)
+        ]
+    )
+
+    null_columns = [col for col in null_check.columns if null_check[col][0]]
     if null_columns:
-        raise ValueError(f"{name} contains nan's in field(s) {null_columns}")
+        raise ValueError(f"{name} contains null/nan values in field(s) {null_columns}")
 
 
 def filter_nulls_polars(df: pl.DataFrame) -> pl.DataFrame: