Skip to content

Commit

Permalink
io: write out lp file with sliced variables and constraints
Browse files Browse the repository at this point in the history
  • Loading branch information
FabianHofmann committed Oct 21, 2024
1 parent 03f3cc7 commit 4dba46a
Show file tree
Hide file tree
Showing 12 changed files with 394 additions and 162 deletions.
2 changes: 2 additions & 0 deletions doc/release_notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ Release Notes
Upcoming Version
----------------

* When writing out an LP file, large variables and constraints are now chunked to avoid memory issues. This is especially useful for large models whose constraints have many terms. The chunk size can be set with the ``slice_size`` argument of the ``solve`` function.

Version 0.3.15
--------------

Expand Down
107 changes: 87 additions & 20 deletions linopy/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,14 @@
This module contains commonly used functions.
"""

from __future__ import annotations

import operator
import os
from collections.abc import Hashable, Iterable, Mapping, Sequence
from collections.abc import Generator, Hashable, Iterable, Mapping, Sequence
from functools import reduce, wraps
from pathlib import Path
from typing import Any, Callable, Union, overload
from typing import TYPE_CHECKING, Any, Callable, overload
from warnings import warn

import numpy as np
Expand All @@ -30,6 +32,11 @@
sign_replace_dict,
)

if TYPE_CHECKING:
from linopy.constraints import Constraint
from linopy.expressions import LinearExpression
from linopy.variables import Variable


def maybe_replace_sign(sign: str) -> str:
"""
Expand Down Expand Up @@ -86,7 +93,7 @@ def format_string_as_variable_name(name: Hashable):
return str(name).replace(" ", "_").replace("-", "_")


def get_from_iterable(lst: Union[str, Iterable[Hashable], None], index: int):
def get_from_iterable(lst: str | Iterable[Hashable] | None, index: int):
"""
Returns the element at the specified index of the list, or None if the index
is out of bounds.
Expand All @@ -99,9 +106,9 @@ def get_from_iterable(lst: Union[str, Iterable[Hashable], None], index: int):


def pandas_to_dataarray(
arr: Union[pd.DataFrame, pd.Series],
coords: Union[Sequence[Union[Sequence, pd.Index, DataArray]], Mapping, None] = None,
dims: Union[Iterable[Hashable], None] = None,
arr: pd.DataFrame | pd.Series,
coords: Sequence[Sequence | pd.Index | DataArray] | Mapping | None = None,
dims: Iterable[Hashable] | None = None,
**kwargs,
) -> DataArray:
"""
Expand Down Expand Up @@ -156,8 +163,8 @@ def pandas_to_dataarray(

def numpy_to_dataarray(
arr: np.ndarray,
coords: Union[Sequence[Union[Sequence, pd.Index, DataArray]], Mapping, None] = None,
dims: Union[str, Iterable[Hashable], None] = None,
coords: Sequence[Sequence | pd.Index | DataArray] | Mapping | None = None,
dims: str | Iterable[Hashable] | None = None,
**kwargs,
) -> DataArray:
"""
Expand Down Expand Up @@ -195,8 +202,8 @@ def numpy_to_dataarray(

def as_dataarray(
arr,
coords: Union[Sequence[Union[Sequence, pd.Index, DataArray]], Mapping, None] = None,
dims: Union[str, Iterable[Hashable], None] = None,
coords: Sequence[Sequence | pd.Index | DataArray] | Mapping | None = None,
dims: str | Iterable[Hashable] | None = None,
**kwargs,
) -> DataArray:
"""
Expand Down Expand Up @@ -246,7 +253,7 @@ def as_dataarray(


# TODO: rename to to_pandas_dataframe
def to_dataframe(ds: Dataset, mask_func: Union[Callable, None] = None):
def to_dataframe(ds: Dataset, mask_func: Callable | None = None):
"""
Convert an xarray Dataset to a pandas DataFrame.
Expand Down Expand Up @@ -467,6 +474,65 @@ def fill_missing_coords(ds, fill_helper_dims: bool = False):
return ds


def iterate_slices(
    ds: Dataset | Variable | LinearExpression | Constraint,
    slice_size: int | None = 10_000,
    slice_dims: list | None = None,
) -> Generator[Dataset | Variable | LinearExpression | Constraint, None, None]:
    """
    Generate slices of an xarray Dataset or DataArray with a specified soft maximum size.

    The slicing is performed along the largest dimension contained in
    `slice_dims`. If the maximum size is larger than the total size of the
    object, or there is no dimension to slice along, the function yields the
    original object.

    Parameters
    ----------
    ds : xarray.Dataset or xarray.DataArray
        The input xarray Dataset or DataArray to be sliced.
    slice_size : int, optional
        The soft maximum number of elements in each slice. If None, no slicing
        is performed. If the maximum size is too small to accommodate any
        slice, the leading dimension is split into single steps.
    slice_dims : list, optional
        The dimensions to slice along. If None, all dimensions in `coord_dims`
        are used if `coord_dims` is an attribute of the input object.
        Otherwise, all dimensions are used.

    Yields
    ------
    xarray.Dataset or xarray.DataArray
        A slice of the input Dataset or DataArray.
    """
    if slice_dims is None:
        slice_dims = list(getattr(ds, "coord_dims", ds.dims))

    # The memory footprint of a slice is governed by the number of elements
    # across *all* dimensions (helper/term dimensions included).
    size = np.prod([ds.sizes[dim] for dim in ds.dims], dtype=int)

    if slice_size is None or size <= slice_size:
        yield ds
        return

    # Nothing to slice along (e.g. a scalar object) -> yield unchanged.
    if not slice_dims:
        yield ds
        return

    # Number of slices needed so that each holds at most ~slice_size elements.
    n_slices = max(size // slice_size, 1)

    # Only slice along dimensions in `slice_dims`: slicing a helper dimension
    # (such as the term dimension of an expression) would corrupt the object.
    # Pick the largest of the allowed dimensions as the leading dimension.
    sliceable_sizes = {dim: ds.sizes[dim] for dim in slice_dims}
    leading_dim = max(sliceable_sizes, key=sliceable_sizes.get)  # type: ignore
    size_of_leading_dim = ds.sizes[leading_dim]

    # A dimension cannot be split into more parts than its length.
    if size_of_leading_dim < n_slices:
        n_slices = size_of_leading_dim

    chunk_size = size_of_leading_dim // n_slices

    # Step through the leading dimension in chunks. Using a stepped range
    # (rather than exactly n_slices fixed windows) ensures the remainder
    # elements are yielded in a final, shorter slice instead of being
    # silently dropped when the length is not a multiple of chunk_size.
    for start in range(0, size_of_leading_dim, chunk_size):
        yield ds.isel({leading_dim: slice(start, start + chunk_size)})


def _remap(array, mapping):
return mapping[array.ravel()].reshape(array.shape)

Expand All @@ -484,7 +550,7 @@ def replace_by_map(ds, mapping):
)


def to_path(path: Union[str, Path, None]) -> Union[Path, None]:
def to_path(path: str | Path | None) -> Path | None:
"""
Convert a string to a Path object.
"""
Expand Down Expand Up @@ -526,7 +592,7 @@ def generate_indices_for_printout(dim_sizes, max_lines):
yield tuple(np.unravel_index(i, dim_sizes))


def align_lines_by_delimiter(lines: list[str], delimiter: Union[str, list[str]]):
def align_lines_by_delimiter(lines: list[str], delimiter: str | list[str]):
# Determine the maximum position of the delimiter
if isinstance(delimiter, str):
delimiter = [delimiter]
Expand All @@ -548,17 +614,18 @@ def align_lines_by_delimiter(lines: list[str], delimiter: Union[str, list[str]])


def get_label_position(
obj, values: Union[int, np.ndarray]
) -> Union[
Union[tuple[str, dict], tuple[None, None]],
list[Union[tuple[str, dict], tuple[None, None]]],
list[list[Union[tuple[str, dict], tuple[None, None]]]],
]:
obj, values: int | np.ndarray
) -> (
tuple[str, dict]
| tuple[None, None]
| list[tuple[str, dict] | tuple[None, None]]
| list[list[tuple[str, dict] | tuple[None, None]]]
):
"""
Get tuple of name and coordinate for variable labels.
"""

def find_single(value: int) -> Union[tuple[str, dict], tuple[None, None]]:
def find_single(value: int) -> tuple[str, dict] | tuple[None, None]:
if value == -1:
return None, None
for name, val in obj.items():
Expand Down
3 changes: 3 additions & 0 deletions linopy/constraints.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
has_optimized_model,
infer_schema_polars,
is_constant,
iterate_slices,
maybe_replace_signs,
print_coord,
print_single_constraint,
Expand Down Expand Up @@ -658,6 +659,8 @@ def to_polars(self):

stack = conwrap(Dataset.stack)

iterate_slices = iterate_slices


@dataclass(repr=False)
class Constraints:
Expand Down
3 changes: 3 additions & 0 deletions linopy/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
get_index_map,
group_terms_polars,
has_optimized_model,
iterate_slices,
print_single_expression,
to_dataframe,
to_polars,
Expand Down Expand Up @@ -1457,6 +1458,8 @@ def to_polars(self) -> pl.DataFrame:

stack = exprwrap(Dataset.stack)

iterate_slices = iterate_slices


class QuadraticExpression(LinearExpression):
"""
Expand Down
Loading

0 comments on commit 4dba46a

Please sign in to comment.