Skip to content

Commit

Permalink
io: write out lp file with sliced variables and constraints
Browse files Browse the repository at this point in the history
  • Loading branch information
FabianHofmann committed Oct 21, 2024
1 parent 03f3cc7 commit 4dba46a
Show file tree
Hide file tree
Showing 12 changed files with 394 additions and 162 deletions.
2 changes: 2 additions & 0 deletions doc/release_notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ Release Notes
Upcoming Version
----------------

* When writing out an LP file, large variables and constraints are now chunked to avoid memory issues. This is especially useful for large models whose constraints have many terms. The chunk size can be set with the ``slice_size`` argument of the ``solve`` function.

Version 0.3.15
--------------

Expand Down
107 changes: 87 additions & 20 deletions linopy/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,14 @@
This module contains commonly used functions.
"""

from __future__ import annotations

import operator
import os
from collections.abc import Hashable, Iterable, Mapping, Sequence
from collections.abc import Generator, Hashable, Iterable, Mapping, Sequence
from functools import reduce, wraps
from pathlib import Path
from typing import Any, Callable, Union, overload
from typing import TYPE_CHECKING, Any, Callable, overload
from warnings import warn

import numpy as np
Expand All @@ -30,6 +32,11 @@
sign_replace_dict,
)

if TYPE_CHECKING:
from linopy.constraints import Constraint
from linopy.expressions import LinearExpression
from linopy.variables import Variable


def maybe_replace_sign(sign: str) -> str:
"""
Expand Down Expand Up @@ -86,7 +93,7 @@ def format_string_as_variable_name(name: Hashable):
return str(name).replace(" ", "_").replace("-", "_")


def get_from_iterable(lst: Union[str, Iterable[Hashable], None], index: int):
def get_from_iterable(lst: str | Iterable[Hashable] | None, index: int):
"""
Returns the element at the specified index of the list, or None if the index
is out of bounds.
Expand All @@ -99,9 +106,9 @@ def get_from_iterable(lst: Union[str, Iterable[Hashable], None], index: int):


def pandas_to_dataarray(
arr: Union[pd.DataFrame, pd.Series],
coords: Union[Sequence[Union[Sequence, pd.Index, DataArray]], Mapping, None] = None,
dims: Union[Iterable[Hashable], None] = None,
arr: pd.DataFrame | pd.Series,
coords: Sequence[Sequence | pd.Index | DataArray] | Mapping | None = None,
dims: Iterable[Hashable] | None = None,
**kwargs,
) -> DataArray:
"""
Expand Down Expand Up @@ -156,8 +163,8 @@ def pandas_to_dataarray(

def numpy_to_dataarray(
arr: np.ndarray,
coords: Union[Sequence[Union[Sequence, pd.Index, DataArray]], Mapping, None] = None,
dims: Union[str, Iterable[Hashable], None] = None,
coords: Sequence[Sequence | pd.Index | DataArray] | Mapping | None = None,
dims: str | Iterable[Hashable] | None = None,
**kwargs,
) -> DataArray:
"""
Expand Down Expand Up @@ -195,8 +202,8 @@ def numpy_to_dataarray(

def as_dataarray(
arr,
coords: Union[Sequence[Union[Sequence, pd.Index, DataArray]], Mapping, None] = None,
dims: Union[str, Iterable[Hashable], None] = None,
coords: Sequence[Sequence | pd.Index | DataArray] | Mapping | None = None,
dims: str | Iterable[Hashable] | None = None,
**kwargs,
) -> DataArray:
"""
Expand Down Expand Up @@ -246,7 +253,7 @@ def as_dataarray(


# TODO: rename to to_pandas_dataframe
def to_dataframe(ds: Dataset, mask_func: Union[Callable, None] = None):
def to_dataframe(ds: Dataset, mask_func: Callable | None = None):
"""
Convert an xarray Dataset to a pandas DataFrame.
Expand Down Expand Up @@ -467,6 +474,65 @@ def fill_missing_coords(ds, fill_helper_dims: bool = False):
return ds


def iterate_slices(
    ds: Dataset | Variable | LinearExpression | Constraint,
    slice_size: int | None = 10_000,
    slice_dims: list | None = None,
) -> Generator[Dataset | Variable | LinearExpression | Constraint, None, None]:
    """
    Generate slices of an xarray Dataset or DataArray with a specified soft maximum size.

    The slicing is performed along the largest dimension contained in
    `slice_dims`. If the maximum size is larger than the total size of the
    object, or there is no dimension to slice along, the function yields the
    original object.

    Parameters
    ----------
    ds : xarray.Dataset or xarray.DataArray
        The input xarray Dataset or DataArray to be sliced.
    slice_size : int, optional
        The soft maximum number of elements in each slice. If None, no slicing
        is performed. If the maximum size is too small to accommodate any
        slice, the leading dimension is split into single steps.
    slice_dims : list, optional
        The dimensions to slice along. If None, all dimensions in `coord_dims`
        are used if `coord_dims` is an attribute of the input object.
        Otherwise, all dimensions are used.

    Yields
    ------
    xarray.Dataset or xarray.DataArray
        A slice of the input Dataset or DataArray.
    """
    if slice_dims is None:
        slice_dims = list(getattr(ds, "coord_dims", ds.dims))

    # The memory footprint of a slice is governed by the number of elements
    # across *all* dimensions (helper/term dimensions included).
    size = np.prod([ds.sizes[dim] for dim in ds.dims], dtype=int)

    if slice_size is None or size <= slice_size:
        yield ds
        return

    # Nothing to slice along (e.g. a scalar object) -> yield unchanged.
    if not slice_dims:
        yield ds
        return

    # Number of slices needed so that each holds at most ~slice_size elements.
    n_slices = max(size // slice_size, 1)

    # Only slice along dimensions in `slice_dims`: slicing a helper dimension
    # (such as the term dimension of an expression) would corrupt the object.
    # Pick the largest of the allowed dimensions as the leading dimension.
    sliceable_sizes = {dim: ds.sizes[dim] for dim in slice_dims}
    leading_dim = max(sliceable_sizes, key=sliceable_sizes.get)  # type: ignore
    size_of_leading_dim = ds.sizes[leading_dim]

    # A dimension cannot be split into more parts than its length.
    if size_of_leading_dim < n_slices:
        n_slices = size_of_leading_dim

    chunk_size = size_of_leading_dim // n_slices

    # Step through the leading dimension in chunks. Using a stepped range
    # (rather than exactly n_slices fixed windows) ensures the remainder
    # elements are yielded in a final, shorter slice instead of being
    # silently dropped when the length is not a multiple of chunk_size.
    for start in range(0, size_of_leading_dim, chunk_size):
        yield ds.isel({leading_dim: slice(start, start + chunk_size)})


def _remap(array, mapping):
return mapping[array.ravel()].reshape(array.shape)

Expand All @@ -484,7 +550,7 @@ def replace_by_map(ds, mapping):
)


def to_path(path: Union[str, Path, None]) -> Union[Path, None]:
def to_path(path: str | Path | None) -> Path | None:
"""
Convert a string to a Path object.
"""
Expand Down Expand Up @@ -526,7 +592,7 @@ def generate_indices_for_printout(dim_sizes, max_lines):
yield tuple(np.unravel_index(i, dim_sizes))


def align_lines_by_delimiter(lines: list[str], delimiter: Union[str, list[str]]):
def align_lines_by_delimiter(lines: list[str], delimiter: str | list[str]):
# Determine the maximum position of the delimiter
if isinstance(delimiter, str):
delimiter = [delimiter]
Expand All @@ -548,17 +614,18 @@ def align_lines_by_delimiter(lines: list[str], delimiter: Union[str, list[str]])


def get_label_position(
obj, values: Union[int, np.ndarray]
) -> Union[
Union[tuple[str, dict], tuple[None, None]],
list[Union[tuple[str, dict], tuple[None, None]]],
list[list[Union[tuple[str, dict], tuple[None, None]]]],
]:
obj, values: int | np.ndarray
) -> (
tuple[str, dict]
| tuple[None, None]
| list[tuple[str, dict] | tuple[None, None]]
| list[list[tuple[str, dict] | tuple[None, None]]]
):
"""
Get tuple of name and coordinate for variable labels.
"""

def find_single(value: int) -> Union[tuple[str, dict], tuple[None, None]]:
def find_single(value: int) -> tuple[str, dict] | tuple[None, None]:
if value == -1:
return None, None
for name, val in obj.items():
Expand Down
3 changes: 3 additions & 0 deletions linopy/constraints.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
has_optimized_model,
infer_schema_polars,
is_constant,
iterate_slices,
maybe_replace_signs,
print_coord,
print_single_constraint,
Expand Down Expand Up @@ -658,6 +659,8 @@ def to_polars(self):

stack = conwrap(Dataset.stack)

iterate_slices = iterate_slices


@dataclass(repr=False)
class Constraints:
Expand Down
3 changes: 3 additions & 0 deletions linopy/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
get_index_map,
group_terms_polars,
has_optimized_model,
iterate_slices,
print_single_expression,
to_dataframe,
to_polars,
Expand Down Expand Up @@ -1457,6 +1458,8 @@ def to_polars(self) -> pl.DataFrame:

stack = exprwrap(Dataset.stack)

iterate_slices = iterate_slices


class QuadraticExpression(LinearExpression):
"""
Expand Down
Loading

0 comments on commit 4dba46a

Please sign in to comment.