Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Introduce FormattingConfig and deprecate DefaultFormats #127

Merged
merged 10 commits into from
Jul 20, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 33 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -311,31 +311,51 @@ assert_df_equality(df1, df2, allow_nan_equality=True)

## Customize formatting

*Available in chispa 0.10+*.

You can specify custom formats for the printed error messages as follows:

```python
@dataclass
class MyFormats:
mismatched_rows = ["light_yellow"]
matched_rows = ["cyan", "bold"]
mismatched_cells = ["purple"]
matched_cells = ["blue"]
from chispa.formatting import FormattingConfig

formats = FormattingConfig(
mismatched_rows={"color": "light_yellow"},
matched_rows={"color": "cyan", "style": "bold"},
mismatched_cells={"color": "purple"},
matched_cells={"color": "blue"},
)

assert_basic_rows_equality(df1.collect(), df2.collect(), formats=MyFormats())
assert_basic_rows_equality(df1.collect(), df2.collect(), formats=formats)
```

Or, equivalently, using the `Color` and `Style` enums instead of plain strings:

```python
from chispa.formatting import FormattingConfig, Color, Style

formats = FormattingConfig(
mismatched_rows={"color": Color.LIGHT_YELLOW},
matched_rows={"color": Color.CYAN, "style": Style.BOLD},
mismatched_cells={"color": Color.PURPLE},
matched_cells={"color": Color.BLUE},
)

assert_basic_rows_equality(df1.collect(), df2.collect(), formats=formats)
```

You can also define these formats in `conftest.py` and inject them via a fixture:

```python
@pytest.fixture()
def my_formats():
return MyFormats()
def chispa_formats():
return FormattingConfig(
mismatched_rows={"color": "light_yellow"},
matched_rows={"color": "cyan", "style": "bold"},
mismatched_cells={"color": "purple"},
matched_cells={"color": "blue"},
)

def test_shows_assert_basic_rows_equality(my_formats):
def test_shows_assert_basic_rows_equality(chispa_formats):
...
assert_basic_rows_equality(df1.collect(), df2.collect(), formats=my_formats)
assert_basic_rows_equality(df1.collect(), df2.collect(), formats=chispa_formats)
```

![custom_formats](https://github.com/MrPowers/chispa/blob/main/images/custom_formats.png)
Expand Down
21 changes: 17 additions & 4 deletions chispa/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import annotations

import os
import sys
from glob import glob
Expand Down Expand Up @@ -27,7 +29,8 @@
print("Can't find Apache Spark. Please set environment variable SPARK_HOME to root of installation!")
exit(-1)

from chispa.default_formats import DefaultFormats
from chispa.default_formats import DefaultFormats, convert_to_formatting_config
from chispa.formatting.formats import Color, Format, FormattingConfig, Style

from .column_comparer import (
ColumnsNotEqualError,
Expand All @@ -43,8 +46,14 @@


class Chispa:
def __init__(self, formats=DefaultFormats(), default_output=None):
self.formats = formats
def __init__(self, formats: FormattingConfig | None = None, default_output=None):
if not formats:
self.formats = FormattingConfig()
elif type(formats) is FormattingConfig:
fpgmaas marked this conversation as resolved.
Show resolved Hide resolved
self.formats = formats
else:
self.formats = convert_to_formatting_config(formats)

self.default_outputs = default_output

def assert_df_equality(
Expand Down Expand Up @@ -81,6 +90,10 @@ def assert_df_equality(
"assert_column_equality",
"assert_approx_column_equality",
"assert_basic_rows_equality",
"DefaultFormats",
"Style",
"Color",
"FormattingConfig",
"Format",
"Chispa",
"DefaultFormats",
)
3 changes: 3 additions & 0 deletions chispa/bcolors.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
from __future__ import annotations


class bcolors:
NC = "\033[0m" # No Color, reset all

Expand Down
2 changes: 2 additions & 0 deletions chispa/column_comparer.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import annotations

from prettytable import PrettyTable

from chispa.bcolors import bcolors
Expand Down
22 changes: 19 additions & 3 deletions chispa/dataframe_comparer.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
from __future__ import annotations

from functools import reduce

from chispa.default_formats import DefaultFormats
from chispa.default_formats import convert_to_formatting_config
from chispa.formatting.formats import FormattingConfig
from chispa.row_comparer import are_rows_approx_equal, are_rows_equal_enhanced
from chispa.rows_comparer import (
assert_basic_rows_equality,
Expand All @@ -25,8 +28,13 @@ def assert_df_equality(
ignore_row_order=False,
underline_cells=False,
ignore_metadata=False,
formats=DefaultFormats(),
formats: FormattingConfig | None = None,
):
if not formats:
formats = FormattingConfig()
elif type(formats) is not FormattingConfig:
formats = convert_to_formatting_config(formats)

if transforms is None:
transforms = []
if ignore_column_order:
Expand Down Expand Up @@ -71,8 +79,16 @@ def assert_approx_df_equality(
allow_nan_equality=False,
ignore_column_order=False,
ignore_row_order=False,
formats=DefaultFormats(),
formats: FormattingConfig | None = None,
):
if not formats:
    # No formats supplied: fall back to the default FormattingConfig.
    formats = FormattingConfig()
elif type(formats) is not FormattingConfig:
    # Legacy format objects are converted for backwards compatibility.
    # (A second, identical `elif` branch used to follow here; it could
    # never execute and has been removed.)
    formats = convert_to_formatting_config(formats)

if transforms is None:
transforms = []
if ignore_column_order:
Expand Down
68 changes: 63 additions & 5 deletions chispa/default_formats.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,67 @@
from dataclasses import dataclass
from __future__ import annotations

import warnings
from dataclasses import dataclass, field
from typing import Any

from chispa.formatting import Color, Format, FormattingConfig, Style


@dataclass
class DefaultFormats:
    """
    This class is now deprecated and should be removed in a future release, together with `convert_to_formatting_config`.

    Each field holds a list of lowercase color/style names; a fresh list is
    created per instance via ``default_factory`` so instances never share state.
    """

    # Names applied to rows that differ between the compared inputs.
    mismatched_rows: list[str] = field(default_factory=lambda: ["red"])
    # Names applied to rows that are equal.
    matched_rows: list[str] = field(default_factory=lambda: ["blue"])
    # Names applied to individual cells that differ.
    mismatched_cells: list[str] = field(default_factory=lambda: ["red", "underline"])
    # Names applied to individual cells that are equal.
    matched_cells: list[str] = field(default_factory=lambda: ["blue"])

    def __post_init__(self):
        """Emit a ``DeprecationWarning`` every time an instance is created."""
        # stacklevel=3 skips this method and the dataclass-generated __init__
        # so the warning is attributed to the code that instantiated the class.
        warnings.warn(
            "DefaultFormats is deprecated. Use `chispa.formatting.FormattingConfig` instead.",
            DeprecationWarning,
            stacklevel=3,
        )


def convert_to_formatting_config(instance: Any) -> FormattingConfig:
    """
    Converts an instance of an arbitrary class with specified fields to a FormattingConfig instance.
    This function is purely for backwards compatibility and should be removed in a future release.

    Args:
        instance: Object exposing ``mismatched_rows``, ``matched_rows``,
            ``mismatched_cells`` and ``matched_cells`` attributes, each an
            iterable of lowercase color and/or style names.

    Returns:
        A ``FormattingConfig`` built from the four attributes.

    Raises:
        ValueError: If a name is neither a lowercase ``Color`` member name nor
            a lowercase ``Style`` member name.
        AttributeError: If ``instance`` lacks one of the four attributes.
    """
    # DefaultFormats already warns on construction; only warn here for any
    # other legacy object.
    if type(instance) is not DefaultFormats:
        warnings.warn(
            "Using an arbitrary dataclass is deprecated. Use `chispa.formatting.FormattingConfig` instead.",
            DeprecationWarning,
            stacklevel=2,  # attribute the warning to the caller of this function
        )

    # Computed once and shared by all four conversions below.
    valid_colors = [c.name.lower() for c in Color]
    valid_styles = [s.name.lower() for s in Style]

    def _convert_to_format(values: list[str]) -> Format:
        """Translate a list of color/style names into a single ``Format``."""
        color = None
        styles = []

        for value in values:
            if value in valid_colors:
                # If several colors are listed, the last one wins.
                color = Color[value.upper()]
            elif value in valid_styles:
                styles.append(Style[value.upper()])
            else:
                raise ValueError(
                    f"Invalid value: {value}. Valid values are colors: {valid_colors} and styles: {valid_styles}"
                )

        return Format(color=color, style=styles if styles else None)

    return FormattingConfig(
        mismatched_rows=_convert_to_format(instance.mismatched_rows),
        matched_rows=_convert_to_format(instance.matched_rows),
        mismatched_cells=_convert_to_format(instance.mismatched_cells),
        matched_cells=_convert_to_format(instance.matched_cells),
    )
6 changes: 6 additions & 0 deletions chispa/formatting/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from __future__ import annotations

from chispa.formatting.formats import RESET, Color, Format, FormattingConfig, Style
from chispa.formatting.terminal_string_formatter import format_string

__all__ = ("Style", "Color", "FormattingConfig", "Format", "format_string", "RESET")
127 changes: 127 additions & 0 deletions chispa/formatting/formats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
from __future__ import annotations

from dataclasses import dataclass
from enum import Enum
from typing import ClassVar

RESET = "\033[0m"


class Color(str, Enum):
    """Terminal escape sequences that select a foreground text color.

    The class mixes in ``str``, so every member is itself a string whose
    value is the raw escape sequence.
    """

    # Standard-intensity colors (codes 30-37).
    BLACK = "\033[30m"
    RED = "\033[31m"
    GREEN = "\033[32m"
    YELLOW = "\033[33m"
    BLUE = "\033[34m"
    PURPLE = "\033[35m"
    CYAN = "\033[36m"
    LIGHT_GRAY = "\033[37m"

    # High-intensity colors (codes 90-97).
    DARK_GRAY = "\033[90m"
    LIGHT_RED = "\033[91m"
    LIGHT_GREEN = "\033[92m"
    LIGHT_YELLOW = "\033[93m"
    LIGHT_BLUE = "\033[94m"
    LIGHT_PURPLE = "\033[95m"
    LIGHT_CYAN = "\033[96m"
    WHITE = "\033[97m"


class Style(str, Enum):
    """Terminal escape sequences that select a text style.

    The class mixes in ``str``, so every member is itself a string whose
    value is the raw escape sequence.
    """

    BOLD = "\033[1m"
    UNDERLINE = "\033[4m"
    BLINK = "\033[5m"
    INVERT = "\033[7m"
    HIDE = "\033[8m"


@dataclass
class Format:
    """Color and style(s) to apply to a piece of terminal output."""

    # Foreground color, or None for no color.
    color: Color | None = None
    # Styles to apply, or None for no styling.
    style: list[Style] | None = None

    @classmethod
    def from_dict(cls, format_dict: dict) -> Format:
        """Build a ``Format`` from a ``{"color": ..., "style": ...}`` dictionary.

        Args:
            format_dict: Dictionary with the optional keys ``"color"`` (a
                ``Color`` or a color name) and ``"style"`` (a ``Style``, a
                style name, or a list of those).

        Returns:
            The corresponding ``Format``. A missing key leaves that attribute
            as ``None``.

        Raises:
            ValueError: If ``format_dict`` is not a dict, contains a key other
                than ``"color"``/``"style"``, or names an unknown color or style.
        """
        if not isinstance(format_dict, dict):
            raise ValueError("Input must be a dictionary")

        # Reject unknown keys up front: otherwise a typo such as "colour"
        # would be ignored silently and produce an unformatted result.
        valid_keys = {"color", "style"}
        invalid_keys = set(format_dict) - valid_keys
        if invalid_keys:
            raise ValueError(
                f"Invalid keys in format dictionary: {sorted(invalid_keys, key=str)}. "
                f"Valid keys are {sorted(valid_keys)}"
            )

        color = cls._get_color_enum(format_dict.get("color"))
        style = format_dict.get("style")
        if isinstance(style, str):
            # Style members are str instances too, so a single Style lands here.
            styles = [cls._get_style_enum(style)]
        elif isinstance(style, list):
            styles = [cls._get_style_enum(s) for s in style]
        else:
            styles = None

        return cls(color=color, style=styles)

    @staticmethod
    def _get_color_enum(color: Color | str | None) -> Color | None:
        """Return ``color`` as a ``Color`` member; non-string input yields ``None``.

        Raises:
            ValueError: If ``color`` is a string that is not a color name
                (matched case-insensitively against the member names).
        """
        if isinstance(color, Color):
            return color
        elif isinstance(color, str):
            try:
                return Color[color.upper()]
            except KeyError:
                valid_colors = [c.name.lower() for c in Color]
                # `from None`: the KeyError adds nothing to the message below.
                raise ValueError(f"Invalid color name: {color}. Valid color names are {valid_colors}") from None
        return None

    @staticmethod
    def _get_style_enum(style: Style | str | None) -> Style | None:
        """Return ``style`` as a ``Style`` member; non-string input yields ``None``.

        Raises:
            ValueError: If ``style`` is a string that is not a style name
                (matched case-insensitively against the member names).
        """
        if isinstance(style, Style):
            return style
        elif isinstance(style, str):
            try:
                return Style[style.upper()]
            except KeyError:
                valid_styles = [f.name.lower() for f in Style]
                # `from None`: the KeyError adds nothing to the message below.
                raise ValueError(f"Invalid style name: {style}. Valid style names are {valid_styles}") from None
        return None


class FormattingConfig:
    """
    Class to manage and parse formatting configurations.
    """

    VALID_KEYS: ClassVar = {"color", "style"}

    def __init__(
        self,
        mismatched_rows: Format | dict | None = None,
        matched_rows: Format | dict | None = None,
        mismatched_cells: Format | dict | None = None,
        matched_cells: Format | dict | None = None,
    ):
        """
        Initializes the FormattingConfig with given or default formatting.

        Each of the arguments can be provided as a `Format` object or a dictionary with the following keys:
        - 'color': A string representing a color name, which should be one of the valid colors:
            ['black', 'red', 'green', 'yellow', 'blue', 'purple', 'cyan', 'light_gray',
            'dark_gray', 'light_red', 'light_green', 'light_yellow', 'light_blue',
            'light_purple', 'light_cyan', 'white'].
        - 'style': A string or list of strings representing styles, which should be one of the valid styles:
            ['bold', 'underline', 'blink', 'invert', 'hide'].

        An omitted (or `None`) argument falls back to its default: red for mismatched rows,
        blue for matched rows, red + underline for mismatched cells and blue for matched cells.

        Args:
            mismatched_rows (Format | dict | None): Format or dictionary for mismatched rows.
            matched_rows (Format | dict | None): Format or dictionary for matched rows.
            mismatched_cells (Format | dict | None): Format or dictionary for mismatched cells.
            matched_cells (Format | dict | None): Format or dictionary for matched cells.

        Raises:
            ValueError: If the dictionary contains invalid keys or values.
        """
        # Defaults are built per instance. Format is a mutable dataclass, so
        # default objects placed in the signature would be shared by every
        # config, and mutating one config would silently change all others.
        self.mismatched_rows: Format = self._parse_format(
            Format(Color.RED) if mismatched_rows is None else mismatched_rows
        )
        self.matched_rows: Format = self._parse_format(
            Format(Color.BLUE) if matched_rows is None else matched_rows
        )
        self.mismatched_cells: Format = self._parse_format(
            Format(Color.RED, [Style.UNDERLINE]) if mismatched_cells is None else mismatched_cells
        )
        self.matched_cells: Format = self._parse_format(
            Format(Color.BLUE) if matched_cells is None else matched_cells
        )

    def _parse_format(self, format: Format | dict) -> Format:
        """Return ``format`` as a ``Format``, converting a dictionary if needed.

        Raises:
            ValueError: If ``format`` is neither a ``Format`` nor a dict, or if
                a dict contains keys outside ``VALID_KEYS``.
        """
        if isinstance(format, dict):
            # Enforce VALID_KEYS so a misspelled key is reported, not ignored.
            invalid_keys = set(format) - self.VALID_KEYS
            if invalid_keys:
                raise ValueError(
                    f"Invalid keys in format dictionary: {sorted(invalid_keys, key=str)}. "
                    f"Valid keys are {sorted(self.VALID_KEYS)}"
                )
            return Format.from_dict(format)
        if isinstance(format, Format):
            return format
        raise ValueError("Invalid format type. Must be Format or dict.")
21 changes: 21 additions & 0 deletions chispa/formatting/terminal_string_formatter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from __future__ import annotations

from chispa.formatting.formats import RESET, Format


def format_string(input_string: str, format: Format) -> str:
    """Wrap ``input_string`` in the escape codes described by ``format``.

    Args:
        input_string: Text to decorate.
        format: Object whose ``style`` (list of styles or ``None``) and
            ``color`` (a color or ``None``) select the codes to emit.

    Returns:
        ``input_string`` unchanged when ``format`` has neither color nor
        style; otherwise the style codes, then the color code, then the text,
        then ``RESET``.
    """
    if not format.color and not format.style:
        return input_string

    # Style codes first, color code last.
    codes = [style.value for style in format.style or ()]
    if format.color:
        codes.append(format.color.value)

    return "".join(codes) + input_string + RESET
Loading
Loading