MrPowers · fpgmaas · Jul 20, 2024 · Jul 17, 2024 · Jul 18, 2024 · Jul 19, 2024
diff --git a/README.md b/README.md
@@ -311,31 +311,51 @@ assert_df_equality(df1, df2, allow_nan_equality=True)
 
 ## Customize formatting
 
-*Available in chispa 0.10+*.
-
 You can specify custom formats for the printed error messages as follows:
 
 ```python
-@dataclass
-class MyFormats:
-    mismatched_rows = ["light_yellow"]
-    matched_rows = ["cyan", "bold"]
-    mismatched_cells = ["purple"]
-    matched_cells = ["blue"]
+from chispa.formatting import FormattingConfig
+
+formats = FormattingConfig(
+        mismatched_rows={"color": "light_yellow"},
+        matched_rows={"color": "cyan", "style": "bold"},
+        mismatched_cells={"color": "purple"},
+        matched_cells={"color": "blue"},
+    )
 
-assert_basic_rows_equality(df1.collect(), df2.collect(), formats=MyFormats())
+assert_basic_rows_equality(df1.collect(), df2.collect(), formats=formats)
+```
+
+Or, equivalently, using the `Color` and `Style` enums instead of plain strings:
+
+```python
+from chispa.formatting import FormattingConfig, Color, Style
+
+formats = FormattingConfig(
+        mismatched_rows={"color": Color.LIGHT_YELLOW},
+        matched_rows={"color": Color.CYAN, "style": Style.BOLD},
+        mismatched_cells={"color": Color.PURPLE},
+        matched_cells={"color": Color.BLUE},
+    )
+
+assert_basic_rows_equality(df1.collect(), df2.collect(), formats=formats)
 ```
 
 You can also define these formats in `conftest.py` and inject them via a fixture:
 
 ```python
 @pytest.fixture()
-def my_formats():
-    return MyFormats()
+def chispa_formats():
+    return FormattingConfig(
+        mismatched_rows={"color": "light_yellow"},
+        matched_rows={"color": "cyan", "style": "bold"},
+        mismatched_cells={"color": "purple"},
+        matched_cells={"color": "blue"},
+    )
 
-def test_shows_assert_basic_rows_equality(my_formats):
+def test_shows_assert_basic_rows_equality(chispa_formats):
   ...
-  assert_basic_rows_equality(df1.collect(), df2.collect(), formats=my_formats)
+  assert_basic_rows_equality(df1.collect(), df2.collect(), formats=chispa_formats)
 ```
 
 ![custom_formats](https://github.com/MrPowers/chispa/blob/main/images/custom_formats.png)

diff --git a/chispa/__init__.py b/chispa/__init__.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import os
 import sys
 from glob import glob
@@ -27,7 +29,8 @@
             print("Can't find Apache Spark. Please set environment variable SPARK_HOME to root of installation!")
             exit(-1)
 
-from chispa.default_formats import DefaultFormats
+from chispa.default_formats import DefaultFormats, convert_to_formatting_config
+from chispa.formatting.formats import Color, Format, FormattingConfig, Style
 
 from .column_comparer import (
     ColumnsNotEqualError,
@@ -43,8 +46,14 @@
 
 
 class Chispa:
-    def __init__(self, formats=DefaultFormats(), default_output=None):
-        self.formats = formats
+    def __init__(self, formats: FormattingConfig | None = None, default_output=None):
+        """Store the formatting config (default when unset, legacy objects converted) and `default_output`."""
+        if not formats:
+            formats = FormattingConfig()
+        elif not isinstance(formats, FormattingConfig):
+            # Legacy `DefaultFormats`-style object; a FormattingConfig subclass is used as-is, not converted.
+            formats = convert_to_formatting_config(formats)
+        self.formats = formats
         self.default_outputs = default_output
 
     def assert_df_equality(
@@ -81,6 +90,10 @@ def assert_df_equality(
     "assert_column_equality",
     "assert_approx_column_equality",
     "assert_basic_rows_equality",
-    "DefaultFormats",
+    "Style",
+    "Color",
+    "FormattingConfig",
+    "Format",
     "Chispa",
+    "DefaultFormats",
 )
diff --git a/chispa/bcolors.py b/chispa/bcolors.py
@@ -1,3 +1,6 @@
+from __future__ import annotations
+
+
 class bcolors:
     NC = "\033[0m"  # No Color, reset all
 

diff --git a/chispa/column_comparer.py b/chispa/column_comparer.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 from prettytable import PrettyTable
 
 from chispa.bcolors import bcolors

diff --git a/chispa/dataframe_comparer.py b/chispa/dataframe_comparer.py
@@ -1,6 +1,9 @@
+from __future__ import annotations
+
 from functools import reduce
 
-from chispa.default_formats import DefaultFormats
+from chispa.default_formats import convert_to_formatting_config
+from chispa.formatting.formats import FormattingConfig
 from chispa.row_comparer import are_rows_approx_equal, are_rows_equal_enhanced
 from chispa.rows_comparer import (
     assert_basic_rows_equality,
@@ -25,8 +28,13 @@ def assert_df_equality(
     ignore_row_order=False,
     underline_cells=False,
     ignore_metadata=False,
-    formats=DefaultFormats(),
+    formats: FormattingConfig | None = None,
 ):
+    # No formats given -> defaults; anything that is not a FormattingConfig is a legacy object to convert.
+    if not formats:
+        formats = FormattingConfig()
+    elif not isinstance(formats, FormattingConfig):  # isinstance keeps FormattingConfig subclasses as-is
+        formats = convert_to_formatting_config(formats)
     if transforms is None:
         transforms = []
     if ignore_column_order:
@@ -71,8 +79,16 @@ def assert_approx_df_equality(
     allow_nan_equality=False,
     ignore_column_order=False,
     ignore_row_order=False,
-    formats=DefaultFormats(),
+    formats: FormattingConfig | None = None,
 ):
+    # Normalize `formats` before comparing: nothing given -> default config; anything that is not a
+    # FormattingConfig is treated as a legacy `DefaultFormats`-style object and converted.
+    if not formats:
+        formats = FormattingConfig()
+    elif not isinstance(formats, FormattingConfig):
+        # isinstance (not an exact type match) lets FormattingConfig subclasses pass through unconverted.
+        formats = convert_to_formatting_config(formats)
+
     if transforms is None:
         transforms = []
     if ignore_column_order:

diff --git a/chispa/default_formats.py b/chispa/default_formats.py
@@ -1,9 +1,67 @@
-from dataclasses import dataclass
+from __future__ import annotations
+
+import warnings
+from dataclasses import dataclass, field
+from typing import Any
+
+from chispa.formatting import Color, Format, FormattingConfig, Style
 
 
 @dataclass
 class DefaultFormats:
-    mismatched_rows = ["red"]
-    matched_rows = ["blue"]
-    mismatched_cells = ["red", "underline"]
-    matched_cells = ["blue"]
+    """
+    Deprecated: use `chispa.formatting.FormattingConfig`. To be removed together with `convert_to_formatting_config`.
+    """
+
+    mismatched_rows: list[str] = field(default_factory=lambda: ["red"])
+    matched_rows: list[str] = field(default_factory=lambda: ["blue"])
+    mismatched_cells: list[str] = field(default_factory=lambda: ["red", "underline"])
+    matched_cells: list[str] = field(default_factory=lambda: ["blue"])
+
+    def __post_init__(self) -> None:
+        # stacklevel=3 skips this method and the dataclass-generated __init__, so the warning points at the caller.
+        message = "DefaultFormats is deprecated. Use `chispa.formatting.FormattingConfig` instead."
+        warnings.warn(message, DeprecationWarning, stacklevel=3)
+
+
+def convert_to_formatting_config(instance: Any) -> FormattingConfig:
+    """
+    Convert a legacy formats object into a `FormattingConfig`.
+
+    `instance` must expose `mismatched_rows`, `matched_rows`, `mismatched_cells` and `matched_cells`, each a
+    list of lowercase color/style names such as `["red", "underline"]`; if several colors are listed, the last wins.
+    This function is purely for backwards compatibility and should be removed in a future release.
+
+    Raises:
+        ValueError: If a listed value is neither a valid color name nor a valid style name.
+    """
+    if type(instance) is not DefaultFormats:
+        warnings.warn(
+            "Using an arbitrary dataclass is deprecated. Use `chispa.formatting.FormattingConfig` instead.",
+            DeprecationWarning,
+            stacklevel=2,  # attribute the warning to whoever called this function, not to this module
+        )
+
+    valid_colors = [c.name.lower() for c in Color]
+    valid_styles = [s.name.lower() for s in Style]
+
+    def _convert_to_format(values: list[str]) -> Format:
+        color = None
+        styles = []
+        for value in values:
+            if value in valid_colors:
+                color = Color[value.upper()]
+            elif value in valid_styles:
+                styles.append(Style[value.upper()])
+            else:
+                raise ValueError(
+                    f"Invalid value: {value}. Valid values are colors: {valid_colors} and styles: {valid_styles}"
+                )
+        return Format(color=color, style=styles or None)
+
+    return FormattingConfig(
+        mismatched_rows=_convert_to_format(instance.mismatched_rows),
+        matched_rows=_convert_to_format(instance.matched_rows),
+        mismatched_cells=_convert_to_format(instance.mismatched_cells),
+        matched_cells=_convert_to_format(instance.matched_cells),
+    )
diff --git a/chispa/formatting/__init__.py b/chispa/formatting/__init__.py
@@ -0,0 +1,6 @@
+from __future__ import annotations
+
+from chispa.formatting.formats import RESET, Color, Format, FormattingConfig, Style
+from chispa.formatting.terminal_string_formatter import format_string
+
+__all__ = ("Style", "Color", "FormattingConfig", "Format", "format_string", "RESET")
diff --git a/chispa/formatting/formats.py b/chispa/formatting/formats.py
@@ -0,0 +1,127 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from enum import Enum
+from typing import ClassVar
+
+RESET = "\033[0m"
+
+
+class Color(str, Enum):  # text (foreground) colors as ANSI escape sequences; members are also `str` values
+    BLACK = "\033[30m"  # codes 30-37 follow
+    RED = "\033[31m"
+    GREEN = "\033[32m"
+    YELLOW = "\033[33m"
+    BLUE = "\033[34m"
+    PURPLE = "\033[35m"
+    CYAN = "\033[36m"
+    LIGHT_GRAY = "\033[37m"
+    DARK_GRAY = "\033[90m"  # codes 90-97 follow
+    LIGHT_RED = "\033[91m"
+    LIGHT_GREEN = "\033[92m"
+    LIGHT_YELLOW = "\033[93m"
+    LIGHT_BLUE = "\033[94m"
+    LIGHT_PURPLE = "\033[95m"
+    LIGHT_CYAN = "\033[96m"
+    WHITE = "\033[97m"
+
+
+class Style(str, Enum):  # text styles as ANSI escape sequences; members are also `str` values
+    BOLD = "\033[1m"
+    UNDERLINE = "\033[4m"
+    BLINK = "\033[5m"
+    INVERT = "\033[7m"
+    HIDE = "\033[8m"
+
+
+@dataclass
+class Format:
+    """A text format: an optional `Color` plus an optional list of `Style`s."""
+
+    color: Color | None = None
+    style: list[Style] | None = None
+
+    @classmethod
+    def from_dict(cls, format_dict: dict) -> Format:
+        """Build a Format from a dict with optional 'color' and 'style' entries; raise ValueError on bad input."""
+        if not isinstance(format_dict, dict):
+            raise ValueError("Input must be a dictionary")
+        color = cls._get_color_enum(format_dict.get("color"))
+        style = format_dict.get("style")
+        if isinstance(style, str):  # one style name, or a lone `Style` member (Style subclasses str)
+            styles = [cls._get_style_enum(style)]
+        elif isinstance(style, list):
+            styles = [cls._get_style_enum(s) for s in style]
+        else:
+            styles = None  # key absent or an unsupported type: no styles
+        return cls(color=color, style=styles)
+
+    @staticmethod
+    def _get_color_enum(color: Color | str | None) -> Color | None:
+        """Return `color` as a `Color`; names match member names case-insensitively, non-strings give None."""
+        if not isinstance(color, str):  # `Color` members are str instances, so they pass this check
+            return None
+        try:
+            return color if isinstance(color, Color) else Color[color.upper()]
+        except KeyError:
+            valid_colors = [c.name.lower() for c in Color]
+            # `from None`: drop the internal KeyError so the traceback shows only the ValueError.
+            raise ValueError(f"Invalid color name: {color}. Valid color names are {valid_colors}") from None
+
+    @staticmethod
+    def _get_style_enum(style: Style | str | None) -> Style | None:
+        """Return `style` as a `Style`; names match member names case-insensitively, non-strings give None."""
+        if not isinstance(style, str):  # `Style` members are str instances, so they pass this check
+            return None
+        try:
+            return style if isinstance(style, Style) else Style[style.upper()]
+        except KeyError:
+            valid_styles = [f.name.lower() for f in Style]
+            raise ValueError(f"Invalid style name: {style}. Valid style names are {valid_styles}") from None
+
+
+class FormattingConfig:
+    """
+    Class to manage and parse formatting configurations.
+    """
+
+    VALID_KEYS: ClassVar = {"color", "style"}
+
+    def __init__(
+        self,
+        mismatched_rows: Format | dict | None = None,
+        matched_rows: Format | dict | None = None,
+        mismatched_cells: Format | dict | None = None,
+        matched_cells: Format | dict | None = None,
+    ):
+        """
+        Initializes the FormattingConfig with given or default formatting (mismatches red, matches blue).
+
+        Each argument is a `Format`, a dictionary, or None for the default. Dictionaries may have the keys:
+        - 'color': one of ['black', 'red', 'green', 'yellow', 'blue', 'purple', 'cyan', 'light_gray',
+            'dark_gray', 'light_red', 'light_green', 'light_yellow', 'light_blue', 'light_purple',
+            'light_cyan', 'white'].
+        - 'style': one of, or a list of, ['bold', 'underline', 'blink', 'invert', 'hide'].
+
+        Raises:
+            ValueError: If an argument is not a Format, dict or None, or a dict has invalid keys or values.
+        """
+        # Defaults are built per call (not in the signature) so instances never share a mutable Format.
+        self.mismatched_rows: Format = self._parse_format(mismatched_rows, Format(Color.RED))
+        self.matched_rows: Format = self._parse_format(matched_rows, Format(Color.BLUE))
+        self.mismatched_cells: Format = self._parse_format(mismatched_cells, Format(Color.RED, [Style.UNDERLINE]))
+        self.matched_cells: Format = self._parse_format(matched_cells, Format(Color.BLUE))
+
+    def _parse_format(self, format: Format | dict | None, default: Format | None = None) -> Format:
+        """Return `format` as a Format: None yields `default` when one is given, dicts are checked and parsed."""
+        if format is None and default is not None:
+            return default
+        if isinstance(format, Format):
+            return format
+        if isinstance(format, dict):
+            invalid_keys = sorted(set(format) - self.VALID_KEYS, key=str)
+            if invalid_keys:
+                valid_keys = sorted(self.VALID_KEYS)
+                raise ValueError(f"Invalid keys in format dictionary: {invalid_keys}. Valid keys are {valid_keys}")
+            return Format.from_dict(format)
+        raise ValueError("Invalid format type. Must be Format or dict.")
diff --git a/chispa/formatting/terminal_string_formatter.py b/chispa/formatting/terminal_string_formatter.py
@@ -0,0 +1,21 @@
+from __future__ import annotations
+
+from chispa.formatting.formats import RESET, Format
+
+
+def format_string(input_string: str, format: Format) -> str:
+    """Wrap `input_string` in the escape codes of `format`, followed by RESET.
+
+    Style codes are emitted first, in list order, then the color code. A format
+    with neither color nor style returns `input_string` unchanged (no RESET).
+    """
+    has_color = bool(format.color)
+    has_style = bool(format.style)
+    if not (has_color or has_style):
+        return input_string
+
+    prefix_codes = [style.value for style in format.style] if has_style else []
+    if has_color:
+        prefix_codes.append(format.color.value)
+
+    return "".join(prefix_codes) + input_string + RESET