Skip to content

Commit 0d98253

Browse files
committed
add new custom formatter to readme
1 parent 9199070 commit 0d98253

8 files changed

+38
-17
lines changed

README.md

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -303,23 +303,42 @@ nan2 = float('nan')
303303
nan1 == nan2 # False
304304
```
305305

306-
Pandas, a popular DataFrame library, does consider NaN values to be equal by default.
307-
308-
This library requires you to set a flag to consider two NaN values to be equal.
306+
pandas considers NaN values to be equal by default, but this library requires you to set the `allow_nan_equality` flag to treat two NaN values as equal.
309307

310308
```python
311309
assert_df_equality(df1, df2, allow_nan_equality=True)
312310
```
313311

314-
### Underline differences within rows
312+
## Customize formatting
313+
314+
*Available in chispa 0.10+.*
315315

316-
You can choose to underline columns within a row that are different by setting `underline_cells` to True, i.e.:
316+
You can specify custom formats for the printed error messages as follows:
317317

318318
```python
319-
assert_df_equality(df1, df2, underline_cells=True)
319+
@dataclass
320+
class MyFormats:
321+
mismatched_rows = ["light_yellow"]
322+
matched_rows = ["cyan", "bold"]
323+
mismatched_cells = ["purple"]
324+
matched_cells = ["blue"]
325+
326+
assert_basic_rows_equality(df1.collect(), df2.collect(), formats=MyFormats())
327+
```
328+
329+
You can also define these formats in `conftest.py` and inject them via a fixture:
330+
331+
```python
332+
@pytest.fixture()
333+
def my_formats():
334+
return MyFormats()
335+
336+
def test_shows_assert_basic_rows_equality(my_formats):
337+
...
338+
assert_basic_rows_equality(df1.collect(), df2.collect(), formats=my_formats)
320339
```
321340

322-
![DfsNotEqualUnderlined](https://github.com/MrPowers/chispa/blob/main/images/df_not_equal_underlined.png)
341+
![custom_formats](https://github.com/MrPowers/chispa/blob/main/images/custom_formats.png)
323342

324343
## Approximate column equality
325344

chispa/rows_comparer.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,11 @@ def assert_basic_rows_equality(rows1, rows2, underline_cells=False, formats=Defa
2727
for r1_field, r2_field in r_zipped:
2828
if r1[r1_field] != r2[r2_field]:
2929
all_rows_equal = False
30-
r1_string.append(format_string(f"{r1_field}='{r1[r1_field]}'", formats.mismatched_cells))
31-
r2_string.append(format_string(f"{r2_field}='{r2[r2_field]}'", formats.mismatched_cells))
30+
r1_string.append(format_string(f"{r1_field}={r1[r1_field]}", formats.mismatched_cells))
31+
r2_string.append(format_string(f"{r2_field}={r2[r2_field]}", formats.mismatched_cells))
3232
else:
33-
r1_string.append(format_string(f"{r1_field}='{r1[r1_field]}'", formats.matched_cells))
34-
r2_string.append(format_string(f"{r2_field}='{r2[r2_field]}'", formats.matched_cells))
33+
r1_string.append(format_string(f"{r1_field}={r1[r1_field]}", formats.matched_cells))
34+
r2_string.append(format_string(f"{r2_field}={r2[r2_field]}", formats.matched_cells))
3535
r1_res = ", ".join(r1_string)
3636
r2_res = ", ".join(r2_string)
3737

@@ -64,11 +64,11 @@ def assert_generic_rows_equality(rows1, rows2, row_equality_fun, row_equality_fu
6464
for r1_field, r2_field in r_zipped:
6565
if r1[r1_field] != r2[r2_field]:
6666
all_rows_equal = False
67-
r1_string.append(format_string(f"{r1_field}='{r1[r1_field]}'", formats.mismatched_cells))
68-
r2_string.append(format_string(f"{r2_field}='{r2[r2_field]}'", formats.mismatched_cells))
67+
r1_string.append(format_string(f"{r1_field}={r1[r1_field]}", formats.mismatched_cells))
68+
r2_string.append(format_string(f"{r2_field}={r2[r2_field]}", formats.mismatched_cells))
6969
else:
70-
r1_string.append(format_string(f"{r1_field}='{r1[r1_field]}'", formats.matched_cells))
71-
r2_string.append(format_string(f"{r2_field}='{r2[r2_field]}'", formats.matched_cells))
70+
r1_string.append(format_string(f"{r1_field}={r1[r1_field]}", formats.matched_cells))
71+
r2_string.append(format_string(f"{r2_field}={r2[r2_field]}", formats.matched_cells))
7272
r1_res = ", ".join(r1_string)
7373
r2_res = ", ".join(r2_string)
7474

images/custom_formats.png

832 KB
Loading

images/dfs_not_equal_error.png

154 KB
Loading

images/dfs_not_equal_error_old.png

50 KB
Loading

images/ignore_row_order_false.png

-147 KB
Loading

images/ignore_row_order_false_old.png

299 KB
Loading

tests/test_readme_examples.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ def remove_non_word_characters(col):
1919

2020

2121
def describe_column_equality():
22-
def it_removes_non_word_characters_short():
22+
def test_removes_non_word_characters_short():
2323
data = [
2424
("jo&&se", "jose"),
2525
("**li**", "li"),
@@ -31,7 +31,7 @@ def it_removes_non_word_characters_short():
3131
assert_column_equality(df, "clean_name", "expected_name")
3232

3333

34-
def it_removes_non_word_characters_nice_error():
34+
def test_remove_non_word_characters_nice_error():
3535
data = [
3636
("matt7", "matt"),
3737
("bill&", "bill"),
@@ -40,6 +40,7 @@ def it_removes_non_word_characters_nice_error():
4040
]
4141
df = spark.createDataFrame(data, ["name", "expected_name"])\
4242
.withColumn("clean_name", remove_non_word_characters(F.col("name")))
43+
# assert_column_equality(df, "clean_name", "expected_name")
4344
with pytest.raises(ColumnsNotEqualError) as e_info:
4445
assert_column_equality(df, "clean_name", "expected_name")
4546

@@ -95,6 +96,7 @@ def test_remove_non_word_characters_long_error():
9596
def ignore_row_order():
9697
df1 = spark.createDataFrame([(1,), (2,), (3,)], ["some_num"])
9798
df2 = spark.createDataFrame([(2,), (1,), (3,)], ["some_num"])
99+
# assert_df_equality(df1, df2)
98100
assert_df_equality(df1, df2, ignore_row_order=True)
99101

100102

0 commit comments

Comments
 (0)