Merge pull request #80 from fpgmaas/fix/six-and-update

Drop support for Python 3.7 and PySpark 2.x and remove vendored dependency on six
MrPowers · Jul 16, 2024 · 2e2d3cc
2 parents bd822a4 + c50cbe7
commit 2e2d3cc
Show file tree

Hide file tree

Showing 8 changed files with 389 additions and 1,283 deletions.
diff --git a/.gitignore b/.gitignore
@@ -5,6 +5,7 @@ chispa.egg-info/
 tmp/
 .idea/
 .DS_Store
+.python_version
 
 # Byte-compiled / optimized / DLL files
 __pycache__/

diff --git a/.python-version b/.python-version
diff --git a/README.md b/README.md
@@ -475,9 +475,6 @@ TODO: Need to benchmark these methods vs. the spark-testing-base ones
 
 ## Vendored dependencies
 
-These dependencies are vendored:
-
-* [six](https://github.com/benjaminp/six)
 * [PrettyTable](https://github.com/jazzband/prettytable)
 
 The dependencies are vendored to save you from dependency hell.

diff --git a/chispa/rows_comparer.py b/chispa/rows_comparer.py
@@ -1,4 +1,4 @@
-import chispa.six as six
+from itertools import zip_longest
 from chispa.prettytable import PrettyTable
 from chispa.bcolors import *
 import chispa
@@ -11,8 +11,9 @@
 def assert_basic_rows_equality(rows1, rows2, underline_cells=False, formats=DefaultFormats()):
     if rows1 != rows2:
         t = PrettyTable(["df1", "df2"])
-        zipped = list(six.moves.zip_longest(rows1, rows2))
+        zipped = list(zip_longest(rows1, rows2))
         all_rows_equal = True
+
         for r1, r2 in zipped:
             if r1 is None and r2 is not None:
                 t.add_row([None, format_string(r2, formats.mismatched_rows)])
@@ -21,7 +22,7 @@ def assert_basic_rows_equality(rows1, rows2, underline_cells=False, formats=Defa
                 t.add_row([format_string(r1, formats.mismatched_rows), None])
                 all_rows_equal = False
             else:
-                r_zipped = list(six.moves.zip_longest(r1.__fields__, r2.__fields__))
+                r_zipped = list(zip_longest(r1.__fields__, r2.__fields__))
                 r1_string = []
                 r2_string = []
                 for r1_field, r2_field in r_zipped:
@@ -43,7 +44,7 @@ def assert_basic_rows_equality(rows1, rows2, underline_cells=False, formats=Defa
 def assert_generic_rows_equality(rows1, rows2, row_equality_fun, row_equality_fun_args, underline_cells=False, formats=DefaultFormats()):
     df1_rows = rows1
     df2_rows = rows2
-    zipped = list(six.moves.zip_longest(df1_rows, df2_rows))
+    zipped = list(zip_longest(df1_rows, df2_rows))
     t = PrettyTable(["df1", "df2"])
     all_rows_equal = True
     for r1, r2 in zipped:
@@ -58,7 +59,7 @@ def assert_generic_rows_equality(rows1, rows2, row_equality_fun, row_equality_fu
             t.add_row([format_string(r1_string, formats.matched_rows), format_string(r2_string, formats.matched_rows)])
         # otherwise, rows aren't equal
         else:
-            r_zipped = list(six.moves.zip_longest(r1.__fields__, r2.__fields__))
+            r_zipped = list(zip_longest(r1.__fields__, r2.__fields__))
             r1_string = []
             r2_string = []
             for r1_field, r2_field in r_zipped:

diff --git a/chispa/schema_comparer.py b/chispa/schema_comparer.py
@@ -1,6 +1,6 @@
 from chispa.prettytable import PrettyTable
 from chispa.bcolors import *
-import chispa.six as six
+from itertools import zip_longest
 
 
 class SchemasNotEqualError(Exception):
@@ -19,15 +19,15 @@ def assert_schema_equality_full(s1, s2, ignore_nullable=False, ignore_metadata=F
     def inner(s1, s2, ignore_nullable, ignore_metadata):
         if len(s1) != len(s2):
             return False
-        zipped = list(six.moves.zip_longest(s1, s2))
+        zipped = list(zip_longest(s1, s2))
         for sf1, sf2 in zipped:
             if not are_structfields_equal(sf1, sf2, ignore_nullable, ignore_metadata):
                 return False
         return True
 
     if not inner(s1, s2, ignore_nullable, ignore_metadata):
         t = PrettyTable(["schema1", "schema2"])
-        zipped = list(six.moves.zip_longest(s1, s2))
+        zipped = list(zip_longest(s1, s2))
         for sf1, sf2 in zipped:
             if are_structfields_equal(sf1, sf2, True):
                 t.add_row([blue(sf1), blue(sf2)])
@@ -42,7 +42,7 @@ def inner(s1, s2, ignore_nullable, ignore_metadata):
 def assert_basic_schema_equality(s1, s2):
     if s1 != s2:
         t = PrettyTable(["schema1", "schema2"])
-        zipped = list(six.moves.zip_longest(s1, s2))
+        zipped = list(zip_longest(s1, s2))
         for sf1, sf2 in zipped:
             if sf1 == sf2:
                 t.add_row([blue(sf1), blue(sf2)])
@@ -56,7 +56,7 @@ def assert_basic_schema_equality(s1, s2):
 def assert_schema_equality_ignore_nullable(s1, s2):
     if not are_schemas_equal_ignore_nullable(s1, s2):
         t = PrettyTable(["schema1", "schema2"])
-        zipped = list(six.moves.zip_longest(s1, s2))
+        zipped = list(zip_longest(s1, s2))
         for sf1, sf2 in zipped:
             if are_structfields_equal(sf1, sf2, True):
                 t.add_row([blue(sf1), blue(sf2)])
@@ -69,7 +69,7 @@ def assert_schema_equality_ignore_nullable(s1, s2):
 def are_schemas_equal_ignore_nullable(s1, s2):
     if len(s1) != len(s2):
         return False
-    zipped = list(six.moves.zip_longest(s1, s2))
+    zipped = list(zip_longest(s1, s2))
     for sf1, sf2 in zipped:
         if not are_structfields_equal(sf1, sf2, True):
             return False