-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_comparers.py
72 lines (53 loc) · 2.19 KB
/
test_comparers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import pytest
from pyspark.sql import SparkSession
from pyspark.sql.types import StructField, StringType, IntegerType, StructType
from .comparers import assert_frames_functionally_equivalent
# Module-level Spark session used by every test in this file. The
# "local[*]" master runs Spark in-process, and getOrCreate() reuses an
# already-active session if one exists instead of starting a second one.
spark = (
    SparkSession.builder
    .master("local[*]")
    .getOrCreate()
)
def test_non_equal_frames_not_functionally_equivalent():
    """A frame with an extra, duplicated row is rejected as non-equivalent.

    Both frames contain the same distinct values; the second one merely
    repeats the ``(2,)`` row, so the check must raise ``AssertionError``.
    """
    base_rows = [(1,), (2,)]
    two_rows = spark.createDataFrame(base_rows)
    three_rows = spark.createDataFrame([*base_rows, (2,)])

    with pytest.raises(AssertionError):
        assert_frames_functionally_equivalent(two_rows, three_rows)
def test_identical_frames_are_identical():
    """Comparing a frame with itself passes the equivalence check."""
    single_row_frame = spark.range(1)

    assert_frames_functionally_equivalent(single_row_frame, single_row_frame)
def test_column_order_is_irrelevant_for_functional_equivalence():
    """Frames holding the same records with swapped column order pass."""
    name_field = StructField("name", StringType(), True)
    age_field = StructField("age", IntegerType(), True)

    # Same two records, once as (name, age) and once as (age, name).
    name_first = spark.createDataFrame(
        [("Wim", 1), ("Conrad", 2)],
        schema=StructType([name_field, age_field]),
    )
    age_first = spark.createDataFrame(
        [(1, "Wim"), (2, "Conrad")],
        schema=StructType([age_field, name_field]),
    )

    assert_frames_functionally_equivalent(name_first, age_first)
def test_ordering_of_data_is_irrelevant_for_functional_equivalence():
    """Frames holding the same records in a different row order pass."""
    person_schema = StructType(
        [
            StructField("name", StringType(), True),
            StructField("age", IntegerType(), True),
        ]
    )
    records = [("Wim", 1), ("Conrad", 2)]

    original_order = spark.createDataFrame(records, schema=person_schema)
    # Identical records and schema; only the row order is flipped.
    flipped_order = spark.createDataFrame(records[::-1], schema=person_schema)

    assert_frames_functionally_equivalent(original_order, flipped_order)
def test_functional_equivalence_testing_works_with_nones():
    """A frame containing a null value can be checked against itself.

    In Python 3, ``None`` cannot be ordered relative to values of other
    types (for example, ``None < 3`` raises an error), so this test feeds
    the check a row whose ``name`` is ``None``.
    """
    person_schema = StructType(
        [
            StructField("name", StringType(), True),
            StructField("age", IntegerType(), True),
        ]
    )
    frame_with_null = spark.createDataFrame(
        [(None, 1), ("Christina", 2)],
        schema=person_schema,
    )

    assert_frames_functionally_equivalent(frame_with_null, frame_with_null)
def test_functional_equivalence_still_means_same_values():
    """Frames that differ in a single cell value are rejected."""
    expected = spark.createDataFrame([("a", "b")])
    # Differs from `expected` only in the second column ("c" vs "b").
    mismatched = spark.createDataFrame([("a", "c")])

    with pytest.raises(AssertionError):
        assert_frames_functionally_equivalent(expected, mismatched)