1
+ from montydb import MontyClient
2
+ import numpy as np
3
+ import json
4
+
5
def compare_db_to_reference(reference_json_path: str, test_db_path: str, float_fractional_tolerance: float = 0.01):
    """
    Compare a montydb generated by tests in the KDP to a reference json file.

    The test DB is queried using MontyClient, while the reference json is accessed directly.
    The reference json contains data types that are used to determine comparison tolerances.
    Only top-level keys whose value is a dict containing "source-value" are compared;
    the "vc-comment" key is always skipped.

    Args:
        reference_json_path:
            Path to reference JSON file. This should be copied from /pipeline/db/db/data.json in the KDP
        test_db_path:
            Path to entire db directory generated in /pipeline/db of the KDP instance being tested
        float_fractional_tolerance:
            Fraction of the reference value of floating-point numbers that the test db is allowed to deviate by.
            A reference value of exactly 0 therefore requires an exact match.

    Raises:
        AssertionError:
            If a reference result has no match or several matches in the test DB, or if a
            source-value differs in shape, type or value from the reference.
        RuntimeError:
            If reading a matched test document fails unexpectedly, or if the reference
            source-value is not made of single-key dicts of a supported data type.
    """
    # Local import: only needed to escape the uuid prefix used as a $regex pattern.
    import re

    with open(reference_json_path, encoding="utf-8") as f:
        reference_db = json.load(f)

    total_results = len(reference_db)
    with MontyClient(test_db_path, cache_modified=0) as client:
        db = client.db
        # Count from 1 so the progress line ends at "N of N" rather than "N-1 of N".
        for result_number, reference_result in enumerate(reference_db, start=1):
            print("Processing reference result %d of %d" % (result_number, total_results), end="\r ")
            reference_uuid = reference_result["meta"]["uuid"]
            # The runner-subject pair is the uuid without its last two dash-separated fields.
            reference_runner_and_subject = "-".join(reference_uuid.split("-")[:-2])
            reference_instance_id = int(reference_result["instance-id"]["$numberInt"])

            # MONTYDB VERSION NOTE:
            # In 2.1.1, the version in the KDP, querying the /pipeline/db like this gives and requires dicts
            # e.g. {"$numberDouble": "0.70535806"} for typed values, just like the raw json in the reference db.
            # However, if we ever upgrade to 2.5.2 (or even some earlier versions might have this),
            # typed values just have the value.
            query = {
                # Escape the prefix so characters such as "." or "+" are matched literally.
                "meta.uuid": {"$regex": re.escape(reference_runner_and_subject)},
                "instance-id.$numberInt": str(reference_instance_id),
            }

            for key, reference_property in reference_result.items():
                if key == "vc-comment":
                    # VC comments may be a string with float numbers embedded, too much hassle to test
                    continue
                if not isinstance(reference_property, dict) or "source-value" not in reference_property:
                    # not a property key, nothing to compare
                    continue

                # generic error message prefix identifying what was being compared
                context = "\n \n Test failed while comparing to key '%s' in instance-id %d in reference runner-subject pair %s:\n " \
                    % (key, reference_instance_id, reference_runner_and_subject)

                reference_source_value_array = np.asarray(reference_property["source-value"])
                test_source_value_array = _fetch_single_source_value(db.data, query, key, context)
                _compare_source_values(
                    reference_source_value_array,
                    test_source_value_array,
                    float_fractional_tolerance,
                    context,
                )


def _fetch_single_source_value(collection, query, key, context):
    """Return, as a numpy array, the "source-value" of `key` from the single test document matching `query`."""
    cursor = collection.find(query, projection={key: 1, "_id": False})
    exhausted = object()  # sentinel: distinguishes "no document" from any real document

    try:
        first_match = next(cursor, exhausted)
    except Exception as err:
        raise RuntimeError("Unexpected exception when searching test DB") from err
    if first_match is exhausted:
        raise AssertionError(context + "No matches found in test DB.")

    try:
        test_source_value_array = np.asarray(first_match[key]["source-value"])
        has_second_match = next(cursor, exhausted) is not exhausted
    except Exception as err:
        raise RuntimeError("Unexpected exception when searching test DB") from err
    # Raised outside the try block so the failure is not re-wrapped as a RuntimeError.
    if has_second_match:
        raise AssertionError(context + "Multiple matches found in test DB.")
    return test_source_value_array


def _floats_within_tolerance(reference_value, test_value, float_fractional_tolerance):
    """Tell whether `test_value` lies within the fractional tolerance of `reference_value`."""
    if np.isnan(reference_value) or np.isnan(test_value):
        # NaN never compares equal or "close"; it only matches another NaN.
        return bool(np.isnan(reference_value) and np.isnan(test_value))
    if reference_value == test_value:
        # Covers equal infinities, whose difference would otherwise be NaN.
        return True
    return abs(reference_value - test_value) <= abs(float_fractional_tolerance * reference_value)


def _compare_source_values(reference_array, test_array, float_fractional_tolerance, context):
    """Raise AssertionError unless `test_array` matches `reference_array` in shape and values."""

    def mismatch(reason):
        # Built only on failure: stringifying both arrays for every key is wasted work otherwise.
        return (
            context
            + "\n Mismatch found between reference value\n \n %s\n \n and test value\n \n %s\n \n "
            % (reference_array, test_array)
            + reason
        )

    # arrays should be the same shape
    if reference_array.shape != test_array.shape:
        raise AssertionError(mismatch("Arrays are different shapes."))

    if reference_array.dtype != "object":
        # this means it's strings, if its doubles or ints, each entry is a dict e.g. "$numberDouble": "0.70535806"
        if not np.array_equal(reference_array, test_array):
            raise AssertionError(mismatch("Non-numerical values are not equal."))
        return

    if reference_array.size == 0:
        return

    # the reference ndarray is dicts, so we have to look at data types;
    # the first entry decides the data type used for the whole array
    first_reference_entry = reference_array.flat[0]
    if not isinstance(first_reference_entry, dict) or len(first_reference_entry) != 1:
        raise RuntimeError(
            "\n \n Elements of reference DB value\n \n %s\n \n are not single-key dicts as expected."
            % (reference_array,)
        )
    mongo_dtype = next(iter(first_reference_entry))
    if mongo_dtype == "$numberDouble":
        convert = float
    elif mongo_dtype == "$numberInt":
        convert = int
    else:
        raise RuntimeError("Unexpected data type %s in reference DB" % mongo_dtype)

    for reference_entry, test_entry in zip(reference_array.flat, test_array.flat):
        reference_value = convert(reference_entry[mongo_dtype])
        try:
            test_value = convert(test_entry[mongo_dtype])
        except (KeyError, TypeError, ValueError) as err:
            # The test entry is not a dict holding a value of the reference data type.
            raise AssertionError(
                mismatch("Test value is not of the reference data type %s." % mongo_dtype)
            ) from err

        if mongo_dtype == "$numberDouble":
            if not _floats_within_tolerance(reference_value, test_value, float_fractional_tolerance):
                raise AssertionError(
                    mismatch(
                        "Floating point values are not within the requested fractional tolerance %f"
                        % float_fractional_tolerance
                    )
                )
        elif reference_value != test_value:
            raise AssertionError(mismatch("Integer values are not equal."))
108
+
109
if __name__ == '__main__':
    # Script entry point: compare the DB directory at /pipeline/db against the
    # reference file data.json in the current working directory. Any mismatch
    # raises before the success line is printed.
    compare_db_to_reference("data.json", "/pipeline/db")
    print("SUCCESS! All results provided in reference database were successfully matched.")
0 commit comments