WIP: add AFLOW, prereqs and a Dockerfile that tests it

ilia-nikiforov-umn · ilia-nikiforov-umn · commit 89a53aee4671 · 2023-11-01T11:42:05.000-05:00
diff --git a/docker/install/Dockerfile b/docker/install/Dockerfile
@@ -39,7 +39,7 @@ RUN ${PIP} install markupsafe==2.0.1
 RUN ${PIP} install Jinja2==2.11.3
 RUN ${PIP} install edn_format==0.7.5
 RUN ${PIP} install kim-edn==1.3.1
-RUN ${PIP} install kim-property==2.4.0
+RUN ${PIP} install kim-property==2.5.7
 RUN ${PIP} install kim-query==3.0.0
 RUN ${PIP} install simplejson==3.17.2
 RUN ${PIP} install numpy==1.19.5
@@ -48,6 +48,7 @@ RUN ${PIP} install matplotlib==3.7.1
 RUN ${PIP} install pymongo==3.11.3
 RUN ${PIP} install montydb==2.1.1
 RUN ${PIP} install pybind11==2.6.2
+RUN ${PIP} install spglib==2.1.0
 
 #########################################
 ## MD++
@@ -159,3 +160,19 @@ RUN cd ${PACKAGE_DIR} \
  && rm convergence.txz \
  && cd convergence \
  && ${PIP} install .
+
+#########################################
+## AFLOW (built from source with make; aflow and aflow_data are copied into /usr/local/bin)
+#########################################
+ARG AFLOW_VER=3.2.14
+ARG AFLOW_PACKAGE=aflow.${AFLOW_VER}
+ARG AFLOW_ARCHIVE_TXZ=${AFLOW_PACKAGE}.tar.xz
+RUN cd ${PACKAGE_DIR} \
+ && wget -q http://materials.duke.edu/AFLOW/${AFLOW_ARCHIVE_TXZ} \
+ && tar xJf ${AFLOW_ARCHIVE_TXZ} \
+ && rm ${AFLOW_ARCHIVE_TXZ} \
+ && cd ${AFLOW_PACKAGE} \
+ && make -j2 \
+ && cp aflow aflow_data /usr/local/bin \
+ && cd ${PACKAGE_DIR} \
+ && rm -r ${AFLOW_PACKAGE}
diff --git a/docker/test/Dockerfile b/docker/test/Dockerfile
@@ -0,0 +1,10 @@
+ARG IMAGE_MINIMAL
+# IMAGE_MINIMAL has no default: pass the image under test with --build-arg IMAGE_MINIMAL=<image>
+FROM ${IMAGE_MINIMAL}
+
+COPY test_scripts_and_data test_scripts_and_data
+# key=value form with no leading ':' (an empty LD_LIBRARY_PATH entry makes the dynamic linker search the current directory)
+ENV LD_LIBRARY_PATH=/usr/local/lib
+
+# for now filenames are hardcoded, including a string literal in compare_dbs.py
+RUN /bin/bash -c 'cd test_scripts_and_data && bash set_up_and_run_equilibriumcrystalstructure_sample.sh && python compare_dbs.py'
diff --git a/docker/test/test_scripts_and_data/compare_dbs.py b/docker/test/test_scripts_and_data/compare_dbs.py
@@ -0,0 +1,113 @@
+from montydb import MontyClient
+import numpy as np
+import json
+
+def compare_db_to_reference(reference_json_path: str, test_db_path: str, float_fractional_tolerance: float = 0.01) -> None:
+    """
+    Compare a montydb generated by tests in the KDP to a reference json file.
+    The test DB is queried using MontyClient, while the reference json is accessed directly.
+    The reference json contains data types that are used to determine comparison tolerances.
+
+    Args:
+        reference_json_path:
+            Path to reference JSON file. This should be copied from /pipeline/db/db/data.json in the KDP
+        test_db_path:
+            Path to entire db directory generated in /pipeline/db of the KDP instance being tested
+        float_fractional_tolerance:
+            Fraction of the reference value of floating-point numbers that the test db is allowed to deviate by (a reference value of exactly 0 must therefore match exactly)
+
+    Raises:
+        AssertionError: a reference result has no match or several matches in the test DB, or a compared value differs.
+        RuntimeError: the test DB lookup failed unexpectedly, or the reference DB holds an unexpected layout or data type.
+    """
+    with open(reference_json_path) as f:
+        reference_db = json.load(f)
+    with MontyClient(test_db_path, cache_modified=0) as client:
+        db = client.db
+        for i, reference_result in enumerate(reference_db, start=1):
+            print ("Processing reference result %d of %d"%(i,len(reference_db)),end="\r")
+            reference_uuid = reference_result["meta"]["uuid"]
+            reference_runner_and_subject = "-".join(reference_uuid.split("-")[:-2])
+            reference_instance_id = int(reference_result["instance-id"]["$numberInt"])
+            for key in reference_result:
+                if key == "vc-comment":
+                    # VC comments may be a string with float numbers embedded, too much hassle to test
+                    continue
+                if isinstance(reference_result[key],dict):
+                    if "source-value" in reference_result[key]:
+                        # ok, this is a property key, search for this result
+                        # generic error message
+                        error_message_specifying_pair_and_key = "\n\nTest failed while comparing to key '%s' in instance-id %d in reference runner-subject pair %s:\n" \
+                            %(key,reference_instance_id,reference_runner_and_subject)
+                        # get numpy array of the source-value from the reference db
+                        reference_source_value_array = np.asarray(reference_result[key]["source-value"])
+
+                        # query the test DB
+                        # MONTYDB VERSION NOTE:
+                        #     In 2.1.1, the version in the KDP, querying the /pipeline/db like this gives and requires dicts
+                        #     e.g. {"$numberDouble": "0.70535806"} for typed values, just like the raw json in the reference db.
+                        #     However, if we ever upgrade to 2.5.2 (or even some earlier versions might have this),
+                        #     typed values just have the value.
+                        query={
+                            "meta.uuid":{"$regex":reference_runner_and_subject},
+                            "instance-id.$numberInt":str(reference_instance_id)
+                        }
+                        project={key:1,"_id":False}
+                        cursor=db.data.find(query,projection=project)
+
+                        # get numpy array of the source-value from the DB we are testing
+                        try:
+                            test_source_value_array = np.asarray(next(cursor)[key]["source-value"])
+                        except StopIteration:
+                            raise AssertionError(error_message_specifying_pair_and_key+"No matches found in test DB.") from None
+                        except Exception as err:
+                            raise RuntimeError("Unexpected exception when searching test DB") from err
+
+                        # should be only one result, test this
+                        try:
+                            next(cursor)
+                        except StopIteration:
+                            pass
+                        except Exception as err:
+                            raise RuntimeError("Unexpected exception when searching test DB") from err
+                        else:
+                            # raised outside the try body so this failure is not re-wrapped as a RuntimeError
+                            raise AssertionError(error_message_specifying_pair_and_key+"Multiple matches found in test DB.")
+
+                        # error message segment for displaying the source-values
+                        error_message_showing_source_values = "\nMismatch found between reference value\n\n%s\n\nand test value\n\n%s\n\n" % \
+                            (reference_source_value_array,test_source_value_array)
+
+                        # arrays should be the same shape
+                        assert reference_source_value_array.shape == test_source_value_array.shape, \
+                            error_message_specifying_pair_and_key + error_message_showing_source_values + "Arrays are different shapes."
+                        if reference_source_value_array.dtype != "object":
+                            # this means it's strings, if its doubles or ints, each entry is a dict e.g. "$numberDouble": "0.70535806"
+                            assert (reference_source_value_array == test_source_value_array).all(), \
+                                error_message_specifying_pair_and_key + error_message_showing_source_values + "Non-numerical values are not equal."
+                        else: # the reference ndarray is dicts, so we have to look at data types
+                            reference_source_value_array_flat=reference_source_value_array.flat
+                            if len(reference_source_value_array_flat[0].keys()) != 1:
+                                raise RuntimeError("\n\nElements of reference DB value\n\n%s\n\nare not single-key dicts as expected."%reference_source_value_array)
+                            mongo_dtype = list(reference_source_value_array_flat[0].keys())[0]
+                            for reference_source_value_dict,test_source_value_dict in zip(reference_source_value_array_flat,test_source_value_array.flat):
+                                if mongo_dtype == "$numberDouble":
+                                    reference_source_value = float(reference_source_value_dict[mongo_dtype])
+                                    test_source_value = float(test_source_value_dict[mongo_dtype])
+                                    assert abs(reference_source_value-test_source_value) <= abs(float_fractional_tolerance*reference_source_value), \
+                                        error_message_specifying_pair_and_key + error_message_showing_source_values + \
+                                        "Floating point values are not within the requested fractional tolerance %f"%float_fractional_tolerance
+                                elif mongo_dtype == "$numberInt":
+                                    reference_source_value = int(reference_source_value_dict[mongo_dtype])
+                                    test_source_value = int(test_source_value_dict[mongo_dtype])
+                                    assert reference_source_value == test_source_value, \
+                                        error_message_specifying_pair_and_key + error_message_showing_source_values + \
+                                        "Integer values are not equal."
+                                else:
+                                    raise RuntimeError("Unexpected data type %s in reference DB"%mongo_dtype)
+
+if __name__ == "__main__":
+    # Compare the reference data.json (resolved against the current directory) with the DB under /pipeline/db.
+    compare_db_to_reference(reference_json_path="data.json",
+                            test_db_path="/pipeline/db")
+    print("SUCCESS! All results provided in reference database were successfully matched.")
diff --git a/docker/test/test_scripts_and_data/data.json b/docker/test/test_scripts_and_data/data.json
diff --git a/docker/test/test_scripts_and_data/set_up_and_run_equilibriumcrystalstructure_sample.sh b/docker/test/test_scripts_and_data/set_up_and_run_equilibriumcrystalstructure_sample.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+# Install the sample items, then run them. NOTE(review): __TE_/__MO_/__SM_ presumably mark KIM tests, a model and a simulator model - confirm.
+kimitems install -D EquilibriumCrystalStructure_AB_hP10_156_2a2bc_2a2bc_CSi__TE_758846131690_001
+kimitems install -D EquilibriumCrystalStructure_AB_hR6_160_3a_3a_SZn__TE_368210937505_001
+kimitems install -D EquilibriumCrystalStructure_A_cI82_217_acgh_Si__TE_858705144968_001
+kimitems install -D EquilibriumCrystalStructure_A_oP24_58_eg2h_S__TE_654572373022_001
+kimitems install -D EquilibriumCrystalStructure_A_oC16_65_pq_C__TE_970978470247_001
+kimitems install -D EquilibriumCrystalStructure_A_tI8_139_h_C__TE_939583133718_001
+kimitems install -D EquilibriumCrystalStructure_A2B_cP12_205_c_a_SZn__TE_887144277034_001
+kimitems install -D EquilibriumCrystalStructure_A2B_tP6_131_i_e_CSi__TE_797499413755_001
+kimitems install -D EquilibriumCrystalStructure_A_oF16_69_gh_Si__TE_099421378389_001
+kimitems install -D EquilibriumCrystalStructure_A_mP32_13_8g_S__TE_685644464884_001
+kimitems install -D EquilibriumCrystalStructure_AB_cF8_216_a_c_SZn__TE_981216532817_001
+kimitems install -D EquilibriumCrystalStructure_A_mC16_12_4i_Si__TE_709261317000_001
+kimitems install -D EquilibriumCrystalStructure_A_aP28_2_14i_S__TE_073817817914_001
+kimitems install -D EquilibriumCrystalStructure_A_oI120_71_lmn6o_C__TE_407657597689_001
+kimitems install -D SW_ZhouWardMartin_2013_CdTeZnSeHgS__MO_503261197030_003
+kimitems install -D Sim_LAMMPS_TersoffZBL_DevanathanDiazdelaRubiaWeber_1998_SiC__SM_578912636995_000
+# NOTE(review): presumably selects the local pipeline database that compare_dbs.py later reads from /pipeline/db - confirm.
+pipeline-database set local
+# The backslashes stop the shell from glob-expanding the pattern, so pipeline-run-matches receives *_TE_* literally.
+pipeline-run-matches \*_TE_\*