diff --git a/src/data_loader.py b/src/data_loader.py index ee64106..8fb2f14 100644 --- a/src/data_loader.py +++ b/src/data_loader.py @@ -31,6 +31,7 @@ def __init__(self, model, batch, mongo_dao, bucket, root_path, data_common, subm self.main_nodes = self.model.get_main_nodes() self.errors = None self.submission = submission + self.ORCID = submission.get(ORCID) """ param: file_path_list downloaded from s3 bucket @@ -60,6 +61,7 @@ def load_data(self, file_path_list): for index, row in df.iterrows(): type = row[TYPE] node_id = self.get_node_id(type, row) + crdc_id = None exist_node = self.mongo_dao.get_dataRecord_by_node(node_id, type, self.batch[SUBMISSION_ID]) # 2. construct dataRecord rawData = df.loc[index].to_dict() @@ -71,17 +73,14 @@ def load_data(self, file_path_list): id = self.get_record_id(exist_node) # onlu generating CRDC ID for valid nodes valid_crdc_id_nodes = type in main_node_types - crdc_id = self.get_crdc_id(exist_node, type, node_id, self.submission.get(STUDY_ID)) if valid_crdc_id_nodes else None + if valid_crdc_id_nodes: + crdc_id = self.get_crdc_id(exist_node, type, node_id, self.submission.get(STUDY_ID)) if type != PRINCIPAL_INVESTIGATOR else self.ORCID # file nodes if valid_crdc_id_nodes and type in file_types: id_field = self.file_nodes.get(type, {}).get(ID_FIELD) file_id_val = row.get(id_field) if file_id_val: crdc_id = file_id_val if file_id_val.startswith(DCF_PREFIX) else DCF_PREFIX + file_id_val - # principal investigator node - if type == PRINCIPAL_INVESTIGATOR and PRINCIPAL_INVESTIGATOR in main_node_types: - submission = self.mongo_dao.get_submission(self.batch[SUBMISSION_ID]) - crdc_id = submission.get(ORCID) if submission and submission.get(ORCID) else None if index == 0 or not self.process_m2m_rel(records, node_id, rawData, relation_fields): dataRecord = { @@ -110,7 +109,7 @@ def load_data(self, file_path_list): STUDY_ID: self.submission.get(STUDY_ID) } if crdc_id: - dataRecord["CRDC_ID"] = crdc_id + dataRecord[CRDC_ID] = crdc_id if type in file_types: dataRecord[S3_FILE_INFO] = self.get_file_info(type, prop_names, row) records.append(dataRecord)