-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
71 lines (57 loc) · 3.89 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import os
from uploader.run_metadata_collector.miseq_run_metadata_collector import CollectMiseqRunMetadata
from uploader.run_metadata_collector.nextseq_run_metadata_collector import CollectNextSeqRunMetadata
from uploader.sample_metadata_collector.miseq_sample_metadata_collector import CollectMiseqSampleMetadata
from uploader.sample_metadata_collector.nextseq_sample_metadata_collector import CollectNextSeqSampleMetadata
from uploader.metadata_import import MetadataImport
from uploader.file_helpers import get_all_runs_with_data_for_catalogue
from uploader.manage_libraries import LibrariesManager
import argparse
def _upload_runs(importer, run_paths, run_type, run_collector, sample_collector,
                 libraries_folders, *, with_libraries=False):
    """Upload every sample of every run in ``run_paths`` and mark the run as uploaded.

    Args:
        importer: object whose ``upload`` method receives the collected metadata.
        run_paths: iterable of absolute run folder paths.
        run_type: run-type label forwarded to ``importer.upload``.
        run_collector: class called as ``run_collector(run_path).collect()``.
        sample_collector: class called as
            ``sample_collector(run_path, sample_path, catalog_info_folder).collect()``.
        libraries_folders: forwarded to ``LibrariesManager`` when
            ``with_libraries`` is true; otherwise unused.
        with_libraries: when true, library data is looked up per sample and
            passed to ``importer.upload`` as an extra positional argument.
    """
    for absolute_run_path in run_paths:
        print(absolute_run_path)
        # The library manager is built before the run metadata is collected,
        # matching the original statement order.
        lib_manager = None
        if with_libraries:
            lib_manager = LibrariesManager(libraries_folders, absolute_run_path)
        run_metadata = run_collector(absolute_run_path).collect()
        catalog_info_folder = os.path.join(absolute_run_path, "catalog_info_per_pred_number")
        for entry_name in os.listdir(catalog_info_folder):
            # Each directory entry is the clinical-info file of one sample;
            # the sample id is the entry name with ".json" removed.
            clinical_info_path = os.path.join(catalog_info_folder, entry_name)
            sample_id = entry_name.replace(".json", "")
            sample_path = os.path.join(absolute_run_path, "Samples", sample_id)
            if with_libraries:
                lib_data = lib_manager.get_data_from_libraries(sample_id)
                sample_metadata = sample_collector(absolute_run_path, sample_path,
                                                   catalog_info_folder).collect()
                importer.upload(run_metadata, sample_metadata, clinical_info_path,
                                run_type, lib_data)
            else:
                sample_metadata = sample_collector(absolute_run_path, sample_path,
                                                   catalog_info_folder).collect()
                importer.upload(run_metadata, sample_metadata, clinical_info_path, run_type)
        # Create (or truncate) an empty ".uploaded" marker file in the run folder.
        # NOTE(review): presumably this keeps the run from being picked up
        # again on the next invocation — confirm against file_helpers.
        with open(os.path.join(absolute_run_path, ".uploaded"), "w"):
            pass


def run(organised_files_foldes, wsi_folders, libraries_folders):
    """Upload metadata of all MiSEQ and NextSeq runs selected for the catalogue.

    Credentials are read from the ``CATALOG_LOGIN`` and ``CATALOG_PASSWORD``
    environment variables. MiSEQ runs are processed first (with per-sample
    library data), then NextSeq runs (without library data).

    Args:
        organised_files_foldes: passed to ``get_all_runs_with_data_for_catalogue``
            to find the runs to upload.
        wsi_folders: forwarded to ``MetadataImport``.
        libraries_folders: forwarded to ``MetadataImport`` and ``LibrariesManager``.

    Raises:
        KeyError: if ``CATALOG_LOGIN`` or ``CATALOG_PASSWORD`` is not set.
    """
    molgenis_login = os.environ["CATALOG_LOGIN"]
    molgenis_password = os.environ["CATALOG_PASSWORD"]
    importer = MetadataImport(wsi_folders,
                              libraries_folders,
                              molgenis_login,
                              molgenis_password)

    # Both run lists are resolved up front, before any upload starts.
    miseq_run_paths_for_catalogue_upload = get_all_runs_with_data_for_catalogue(
        organised_files_foldes, wanted_run_type="MiSEQ")
    nextseq_run_paths_for_catalogue_upload = get_all_runs_with_data_for_catalogue(
        organised_files_foldes, wanted_run_type="NextSeq")

    # miseq upload
    _upload_runs(importer, miseq_run_paths_for_catalogue_upload, "MiSEQ",
                 CollectMiseqRunMetadata, CollectMiseqSampleMetadata,
                 libraries_folders, with_libraries=True)

    # nextseq upload
    _upload_runs(importer, nextseq_run_paths_for_catalogue_upload, "NextSeq",
                 CollectNextSeqRunMetadata, CollectNextSeqSampleMetadata,
                 libraries_folders)

    # Drop the local reference to the importer explicitly.
    # NOTE(review): presumably MetadataImport performs cleanup when it is
    # destroyed — confirm in uploader.metadata_import.
    del importer
# Command-line entry point: parse the three required paths and start the upload.
if __name__ == '__main__':
    # NOTE(review): the description speaks of organising runs, while this
    # script passes them to run() for upload — it may have been copied from
    # another tool; confirm the intended wording.
    parser = argparse.ArgumentParser(prog="Uploader",
                                     description="Organise pseudonymized runs into a specified output folder. "
                                                 "It is important to use full paths")
    parser.add_argument("-o", "--organised_runs", type=str, required=True,
                        help="Path to the folder with organised runs")
    parser.add_argument("-w", "--wsi", type=str, required=True, help="Path to a WSI folder")
    parser.add_argument("-d", "--libraries", type=str, required=True, help="Path to a libraries document")
    args = parser.parse_args()
    run(args.organised_runs, args.wsi, args.libraries)