-
Notifications
You must be signed in to change notification settings - Fork 0
/
convertSnomed2tsv.py
99 lines (88 loc) · 2.71 KB
/
convertSnomed2tsv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Aug 17 15:17:27 2020
@author: crodri
"""
import csv
# Build `snomedict`: lower-cased text of column 7 -> list of the distinct
# column-4 values seen for it, in first-seen order.
# Presumably column 7 is the description term and column 4 the SNOMED concept
# id (RF2 description layout) -- TODO confirm against the release file header.
#
# NOTE(review): the 'excel' dialect treats '"' as a quote character. If the
# input file uses unquoted fields, a cell that starts with '"' may be parsed
# differently than intended -- confirm whether quoting=csv.QUOTE_NONE is needed.
snomedict = {}
with open("/home/crodri/BSC/mappings/SNOMED/SnomedCT_Spanish_Edition/SnomedCT_SpanishRelease-es_PRODUCTION_20200430T120000Z/RF2Release/Snapshot/Terminology/sct2_Description_SpanishExtensionSnapshot-es_INT_20200430.txt", encoding="utf-8", newline="") as snomed_file:
    snomedfull = csv.reader(snomed_file, dialect='excel', delimiter="\t")
    for x in snomedfull:
        # Rows too short to hold column 7 are skipped (the original swallowed
        # the IndexError for them); this also covers blank lines.
        if len(x) < 8:
            continue
        # Skip the header row and every row whose column 2 is not '1'.
        if x[0] == 'id' or x[2] != '1':
            continue
        # Look the term up under the same lower-cased key it is stored under.
        # Testing the raw text instead would miss existing entries for any
        # term containing an upper-case letter and replace their code list.
        term_codes = snomedict.setdefault(x[7].lower(), [])
        if x[4] not in term_codes:
            term_codes.append(x[4])
#import pickle
# Dump `snomedict` as a tab-separated file: one row per term, holding the term
# and its codes joined with "|".
# The file is opened in a `with` block so it is flushed and closed as soon as
# the last row is written instead of staying open for the rest of the run.
with open("/home/crodri/GIT/TEMUNorm/tsv_dictionaries/SpanishSnomed.tsv", 'w', encoding="utf-8", newline="") as salida_file:
    salida = csv.writer(salida_file, dialect='excel', delimiter="\t")
    n = 0  # number of rows written
    for term, term_codes in snomedict.items():
        salida.writerow([term, "|".join(term_codes)])
        n += 1
# Report how many terms were written.
print(n)
# Read the SNOMED -> ICD-10-CM map and build two lookup tables from the rows
# whose column 2 is '1':
#   icd10:      column 5 -> column 11 (first non-empty mapping seen wins)
#   reverseicd: column 11 -> column 5 (first mapping seen wins)
# Column 5 is later looked up against the SNOMED codes and column 11 is treated
# as the ICD-10 code; presumably these are referencedComponentId and mapTarget
# -- TODO confirm against the file header.
#
# NOTE(review): the indentation of this script was lost in the copy this edit
# was written from. The reverse-map update below is assumed to run for every
# active row, not only for rows that add a new `icd10` entry -- confirm.
icd10 = {}
reverseicd = {}
f = 0  # active rows whose column-5 code was already present in icd10
r = 0  # active rows whose column-11 code was already present in reverseicd
with open("/home/crodri/BSC/mappings/SNOMED/SNOMED_CT_to_ICD-10-CM_Resources_20200301/SNOMED_CT_to_ICD-10-CM_Resources_20200301/tls_Icd10cmHumanReadableMap_US1000124_20200301.tsv", encoding="utf-8", newline="") as icd_map_file:
    for x in csv.reader(icd_map_file, dialect='excel', delimiter="\t"):
        # Skip the header row and every row whose column 2 is not '1'.
        if x[0] == 'id' or x[2] != '1':
            continue
        source_code = x[5]
        target_code = x[11]

        # Forward map: keep the first target seen for each source code.
        if source_code in icd10:
            f += 1
        elif target_code == '':
            # Row without a target: print it for inspection, store nothing.
            print(x)
        else:
            icd10[source_code] = target_code

        # Reverse map: rows without a target contribute nothing.
        if target_code == '':
            continue
        if target_code in reverseicd:
            print("Repeated ",target_code)
            r += 1
        elif source_code == '':
            # Row without a source code: print it for inspection.
            print(x)
        else:
            reverseicd[target_code] = source_code
# Invert `snomedict`: map every code to the list of terms that carry it, in
# the order the terms appear in `snomedict`.
# A plain dict is used so that a lookup of an unknown code still raises
# KeyError.
reverSnomed = {}
for term, term_codes in snomedict.items():
    for code in term_codes:
        reverSnomed.setdefault(code, []).append(term)
# Bare expression kept from the original; it has no effect when the file is
# run as a script.
len(reverSnomed)
# For every (code, ICD code) pair in `icd10`, attach the terms known for that
# code: mappICD2SnoTerms[ICD code] = [code, list of terms].
# When several codes share one ICD code, the entry written last replaces the
# earlier ones.
mappICD2SnoTerms = {}
non = 0  # codes from icd10 that have no entry in reverSnomed
for snomed_code, icd_code in icd10.items():
    if snomed_code in reverSnomed:
        mappICD2SnoTerms[icd_code] = [snomed_code, reverSnomed[snomed_code]]
    else:
        non += 1
print("Keys not found: ",non)
# Dump `mappICD2SnoTerms` as a tab-separated file with one row per term:
# term, ICD code, and the code stored alongside the term list.
# The file is opened in a `with` block so it is flushed and closed as soon as
# the last row is written instead of staying open until the interpreter exits.
with open("/home/crodri/GIT/TEMUNorm/tsv_dictionaries/SpanishCIE10.tsv", 'w', encoding="utf-8", newline="") as salidacie_file:
    salidacie = csv.writer(salidacie_file, dialect='excel', delimiter="\t")
    for icd_code, (snomed, listaterminos) in mappICD2SnoTerms.items():
        salidacie.writerows([term, icd_code, snomed] for term in listaterminos)