Skip to content

Commit

Permalink
Nextclade
Browse files Browse the repository at this point in the history
  • Loading branch information
aknijn committed Jul 16, 2021
1 parent 8fbfe8c commit b20d086
Show file tree
Hide file tree
Showing 12 changed files with 141,950 additions and 97 deletions.
141,868 changes: 141,867 additions & 1 deletion RECoVC/tree.json

Large diffs are not rendered by default.

5 changes: 0 additions & 5 deletions RECoVJ/Lineages-VOC

This file was deleted.

8 changes: 0 additions & 8 deletions RECoVJ/Lineages-VOI

This file was deleted.

22 changes: 0 additions & 22 deletions RECoVJ/Lineages-VUM

This file was deleted.

105 changes: 48 additions & 57 deletions RECoVJ/RECoVJ.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,53 +49,44 @@ def isNewLineage(inLineage):
cnx.close()
return isNew

def getTypeLineage(inLineage, inSpike):
    """Return the classification label for a lineage, or "-" if none applies.

    The categories are probed in the order VUM, VOI, VOC through
    isTypeLineage(); a later category overrides an earlier one, so VOC
    takes precedence over VOI, which takes precedence over VUM.  A match
    code of 1 yields the bare category name, a match code of 2 yields the
    name followed by "*".
    """
    suffix_by_code = {1: "", 2: "*"}
    label = "-"
    # Every category is always queried, in this order, so the last hit wins.
    for category in ("VUM", "VOI", "VOC"):
        code = isTypeLineage(inLineage, inSpike, category)
        if code in suffix_by_code:
            label = category + suffix_by_code[code]
    return label

def isNotificaLineage(inLineage, inSpike):
    """Return True when isTypeLineage() reports any match for the "EW" list."""
    match_code = isTypeLineage(inLineage, inSpike, "EW")
    return match_code > 0
def isNotificaVariant(inLineage, inSpike):
    """Tell whether a lineage (optionally with a spike mutation) is listed in Variants-EW.

    Each line of TOOL_DIR/Variants-EW is either a bare lineage name, which
    must equal inLineage, or "lineage+spike", where the lineage part must
    equal inLineage and the spike part must occur as a substring of inSpike.

    Returns True on the first matching line, False if no line matches.
    """
    with open(TOOL_DIR + '/Variants-EW', 'r') as handle:
        entries = handle.read().splitlines()
    for entry in entries:
        if '+' not in entry:
            # Plain lineage entry: exact comparison only.
            if entry == inLineage:
                return True
            continue
        parts = entry.split('+')
        if parts[0] == inLineage and parts[1] in inSpike:
            return True
    return False

def isTypeLineage(inLineage, inSpike, inType):
    """Look up a lineage in TOOL_DIR/Lineages-<inType> and return a match code.

    Returns:
        0 when no line matches,
        1 when a bare lineage line equals inLineage,
        2 when a "lineage+spike" line has the same lineage and
          isTypeLineageSpike() accepts its spike part for inSpike.

    The whole file is scanned without stopping early, so when several
    lines match, the code of the last matching line is returned.
    """
    code = 0
    with open(TOOL_DIR + '/Lineages-' + inType, 'r') as handle:
        entries = handle.read().splitlines()
    for entry in entries:
        if '+' not in entry:
            if entry == inLineage:
                code = 1
            continue
        parts = entry.split('+')
        if parts[0] == inLineage and isTypeLineageSpike(parts[1], inSpike, inType):
            code = 2
    return code
def getVariant(inLineage, inClade, inSpike):
    """Resolve the variant name, trying the lineage table before the clade table.

    The 'Lineages' table is consulted first with inLineage; only when that
    lookup yields '-' (no match) is the 'Clades' table consulted with
    inClade.  Returns '-' when neither table has a match.
    """
    by_lineage = getVariant_Lineage_Clade(inLineage, inSpike, 'Lineages')
    if by_lineage != '-':
        return by_lineage
    return getVariant_Lineage_Clade(inClade, inSpike, 'Clades')

def isTypeLineageSpike(vocSpike, inSpikes, inType):
    """Tell whether any observed spike mutation matches a spike definition.

    Args:
        vocSpike: spike definition taken from a "lineage+spike" list entry.
            A trailing 'X' acts as a wildcard for the final character.
        inSpikes: observed spike mutations joined by '; ', or one of the
            placeholders '=' / 'ND', which never match.
        inType: list category; unused here, kept so existing callers that
            pass it keep working.

    Returns:
        True when at least one observed mutation occurs as a substring of
        vocSpike, or when vocSpike ends in 'X' and the mutation without its
        last character occurs in vocSpike without its last character.
        False otherwise.
    """
    # NOTE: this function used to open TOOL_DIR/Spikes-<inType> without ever
    # reading it; that pointless open (and the failure it caused when the
    # file was absent) has been removed.
    if inSpikes in ('=', 'ND'):
        return False
    # endswith() is safe on an empty definition, unlike indexing with [-1].
    wildcard = vocSpike.endswith('X')
    for inSpike in inSpikes.split('; '):
        if not inSpike:
            # An empty token is a substring of everything; never let it match.
            continue
        if wildcard and inSpike[:-1] in vocSpike[:-1]:
            return True
        if inSpike in vocSpike:
            return True
    return False
def getVariant_Lineage_Clade(inLineage_Clade, inSpike, inType):
    """Map a lineage or clade name to a variant name using a lookup table.

    The table is TOOL_DIR/Variants-<inType>, one tab-separated record per
    line: name, required spike mutation ('*' meaning "any"), variant name.
    Records are checked top to bottom and the first match wins, so more
    specific records must come before the '*' record for the same name.

    Args:
        inLineage_Clade: lineage or clade name to look up.
        inSpike: spike mutation string of the sample; a record with a
            specific mutation matches when that mutation occurs as a
            substring of this value.
        inType: table suffix, e.g. 'Lineages' or 'Clades'.

    Returns:
        The variant name of the first matching record, or '-' if none.
    """
    with open(TOOL_DIR + '/Variants-' + inType, 'r') as f:
        lines = f.read().splitlines()
    for line in lines:
        relations = line.split('\t')
        if len(relations) < 3:
            # Blank or malformed line: skip it instead of raising IndexError.
            continue
        name, required_spike, variant = relations[:3]
        if name != inLineage_Clade:
            continue
        if required_spike == '*' or required_spike in inSpike:
            return variant
    return '-'

def colindex(gene):
colindexes = {
Expand Down Expand Up @@ -141,11 +132,11 @@ def main():
parser.add_argument('--librarytype', dest='librarytype', help='library type')
parser.add_argument('--region', dest='region', help='region')
parser.add_argument('--year', dest='year', help='year')
parser.add_argument('--lineage', dest='lineage', help='lineage')
parser.add_argument('--clade', dest='clade', help='clade')
parser.add_argument('--variants', dest='variants', help='variants')
parser.add_argument('--lineage', dest='lineage', help='pangolin')
parser.add_argument('--clade', dest='clade', help='nextclade')
parser.add_argument('--variants', dest='variants', help='Spike muations')
parser.add_argument('--consensus', dest='consensus', help='consensus')
parser.add_argument('--recovery_json', dest='recovery_json', help='recovery_json')
parser.add_argument('--recovery_json', dest='recovery_json', help='output json')

args = parser.parse_args()
try:
Expand All @@ -168,13 +159,13 @@ def main():
elif library == 'cons':
report_data["sequence"] = "Consensus"
# Ns in consensus
with open(args.consensus) as cons_in:
with open(args.consensus, 'r') as cons_in:
temp = cons_in.read().splitlines()
consensus="".join(temp[1:])
percN = (100.0 * consensus.count('N')) / (len(consensus))
report_data["N_consensus"] = str(consensus.count('N')) + " (" + "{:.1f}".format(percN) + "%)"
# obtain lineage and quality control from pangolin result and from Ns in consensus
with open(args.lineage) as table_in:
with open(args.lineage, 'r') as table_in:
tab_lineage = [[str(col).rstrip() for col in row.split(',')] for row in table_in]
report_data["lineage"] = tab_lineage[1][1]
lineage = tab_lineage[1][1]
Expand All @@ -186,11 +177,11 @@ def main():
else:
report_data["qc_status"] = 'Passed'
# obtain clade from nextclade result
with open(args.clade) as table_in:
with open(args.clade, 'r') as table_in:
tab_clade = [[str(col).rstrip() for col in row.split('\t')] for row in table_in]
report_data["clade"] = tab_clade[1][1].strip('\"')
# variants
with open(args.variants) as table_in:
with open(args.variants, 'r') as table_in:
tab_variants = [[str(col).rstrip() for col in row.split('\t')] for row in table_in]
if library == 'sang':
strDefault = "ND"
Expand All @@ -217,14 +208,14 @@ def main():
report_data["ORF8"] = format_variants(report_variants[8])
report_data["N-protein"] = format_variants(report_variants[9])
report_data["ORF10"] = format_variants(report_variants[10])
# VOC
if isNewLineage(lineage):
# Variante
if isNewLineage(report_data["lineage"]):
report_data["notifica"] = "New"
else:
report_data["notifica"] = "-"
if isNotificaLineage(lineage, report_data["S-protein"]):
if isNotificaVariant(report_data["lineage"], report_data["S-protein"]):
report_data["notifica"] = "Si"
report_data["VOC"] = getTypeLineage(lineage, report_data["S-protein"])
report_data["variante"] = getVariant(report_data["lineage"], report_data["clade"], report_data["S-protein"])
finally:
report = open(args.recovery_json, 'w')
report.write("[" + json.dumps(report_data) + "]")
Expand Down
1 change: 0 additions & 1 deletion RECoVJ/Spikes-EW

This file was deleted.

1 change: 0 additions & 1 deletion RECoVJ/Spikes-VOC

This file was deleted.

1 change: 0 additions & 1 deletion RECoVJ/Spikes-VOI

This file was deleted.

1 change: 0 additions & 1 deletion RECoVJ/Spikes-VUM

This file was deleted.

11 changes: 11 additions & 0 deletions RECoVJ/Variants-Clades
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
20I (Alpha, V1) E484K Alpha+E484K
20I (Alpha, V1) * Alpha
20H (Beta, V2) * Beta
20J (Gamma, V3) * Gamma
21A (Delta) * Delta
21D (Eta) * Eta
21F (Iota) * Iota
21B (Kappa) * Kappa
21G (Lambda) * Lambda
21C (Epsilon) * Epsilon
21E (Theta) * Theta
File renamed without changes.
24 changes: 24 additions & 0 deletions RECoVJ/Variants-Lineages
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
B.1.1.318 * VUM
B.1.1.519 * VUM
B.1.427 * Epsilon
B.1.429 * Epsilon
B.1.621 * VUM
C.36.3 * VUM
C.36.3.1 * VUM
B.1.1.7 E484K Alpha+E484K
B.1.1.7 * Alpha
B.1.351 * Beta
B.1.351.2 * Beta
B.1.351.3 * Beta
P.1 * Gamma
P.1.1 * Gamma
P.1.2 * Gamma
B.1.617.2 * Delta
AY.1 * Delta
AY.2 * Delta
B.1.525 * Eta
B.1.526 * Iota
B.1.617.1 * Kappa
C.37 * Lambda
P.2 * Zeta
P.3 * Theta

0 comments on commit b20d086

Please sign in to comment.