From 8fc24878758a308ba891c39fcc0df109f4b11e97 Mon Sep 17 00:00:00 2001 From: Nathan Edwards Date: Fri, 8 Dec 2023 15:33:00 -0500 Subject: [PATCH] script changes --- scripts/bulkimg.sh | 2 +- scripts/glycotree_gctconid.py | 39 +++++++++++++++++++++++++++++++---- 2 files changed, 36 insertions(+), 5 deletions(-) diff --git a/scripts/bulkimg.sh b/scripts/bulkimg.sh index 86694b69..b80d3184 100755 --- a/scripts/bulkimg.sh +++ b/scripts/bulkimg.sh @@ -66,4 +66,4 @@ shift $(($OPTIND - 1)) if [ $VERBOSE -eq 1 ]; then echo $JAVA -cp "$JAR" $MAIN notation "$NOTATION" display "$DISP" scale "$SCALE" orient "$ORIENT" redend "$REDEND" opaque "$OPAQUE" format "$FORMAT" force "$FORCE" outdir "$OUTDIR" $@ 1>&2 fi -exec $JAVA -cp "$JAR" $MAIN notation "$NOTATION" display "$DISP" scale "$SCALE" orient "$ORIENT" redend "$REDEND" opaque "$OPAQUE" format "$FORMAT" force "$FORCE" outdir "$OUTDIR" $@ 2>/dev/null | egrep -w -v '(org.glycoinfo|DEBUG|GlycanImageCmdline.main|org.eurocarbdb.application.glycanbuilder)' +exec $JAVA -cp "$JAR" $MAIN notation "$NOTATION" display "$DISP" scale "$SCALE" orient "$ORIENT" redend "$REDEND" opaque "$OPAQUE" format "$FORMAT" force "$FORCE" outdir "$OUTDIR" $@ 2>/dev/null | egrep -w -v '(org.glycoinfo|DEBUG|GlycanImageCmdline.main|org.eurocarbdb.application.glycanbuilder|Warning:)' diff --git a/scripts/glycotree_gctconid.py b/scripts/glycotree_gctconid.py index 4b8721ba..d913dd9a 100755 --- a/scripts/glycotree_gctconid.py +++ b/scripts/glycotree_gctconid.py @@ -32,6 +32,27 @@ for r in csv.DictReader(open(idmapfilename),dialect='excel-tab'): idmaps[r['Accession']][r['GlycoCTResidueIndex']] = r['CanonicalResidueIndex'] +validresidues = set(filter(None,""" +GlcNAc +Glc +Man +Gal +GalNAc +Fuc +Xyl +NeuAc +NeuGc +P +S +Count +""".split())) + +def validcomp(comp): + for k,v in comp.items(): + if k not in validresidues and v > 0: + return False + return True + def check_idmap(gly1,gly2,idmap): ids1 = gly1.external_descriptor_ids() ids2 = gly2.external_descriptor_ids() @@ -50,6 +71,13 @@ def iternlinkedaccs(): for acc,strict,resids,linkids in gm.getstruct('GGM','001001'): if acc in seen: continue + gly = gtc.getGlycan(acc) + if not gly or gly.repeated(): + continue + comp = gly.iupac_composition(aggregate_basecomposition=False) + # print(comp) + if not validcomp(comp): + continue yield acc seen.add(acc) @@ -65,11 +93,14 @@ def iterolinkedaccs(): for acc,strict,resids,linkids in gm.getstruct('GGM',olc): if acc in seen: continue + gly = gtc.getGlycan(acc) + if not gly or gly.repeated(): + continue + comp = gly.iupac_composition(aggregate_basecomposition=False) + print(comp) + if not validcomp(comp): + continue if olc in "001034": - gly = gtc.getGlycan(acc) - if not gly or gly.repeated(): - continue - comp = gly.iupac_composition() if comp['Count'] not in (1,2): continue if comp['Count'] == 2 and comp['NeuAc'] != 1: