Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

finish_argument_non_unique_CDS_IDS #90

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 42 additions & 2 deletions gff3tool/lib/gff3_ID_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ def main(in_gff, merge_report, out_merge_report, out_gff, uuid_on, prefix, digit
'missing': []
}
ID_order = []
Copy_ID_dict=[]
roots = list()
logger.info('Generate new ID for features in (%s)', in_gff)
for line in gff3.lines:
Expand All @@ -188,10 +189,14 @@ def main(in_gff, merge_report, out_merge_report, out_gff, uuid_on, prefix, digit
if uuid_on:
newID = str(uuid.uuid1())
if 'ID' in line['attributes']:
#print line['attributes']
if line['attributes']['ID'] in ID_dict:
#print line['attributes']['ID']
ID_dict[line['attributes']['ID']].append(newID)
if alias:
line['attributes']['Alias'] = line['attributes']['ID']
if line['attributes']['ID'] not in Copy_ID_dict:
Copy_ID_dict.append(line['attributes']['ID'])
line['attributes']['ID'] = newID
else:
ID_dict[line['attributes']['ID']] = [newID]
Expand All @@ -202,6 +207,7 @@ def main(in_gff, merge_report, out_merge_report, out_gff, uuid_on, prefix, digit
else:
ID_dict['missing'].append(newID)
line['attributes']['ID'] = newID

if 'Parent' in line['attributes']:
for index, parent in enumerate(line['attributes']['Parent']):
if parent in ID_dict:
Expand All @@ -217,6 +223,8 @@ def main(in_gff, merge_report, out_merge_report, out_gff, uuid_on, prefix, digit
except KeyError:
logger.warning('[Missing Attributes] Line (%s)', str(line['line_index'] + 1))
IDnumber = 0
#print Copy_ID_dict
#gene parent
for root in roots:
newID = idgenerator(prefix, IDnumber, digitlen)
IDnumber = newID['maxnum']
Expand All @@ -231,7 +239,6 @@ def main(in_gff, merge_report, out_merge_report, out_gff, uuid_on, prefix, digit
for index, parent in enumerate(child['attributes']['Parent']):
if parent in ID_dict:
child['attributes']['Parent'][index] = newID['ID']

newcID = '%s-R%s' % (newID['ID'], alphabets.pop(0))
ID_dict[child['attributes']['ID']] = [newcID]
ID_order.append(child['attributes']['ID'])
Expand Down Expand Up @@ -271,6 +278,7 @@ def main(in_gff, merge_report, out_merge_report, out_gff, uuid_on, prefix, digit
descend['attributes']['ID'] = newdID['ID']
flag = True
descend['attributes']['Parent'][index] = ID_dict[parent][0]

if merge_report and out_merge_report:
logger.info('Update report file generated by gff3_merge program with new IDs.')
with open(out_merge_report, 'w') as out_f:
Expand All @@ -287,17 +295,48 @@ def main(in_gff, merge_report, out_merge_report, out_gff, uuid_on, prefix, digit
for log_line in log_lines:
out_f.write('\t'.join(log_line) + '\n')
logger.info('Write out gff3 file: (%s)', out_gff)

#cds_share_id
cds_parent=[]
Copy_ID_dict_value=[]
listid=[]
if args.type:
for line in gff3.lines:
if line['type']=='CDS':
for i in line['attributes']['Parent']:
if i not in cds_parent:
cds_parent.append(i)

#parent_feature
for z in cds_parent:
newID2 = str(uuid.uuid4())
if z not in Copy_ID_dict_value:
Copy_ID_dict_value.append(z)
keyparent={'Parent':z,'ID':[newID2]}
#listid will store all newid in order
listid.append(keyparent['ID'])
#overwrite gff3 file
line['attributes'].update(keyparent)

cds_update_part = {}
for k,v in zip(Copy_ID_dict, listid):
cds_update_part.setdefault(k,v)
#cds_update_part.setdefault(k, []).append(v)

#overwrite report file
ID_dict.update(cds_update_part)

write_gff3(gff3, out_gff)
if report:
ID_order.append('missing')
logger.info('Generate a report of comparison between old and new IDs: (%s)', report)
out_line = 'Old_ID\tNewID'
out_report.write(out_line+'\n')

for key in ID_order:
for value in ID_dict[key]:
out_line = '%s\t%s' % (key, value)
out_report.write(out_line+'\n')

out_report.close()

if __name__ == '__main__':
Expand All @@ -318,6 +357,7 @@ def main(in_gff, merge_report, out_merge_report, out_gff, uuid_on, prefix, digit
parser.add_argument('-r', '--report', type=str, help='Generate a table of comparison between old and new IDs.')
parser.add_argument('-v', '--version', action='version', version='%(prog)s ' + __version__)
parser.add_argument('-a', '--alias', action='store_true', default=False, help='Specify this argument if you want old IDs to be retained in the gff3 file as an Alias attribute')
parser.add_argument('-t','--type', type=str)

args = parser.parse_args()
main(in_gff=args.gff, merge_report=args.merge_report, out_merge_report=args.out_merge_report, out_gff=args.output_gff, uuid_on=args.universally_unique_identifier, prefix=args.idprefix, digitlen=args.digitlen, report=args.report, alias=args.alias)