diff --git a/gff3tool/lib/gff3_ID_generator.py b/gff3tool/lib/gff3_ID_generator.py index a4f824c..b4d5328 100644 --- a/gff3tool/lib/gff3_ID_generator.py +++ b/gff3tool/lib/gff3_ID_generator.py @@ -180,6 +180,7 @@ def main(in_gff, merge_report, out_merge_report, out_gff, uuid_on, prefix, digit 'missing': [] } ID_order = [] + Copy_ID_dict=[] roots = list() logger.info('Generate new ID for features in (%s)', in_gff) for line in gff3.lines: @@ -188,10 +189,14 @@ def main(in_gff, merge_report, out_merge_report, out_gff, uuid_on, prefix, digit if uuid_on: newID = str(uuid.uuid1()) if 'ID' in line['attributes']: + #print line['attributes'] if line['attributes']['ID'] in ID_dict: + #print line['attributes']['ID'] ID_dict[line['attributes']['ID']].append(newID) if alias: line['attributes']['Alias'] = line['attributes']['ID'] + if line['attributes']['ID'] not in Copy_ID_dict: + Copy_ID_dict.append(line['attributes']['ID']) line['attributes']['ID'] = newID else: ID_dict[line['attributes']['ID']] = [newID] @@ -202,6 +207,7 @@ def main(in_gff, merge_report, out_merge_report, out_gff, uuid_on, prefix, digit else: ID_dict['missing'].append(newID) line['attributes']['ID'] = newID + if 'Parent' in line['attributes']: for index, parent in enumerate(line['attributes']['Parent']): if parent in ID_dict: @@ -217,6 +223,8 @@ def main(in_gff, merge_report, out_merge_report, out_gff, uuid_on, prefix, digit except KeyError: logger.warning('[Missing Attributes] Line (%s)', str(line['line_index'] + 1)) IDnumber = 0 + #print Copy_ID_dict + #gene parent for root in roots: newID = idgenerator(prefix, IDnumber, digitlen) IDnumber = newID['maxnum'] @@ -231,7 +239,6 @@ def main(in_gff, merge_report, out_merge_report, out_gff, uuid_on, prefix, digit for index, parent in enumerate(child['attributes']['Parent']): if parent in ID_dict: child['attributes']['Parent'][index] = newID['ID'] - newcID = '%s-R%s' % (newID['ID'], alphabets.pop(0)) ID_dict[child['attributes']['ID']] = [newcID] ID_order.append(child['attributes']['ID']) @@ -271,6 +278,7 @@ def main(in_gff, merge_report, out_merge_report, out_gff, uuid_on, prefix, digit descend['attributes']['ID'] = newdID['ID'] flag = True descend['attributes']['Parent'][index] = ID_dict[parent][0] + if merge_report and out_merge_report: logger.info('Update report file generated by gff3_merge program with new IDs.') with open(out_merge_report, 'w') as out_f: @@ -287,17 +295,48 @@ def main(in_gff, merge_report, out_merge_report, out_gff, uuid_on, prefix, digit for log_line in log_lines: out_f.write('\t'.join(log_line) + '\n') logger.info('Write out gff3 file: (%s)', out_gff) + + #cds_share_id + cds_parent=[] + Copy_ID_dict_value=[] + listid=[] + if args.type: + for line in gff3.lines: + if line['type']=='CDS': + for i in line['attributes']['Parent']: + if i not in cds_parent: + cds_parent.append(i) + + #parent_feature + for z in cds_parent: + newID2 = str(uuid.uuid4()) + if z not in Copy_ID_dict_value: + Copy_ID_dict_value.append(z) + keyparent={'Parent':z,'ID':[newID2]} + #listid will store all newid in order + listid.append(keyparent['ID']) + #overwrite gff3 file + line['attributes'].update(keyparent) + + cds_update_part = {} + for k,v in zip(Copy_ID_dict, listid): + cds_update_part.setdefault(k,v) + #cds_update_part.setdefault(k, []).append(v) + + #overwrite report file + ID_dict.update(cds_update_part) + write_gff3(gff3, out_gff) if report: ID_order.append('missing') logger.info('Generate a report of comparison between old and new IDs: (%s)', report) out_line = 'Old_ID\tNewID' out_report.write(out_line+'\n') + for key in ID_order: for value in ID_dict[key]: out_line = '%s\t%s' % (key, value) out_report.write(out_line+'\n') - out_report.close() if __name__ == '__main__': @@ -318,6 +357,7 @@ def main(in_gff, merge_report, out_merge_report, out_gff, uuid_on, prefix, digit parser.add_argument('-r', '--report', type=str, help='Generate a table of comparison between old and new IDs.') parser.add_argument('-v', '--version', action='version', version='%(prog)s ' + __version__) parser.add_argument('-a', '--alias', action='store_true', default=False, help='Specify this argument if you want old IDs to be retained in the gff3 file as an Alias attribute') + parser.add_argument('-t','--type', type=str) args = parser.parse_args() main(in_gff=args.gff, merge_report=args.merge_report, out_merge_report=args.out_merge_report, out_gff=args.output_gff, uuid_on=args.universally_unique_identifier, prefix=args.idprefix, digitlen=args.digitlen, report=args.report, alias=args.alias)