Skip to content

Commit

Permalink
Added option to write to directory with output name as tag value
Browse files Browse the repository at this point in the history
  • Loading branch information
BuysDB committed Apr 3, 2024
1 parent d18fdb2 commit 3df91ac
Showing 1 changed file with 15 additions and 3 deletions.
18 changes: 15 additions & 3 deletions singlecellmultiomics/bamProcessing/bamSplitByTag.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ def split_bam_by_tag( input_bam_path, output_prefix, tag, head=None, max_handles
head(int) : write this number of reads, then exit
"""

bamFile = pysam.AlignmentFile(input_bam_path, "rb")
header = bamFile.header.copy()

Expand Down Expand Up @@ -57,7 +58,7 @@ def split_bam_by_tag( input_bam_path, output_prefix, tag, head=None, max_handles
waiting.add(value)
continue
print(f'\rOpened bam file for {value} ', end='')
output_handles[value] = pysam.AlignmentFile(f'{output_prefix}.{value}.bam', "wb", header=header)
output_handles[value] = pysam.AlignmentFile(f'{output_prefix}{value}.bam', "wb", header=header)

output_handles[value].write(r)
written+=1
Expand Down Expand Up @@ -94,19 +95,30 @@ def split_bam_by_tag( input_bam_path, output_prefix, tag, head=None, max_handles
argparser.add_argument(
'-o',
type=str,
required=True,
required=False,
help='output bam prefix, to the end of the file name the tag value is appended')
# Folder mode must be optional: if it were required, the folder branch below
# would always be taken and the ``-o`` prefix mode could never be selected.
argparser.add_argument(
    '-o_folder',
    type=str,
    required=False,
    help='All files will be written to this folder, with just using the tag as file name')

argparser.add_argument('-head', type=int)
args = argparser.parse_args()

# Resolve the string that is prepended to every "<tag value>.bam" file name.
# When both options are given, the folder takes precedence.
if args.o_folder is not None:
    # exist_ok avoids the check-then-create race of testing os.path.exists first.
    os.makedirs(args.o_folder, exist_ok=True)
    # Output paths are built by plain concatenation of prefix and tag value,
    # so the folder needs a trailing separator; joining with '' adds one only
    # when it is missing ("out" -> "out/", "out/" stays "out/").
    output_prefix = os.path.join(args.o_folder, '')
elif args.o is not None:
    output_prefix = args.o
else:
    # Neither output mode was chosen; exit with a usage message rather than
    # continuing with a prefix of None.
    argparser.error('one of the arguments -o or -o_folder is required')

skip= set()

waiting = set([0]) #
iteration = 1
while len(waiting)>0:
print(f'Iteration {iteration}')
done, waiting = split_bam_by_tag(args.bamfile, args.o, args.tag, head=args.head, max_handles=args.max_handles,skip=skip )
done, waiting = split_bam_by_tag(args.bamfile, tag=args.tag, output_prefix=output_prefix, head=args.head, max_handles=args.max_handles,skip=skip )
print('Wrote bam files for tag values:')
for d in done:
print(f'\t{d}')
Expand Down

0 comments on commit 3df91ac

Please sign in to comment.