-
Notifications
You must be signed in to change notification settings - Fork 1
/
combine_column.py
executable file
·61 lines (49 loc) · 1.81 KB
/
combine_column.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/usr/bin/env python3
import sys,os,re
import pandas as pd
from argparse import ArgumentParser
##################################################################
def main():
    """Graft selected columns of file_2 onto file_1 and write the result.

    Both input tables are read with pandas. They must contain the same
    number of data rows; the requested columns of file_2 (1-based positions)
    are appended to the right of file_1's columns, in the order given on the
    command line, and the combined table is written to stdout.

    Exits through ``sys.exit`` with an error message when:
      * the two files have a different number of rows,
      * a requested column is not an integer, or
      * a requested column is outside ``1..<number of columns in file_2>``.
    """
    args = UserInput()

    # Raw string: '\s' is not a valid Python escape sequence, so the regex
    # default must not be written as a plain string literal.
    delim = args.delim or r'\s+'
    sep = args.sep or '\t'

    f1_df = pd.read_csv(args.file_1, delimiter=delim)
    f2_df = pd.read_csv(args.file_2, delimiter=delim)

    if len(f1_df.index) != len(f2_df.index):
        sys.exit('\n ERROR: file_1 {} and file_2 {} do not match\n'.format(
            len(f1_df.index), len(f2_df.index)))

    # Translate the 1-based column numbers into 0-based positions, rejecting
    # anything that would otherwise wrap around (0 -> last column, negative
    # numbers) or fail with a bare traceback (non-integer, too large).
    n_cols = len(f2_df.columns)
    positions = []
    for col in args.cols:
        try:
            number = int(col)
        except ValueError:
            sys.exit('\n ERROR: column "{}" is not an integer\n'.format(col))
        if not 1 <= number <= n_cols:
            sys.exit(
                '\n ERROR: column {} is out of range; file_2 has {} '
                'column(s)\n'.format(number, n_cols))
        positions.append(number - 1)

    # One concat for all requested columns instead of one per column.
    final_df = pd.concat([f1_df, f2_df.iloc[:, positions]], axis=1)

    # Write straight to stdout; print(df.to_csv()) would add a blank line
    # after the last row.
    final_df.to_csv(sys.stdout, sep=sep, index=False)
#################################################################
def UserInput():
    """Define and parse the command-line arguments of this script.

    Returns:
        The ``argparse.Namespace`` produced by ``parse_args()`` with:
          * ``file_1``, ``file_2`` -- values of ``-in1`` / ``-in2`` (required)
          * ``cols``  -- list of one or more strings given to ``-cols``
          * ``delim`` -- value of ``-delim``, or ``None`` when not given
          * ``sep``   -- value of ``-sep``, or ``None`` when not given
    """
    p = ArgumentParser(description='Command Line Arguments')

    p.add_argument('-in1', dest='file_1', required=True,
                   help='file_1')
    p.add_argument('-in2', dest='file_2', required=True,
                   help='file_2')
    p.add_argument('-cols', dest='cols', required=True, nargs='+',
                   help='Column(s) from file_2 to graft onto file_1 '
                        '(multiple ok)')

    # Raw strings so the help output shows the defaults literally as \s+ and
    # \t: a plain '\s' is an invalid escape sequence and a plain '\t' is a
    # real tab character, which argparse collapses to a blank.
    p.add_argument('-delim', dest='delim', required=False,
                   help=r'Delimiter for input file (def: "\s+")')
    p.add_argument('-sep', dest='sep', required=False,
                   help=r'Delimiter for output file (def: "\t")')

    return p.parse_args()
##################################################################
# Script entry point: run the tool only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
##################################################################
#
# Peter MU Ung @ MSSM/Yale
#
# v1 19.12.09
#
# Rewrite of combine_column.pl in Python to fix some issues in the Perl version.
# Uses pandas to streamline the data organization and the write-out step.
#