From 83d180eac0e78e7dd002743cca93287d5da74aaf Mon Sep 17 00:00:00 2001 From: Hajime Suzuki Date: Tue, 14 Jul 2020 23:43:03 +0900 Subject: [PATCH] add --post option to split / split2 subcommands --- seqkit/cmd/helper.go | 18 +++++++++++++++--- seqkit/cmd/split.go | 32 ++++++++++++++------------------ seqkit/cmd/split2.go | 11 +++++------ 3 files changed, 34 insertions(+), 27 deletions(-) diff --git a/seqkit/cmd/helper.go b/seqkit/cmd/helper.go index d1316797..5fbe07e2 100644 --- a/seqkit/cmd/helper.go +++ b/seqkit/cmd/helper.go @@ -415,7 +415,20 @@ negative index 0-9-8-7-6-5-4-3-2-1 -12:-1 A C G T N a c g t n ` -func writeSeqs(records []*fastx.Record, file string, lineWidth int, quiet bool, dryRun bool) error { + +// openWriter opens file via xopen.Wopen, or via xopen.WopenPipe with post when post is non-empty; open errors go to checkError. +func openWriter(file string, post string) *xopen.Writer { + if post == "" { + outfh, err := xopen.Wopen(file) + checkError(err) + return outfh + } + outfh, err := xopen.WopenPipe(file, post) + checkError(err) + return outfh +} + +func writeSeqs(records []*fastx.Record, file string, post string, lineWidth int, quiet bool, dryRun bool) error { if !quiet { log.Infof("write %d sequences to file: %s\n", len(records), file) } @@ -423,8 +436,7 @@ func writeSeqs(records []*fastx.Record, file string, lineWidth int, quiet bool, return nil } - outfh, err := xopen.Wopen(file) - checkError(err) + outfh := openWriter(file, post) defer outfh.Close() for _, record := range records { diff --git a/seqkit/cmd/split.go b/seqkit/cmd/split.go index bc15f2e7..049a289a 100644 --- a/seqkit/cmd/split.go +++ b/seqkit/cmd/split.go @@ -88,6 +88,7 @@ Examples: dryRun := getFlagBool(cmd, "dry-run") outdir := getFlagString(cmd, "out-dir") + post := getFlagString(cmd, "post") force := getFlagBool(cmd, "force") file := files[0] @@ -171,14 +172,14 @@ Examples: records = append(records, record.Clone()) if len(records) == size { outfile = filepath.Join(outdir, fmt.Sprintf("%s.part_%03d%s", filepath.Base(fileName), i, fileExt)) - writeSeqs(records, outfile, config.LineWidth, 
quiet, dryRun) + writeSeqs(records, outfile, post, config.LineWidth, quiet, dryRun) i++ records = []*fastx.Record{} } } if len(records) > 0 { outfile = filepath.Join(outdir, fmt.Sprintf("%s.part_%03d%s", filepath.Base(fileName), i, fileExt)) - writeSeqs(records, outfile, config.LineWidth, quiet, dryRun) + writeSeqs(records, outfile, post, config.LineWidth, quiet, dryRun) } return @@ -245,8 +246,7 @@ Examples: if len(IDs) > 0 { outfile = filepath.Join(outdir, fmt.Sprintf("%s.part_%03d%s", filepath.Base(fileName), n, fileExt)) if !dryRun { - outfh, err = xopen.Wopen(outfile) - checkError(err) + outfh = openWriter(outfile, post) } } j := 0 @@ -273,8 +273,7 @@ Examples: outfile = filepath.Join(outdir, fmt.Sprintf("%s.part_%03d%s", filepath.Base(fileName), n, fileExt)) if !dryRun { outfh.Close() - outfh, err = xopen.Wopen(outfile) - checkError(err) + outfh = openWriter(outfile, post) } j = 0 } @@ -338,14 +337,14 @@ Examples: records = append(records, record) if len(records) == size { outfile = filepath.Join(outdir, fmt.Sprintf("%s.part_%03d%s", filepath.Base(fileName), i, fileExt)) - writeSeqs(records, outfile, config.LineWidth, quiet, dryRun) + writeSeqs(records, outfile, post, config.LineWidth, quiet, dryRun) i++ records = []*fastx.Record{} } } if len(records) > 0 { outfile = filepath.Join(outdir, fmt.Sprintf("%s.part_%03d%s", filepath.Base(fileName), i, fileExt)) - writeSeqs(records, outfile, config.LineWidth, quiet, dryRun) + writeSeqs(records, outfile, post, config.LineWidth, quiet, dryRun) } return } @@ -423,8 +422,7 @@ Examples: if len(IDs) > 0 { outfile = filepath.Join(outdir, fmt.Sprintf("%s.part_%03d%s", filepath.Base(fileName), n, fileExt)) if !dryRun { - outfh, err = xopen.Wopen(outfile) - checkError(err) + outfh = openWriter(outfile, post) } } j := 0 @@ -451,8 +449,7 @@ Examples: outfile = filepath.Join(outdir, fmt.Sprintf("%s.part_%03d%s", filepath.Base(fileName), n, fileExt)) if !dryRun { outfh.Close() - outfh, err = xopen.Wopen(outfile) - checkError(err) + outfh 
= openWriter(outfile, post) } j = 0 } @@ -514,7 +511,7 @@ Examples: outfile = filepath.Join(outdir, fmt.Sprintf("%s.id_%s%s", filepath.Base(fileName), pathutil.RemoveInvalidPathChars(id, "__"), fileExt)) - writeSeqs(records, outfile, config.LineWidth, quiet, dryRun) + writeSeqs(records, outfile, post, config.LineWidth, quiet, dryRun) } return } @@ -610,8 +607,7 @@ Examples: pathutil.RemoveInvalidPathChars(id, "__"), fileExt)) if !dryRun { - outfh, err = xopen.Wopen(outfile) - checkError(err) + outfh = openWriter(outfile, post) for _, chr := range _IDs { r, ok := faidx.Index[chr] if !ok { @@ -705,7 +701,7 @@ Examples: var outfile string for subseq, records := range recordsBySeqs { outfile = filepath.Join(outdir, fmt.Sprintf("%s.region_%d:%d_%s%s", filepath.Base(fileName), start, end, subseq, fileExt)) - writeSeqs(records, outfile, config.LineWidth, quiet, dryRun) + writeSeqs(records, outfile, post, config.LineWidth, quiet, dryRun) } return } @@ -810,8 +806,7 @@ Examples: outfile := filepath.Join(outdir, fmt.Sprintf("%s.region_%d:%d_%s%s", filepath.Base(fileName), start, end, subseq, fileExt)) if !dryRun { - outfh, err = xopen.Wopen(outfile) - checkError(err) + outfh = openWriter(outfile, post) for _, chr := range chrs { r, ok := faidx.Index[chr] @@ -855,6 +850,7 @@ func init() { splitCmd.Flags().BoolP("dry-run", "d", false, "dry run, just print message and no files will be created.") splitCmd.Flags().BoolP("keep-temp", "k", false, "keep tempory FASTA and .fai file when using 2-pass mode") splitCmd.Flags().StringP("out-dir", "O", "", "output directory (default value is $infile.split)") + splitCmd.Flags().StringP("post", "P", "", "postprocess shell command ($FILE for formatted out-dir)") splitCmd.Flags().BoolP("force", "f", false, "overwrite output directory") } diff --git a/seqkit/cmd/split2.go b/seqkit/cmd/split2.go index b44e1294..c6968043 100644 --- a/seqkit/cmd/split2.go +++ b/seqkit/cmd/split2.go @@ -87,6 +87,7 @@ according to the input files. 
} outdir := getFlagString(cmd, "out-dir") + post := getFlagString(cmd, "post") force := getFlagBool(cmd, "force") if size == 0 && parts == 0 && length == 0 { @@ -244,8 +245,7 @@ according to the input files. i++ var outfh2 *xopen.Writer outfile := filepath.Join(outdir, fmt.Sprintf("%s.part_%03d%s", filepath.Base(fileName), i+1, fileExt)) - outfh2, err = xopen.Wopen(outfile) - checkError(err) + outfh2 = openWriter(outfile, post) outfhs = append(outfhs, outfh2) counts = append(counts, 0) @@ -264,8 +264,7 @@ according to the input files. i++ var outfh2 *xopen.Writer outfile := filepath.Join(outdir, fmt.Sprintf("%s.part_%03d%s", filepath.Base(fileName), i+1, fileExt)) - outfh2, err = xopen.Wopen(outfile) - checkError(err) + outfh2 = openWriter(outfile, post) outfhs = append(outfhs, outfh2) counts = append(counts, 0) @@ -278,8 +277,7 @@ according to the input files. if i+1 > len(outfhs) || outfhs[i] == nil { var outfh2 *xopen.Writer outfile := filepath.Join(outdir, fmt.Sprintf("%s.part_%03d%s", filepath.Base(fileName), i+1, fileExt)) - outfh2, err = xopen.Wopen(outfile) - checkError(err) + outfh2 = openWriter(outfile, post) outfhs = append(outfhs, outfh2) counts = append(counts, 0) @@ -331,5 +329,6 @@ func init() { split2Cmd.Flags().IntP("by-part", "p", 0, "split sequences into N parts") split2Cmd.Flags().StringP("by-length", "l", "", "split sequences into chunks of N bases, supports K/M/G suffix") split2Cmd.Flags().StringP("out-dir", "O", "", "output directory (default value is $infile.split)") + split2Cmd.Flags().StringP("post", "P", "", "postprocess shell command ($FILE for formatted out-dir)") split2Cmd.Flags().BoolP("force", "f", false, "overwrite output directory") }