From bd7ad0261e72246ac0e0313ceb7d1e887a62c806 Mon Sep 17 00:00:00 2001 From: BrianWeiHaoMa Date: Wed, 25 Sep 2024 14:31:15 -0400 Subject: [PATCH] initial commit --- .gitignore | 3 + LICENSE | 21 + README.md | 100 ++++ csvcheckcli/csvcheckcli.go | 270 ++++++++++ csvcheckcli/csvcheckcli_test.go | 875 ++++++++++++++++++++++++++++++++ go.mod | 18 + go.sum | 26 + main.go | 68 +++ 8 files changed, 1381 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 README.md create mode 100644 csvcheckcli/csvcheckcli.go create mode 100644 csvcheckcli/csvcheckcli_test.go create mode 100644 go.mod create mode 100644 go.sum create mode 100644 main.go diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..968fcce --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +/input_files +/output_files +/tmp \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..f3a1bf9 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 Brian Wei Hao Ma + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..6d8049e --- /dev/null +++ b/README.md @@ -0,0 +1,100 @@ +# csvcheckcli +A command-line tool for comparing the rows of different csv files. + +## Installation +Check the releases and install the executable directly, or clone the repository +and then compile it manually with Go using +``` +git clone https://github.com/BrianWeiHaoMa/csvcheckcli.git +cd ./csvcheckcli +go build +``` + +## Options +Use ./csvcheckcli -h (or ./csvcheckcli.exe -h depending on what OS you are using) to view the options +``` + -t, --addtimestamp Whether or not to add a timestamp to the output file name. + -a, --autoalign Whether or not to auto align the columns of the csv files. Common columns will be aligned on the left side. + -r, --columnsarrangement1 stringArray An arrangement for the columns in the first output. + -R, --columnsarrangement2 stringArray An arrangement for the columns in the second output. + -p, --csv Whether to print the output in csv format. By default, the output is printed in a columns-aligned. + -D, --deletecolumns stringArray The columns to delete in the output. + -f, --files stringArray The input files paths to compare. 2 should be provided. + -F, --function string The function to use for comparison. Options: common, different. A function must be given. + -i, --ignorecolumns stringArray The columns to ignore for comparison. + -d, --inputdir string The directory containing the input files. This will be prepended to the input file paths. Must be given. + -K, --keepcolumns stringArray The columns to keep in the output. 
+ -k, --keepindex Whether to keep the indices from the original csv of the rows in the result (_ind column will be added). + -m, --method string The method to use for comparison. Options: match, set, direct. By default, set is used. (default "set") + -o, --outputdir string The directory to write the output files to. + -l, --prettyformatmaxlength int The maximum length before truncation of a column entry when printing in pretty format. Negative values mean no limit. By default, there is no limit. (default -1) + -c, --usecolumns stringArray The columns to use for comparison. + -C, --usecommoncolumns Whether to use all the common columns between the csv files for comparison. +``` + +## Examples +We will use the input files csv1.csv and csv2.csv for these examples. + +csv1.csv +| a | b | c | +|----|----|----| +| 1 | 2 | 3 | +| 4 | 5 | 6 | +| 7 | 8 | 9 | +| 10 | 11 | 12 | + +csv2.csv +| a | b | c | +|----|----|----| +| 0 | 0 | 0 | +| 1 | 2 | 3 | +| 1 | 2 | 3 | +| 5 | 5 | 5 | +| 11 | 11 | 11 | + +### Example 1: +#### Input: +``` +.\csvcheckcli.exe -d .\input_files\ -k -f csv1.csv,csv2.csv -F common -o output_files +``` + +#### Output: +``` +Start time: 2024-09-25 13:51:22 + +Results for file csv1.csv: +a b c _ind +1 2 3 1 + +Results for file csv2.csv: +a b c _ind +1 2 3 2 +1 2 3 3 + +Results written to output_files\csvcheck_csv1.csv and output_files\csvcheck_csv2.csv. +``` + +### Example 2: +#### Input: +``` +.\csvcheckcli.exe -d .\input_files\ -k -f csv1.csv,csv2.csv -F different -r c,b,a,_ind -R _ind,c,b,a -p -o output_files +``` + +#### Output: +``` +Start time: 2024-09-25 13:54:42 + +Results for file csv1.csv: +c,b,a,_ind +6,5,4,2 +9,8,7,3 +12,11,10,4 + +Results for file csv2.csv: +_ind,c,b,a +1,0,0,0 +4,5,5,5 +5,11,11,11 + +Results written to output_files\csvcheck_csv1.csv and output_files\csvcheck_csv2.csv. 
+```
\ No newline at end of file
diff --git a/csvcheckcli/csvcheckcli.go b/csvcheckcli/csvcheckcli.go
new file mode 100644
index 0000000..f74ab07
--- /dev/null
+++ b/csvcheckcli/csvcheckcli.go
@@ -0,0 +1,270 @@
+// Package csvcheckcli implements the option parsing, comparison and file
+// helpers behind the csvcheckcli command-line tool.
+package csvcheckcli
+
+import (
+	"encoding/csv"
+	"fmt"
+	"log"
+	"os"
+	"strconv"
+
+	"github.com/BrianWeiHaoMa/csvcheck"
+
+	"github.com/spf13/pflag"
+)
+
+// IndexColumnName is the header of the column that is appended to the results
+// when the original row indices are kept.
+const IndexColumnName = "_ind"
+
+// Accepted values of the method option.
+const (
+	MethodStringMatch  = "match"
+	MethodStringSet    = "set"
+	MethodStringDirect = "direct"
+)
+
+// Accepted values of the function option.
+const (
+	FunctionStringCommon    = "common"
+	FunctionStringDifferent = "different"
+)
+
+// MethodMappings translates a method option value into the corresponding
+// csvcheck method constant.
+var MethodMappings = map[string]int{
+	MethodStringMatch:  csvcheck.MethodMatch,
+	MethodStringSet:    csvcheck.MethodSet,
+	MethodStringDirect: csvcheck.MethodDirect,
+}
+
+// UserInput holds every option of a comparison run. The fields are pointers
+// because, when the options come from the command line, they are the values
+// handed out by pflag.
+type UserInput struct {
+	InputDir              *string
+	Files                 *[]string
+	Method                *string
+	Function              *string
+	KeepIndex             *bool
+	OutputDir             *string
+	AddTimestamp          *bool
+	ColumnsToUse          *[]string
+	ColumnsToIgnore       *[]string
+	AutoAlign             *bool
+	UseCommonColumns      *bool
+	ColumnsToKeep         *[]string
+	ColumnsToDelete       *[]string
+	ColumnsArrangement1   *[]string
+	ColumnsArrangement2   *[]string
+	PrintInCsvFormat      *bool
+	PrettyFormatMaxLength *int
+}
+
+// ParseUserInput produces a validated UserInput.
+//
+// When input is nil the options are registered as flags on the global pflag
+// flag set and parsed from the command line. Otherwise a copy of *input is
+// used; any pointer field left nil in it is replaced by a pointer to the same
+// default the corresponding flag has, so a partially filled UserInput no
+// longer causes a nil-pointer panic here. The caller's struct is not modified.
+//
+// An error is returned when inputdir is empty, when the number of files is not
+// exactly 2, when the method or function is unsupported or the function is
+// missing, when more than one of usecolumns/ignorecolumns/usecommoncolumns is
+// given, or when keepcolumns and deletecolumns are both given.
+func ParseUserInput(input *UserInput) (UserInput, error) {
+	var res UserInput
+	if input == nil {
+		res = userInputFromFlags()
+	} else {
+		res = withDefaults(*input)
+	}
+
+	if *res.InputDir == "" {
+		return UserInput{}, fmt.Errorf("inputdir must be given")
+	}
+
+	if len(*res.Files) != 2 {
+		return UserInput{}, fmt.Errorf("exactly 2 file paths needed")
+	}
+
+	if _, exists := MethodMappings[*res.Method]; !exists {
+		return UserInput{}, fmt.Errorf("unsupported method %s", *res.Method)
+	}
+
+	// The three ways of choosing the comparison columns are mutually exclusive.
+	columnsCompInputCnt := 0
+	if *res.ColumnsToUse != nil {
+		columnsCompInputCnt++
+	}
+	if *res.ColumnsToIgnore != nil {
+		columnsCompInputCnt++
+	}
+	if *res.UseCommonColumns {
+		columnsCompInputCnt++
+	}
+	if columnsCompInputCnt > 1 {
+		return UserInput{}, fmt.Errorf("usecolumns, ignorecolumns, and usecommoncolumns cannot be used together")
+	}
+
+	if *res.ColumnsToKeep != nil && *res.ColumnsToDelete != nil {
+		return UserInput{}, fmt.Errorf("keepcolumns and deletecolumns cannot be used together")
+	}
+
+	switch *res.Function {
+	case FunctionStringCommon, FunctionStringDifferent:
+	case "":
+		return UserInput{}, fmt.Errorf("function must be given")
+	default:
+		return UserInput{}, fmt.Errorf("unsupported function %s", *res.Function)
+	}
+
+	return res, nil
+}
+
+// userInputFromFlags registers every option on the global pflag flag set,
+// parses the command line and returns the bound values.
+// NOTE(review): the flags are registered on each call, so this is presumably
+// meant to run once per process - confirm against the caller.
+func userInputFromFlags() UserInput {
+	var res UserInput
+	res.InputDir = pflag.StringP("inputdir", "d", "", "The directory containing the input files. This will be prepended to the input file paths. Must be given.")
+	res.Files = pflag.StringSliceP("files", "f", []string{}, "The input files paths to compare. 2 should be provided.")
+	res.Method = pflag.StringP("method", "m", "set", "The method to use for comparison. Options: match, set, direct. By default, set is used.")
+	res.Function = pflag.StringP("function", "F", "", "The function to use for comparison. Options: common, different. A function must be given.")
+	res.KeepIndex = pflag.BoolP("keepindex", "k", false, fmt.Sprintf("Whether to keep the indices from the original csv of the rows in the result (%s column will be added).", IndexColumnName))
+	res.OutputDir = pflag.StringP("outputdir", "o", "", "The directory to write the output files to.")
+	res.AddTimestamp = pflag.BoolP("addtimestamp", "t", false, "Whether or not to add a timestamp to the output file name.")
+	res.ColumnsToUse = pflag.StringSliceP("usecolumns", "c", nil, "The columns to use for comparison.")
+	res.ColumnsToIgnore = pflag.StringSliceP("ignorecolumns", "i", nil, "The columns to ignore for comparison.")
+	res.AutoAlign = pflag.BoolP("autoalign", "a", false, "Whether or not to auto align the columns of the csv files. Common columns will be aligned on the left side.")
+	res.UseCommonColumns = pflag.BoolP("usecommoncolumns", "C", false, "Whether to use all the common columns between the csv files for comparison.")
+	res.ColumnsToKeep = pflag.StringSliceP("keepcolumns", "K", nil, "The columns to keep in the output.")
+	res.ColumnsToDelete = pflag.StringSliceP("deletecolumns", "D", nil, "The columns to delete in the output.")
+	res.ColumnsArrangement1 = pflag.StringSliceP("columnsarrangement1", "r", nil, "An arrangement for the columns in the first output.")
+	res.ColumnsArrangement2 = pflag.StringSliceP("columnsarrangement2", "R", nil, "An arrangement for the columns in the second output.")
+	res.PrintInCsvFormat = pflag.BoolP("csv", "p", false, "Whether to print the output in csv format. By default, the output is printed in a columns-aligned.")
+	res.PrettyFormatMaxLength = pflag.IntP("prettyformatmaxlength", "l", -1, "The maximum length before truncation of a column entry when printing in pretty format. Negative values mean no limit. By default, there is no limit.")
+
+	pflag.Parse()
+	return res
+}
+
+// withDefaults returns in with every nil pointer field replaced by a pointer
+// to the default value of the matching command-line flag.
+func withDefaults(in UserInput) UserInput {
+	in.InputDir = orDefault(in.InputDir, "")
+	in.Files = orDefault(in.Files, []string{})
+	in.Method = orDefault(in.Method, MethodStringSet)
+	in.Function = orDefault(in.Function, "")
+	in.KeepIndex = orDefault(in.KeepIndex, false)
+	in.OutputDir = orDefault(in.OutputDir, "")
+	in.AddTimestamp = orDefault(in.AddTimestamp, false)
+	in.ColumnsToUse = orDefault(in.ColumnsToUse, []string(nil))
+	in.ColumnsToIgnore = orDefault(in.ColumnsToIgnore, []string(nil))
+	in.AutoAlign = orDefault(in.AutoAlign, false)
+	in.UseCommonColumns = orDefault(in.UseCommonColumns, false)
+	in.ColumnsToKeep = orDefault(in.ColumnsToKeep, []string(nil))
+	in.ColumnsToDelete = orDefault(in.ColumnsToDelete, []string(nil))
+	in.ColumnsArrangement1 = orDefault(in.ColumnsArrangement1, []string(nil))
+	in.ColumnsArrangement2 = orDefault(in.ColumnsArrangement2, []string(nil))
+	in.PrintInCsvFormat = orDefault(in.PrintInCsvFormat, false)
+	in.PrettyFormatMaxLength = orDefault(in.PrettyFormatMaxLength, -1)
+	return in
+}
+
+// orDefault returns p when it is non-nil and a pointer to def otherwise.
+func orDefault[T any](p *T, def T) *T {
+	if p != nil {
+		return p
+	}
+	return &def
+}
+
+// addIndexToRow returns a copy of row with index appended, in decimal, as the
+// right-most cell. The input row is not modified.
+func addIndexToRow(row []csvcheck.StringHashable, index int) []csvcheck.StringHashable {
+	res := make([]csvcheck.StringHashable, len(row), len(row)+1)
+	copy(res, row)
+	return append(res, csvcheck.BasicStringHashable(strconv.Itoa(index)))
+}
+
+// Gets the result arrays based off of user input.
+//
+// The steps run in this order: optionally take the comparison columns from the
+// columns common to both inputs, optionally auto-align the inputs, run the
+// selected comparison (common or different rows), optionally append the
+// IndexColumnName column, keep only the requested columns, drop the deleted
+// columns and finally apply the per-output column arrangements.
+//
+// Every pointer field of input that is read here must be non-nil. An error is
+// returned for an unsupported function, when any csvcheck step fails, or when
+// the index column cannot be added because a result has no header row or has
+// fewer indices than rows.
+func GetResArrays(csvArray1, csvArray2 [][]csvcheck.StringHashable, input UserInput) ([][]csvcheck.StringHashable, [][]csvcheck.StringHashable, error) {
+	columnsToUse := csvcheck.GetRowFromRow(*input.ColumnsToUse)
+	columnsToIgnore := csvcheck.GetRowFromRow(*input.ColumnsToIgnore)
+
+	var err error
+	if *input.UseCommonColumns {
+		columnsToUse, err = csvcheck.GetCommonColumns(csvArray1, csvArray2)
+		if err != nil {
+			return nil, nil, err
+		}
+	}
+
+	if *input.AutoAlign {
+		csvArray1, csvArray2, err = csvcheck.AutoAlignCsvArrays(csvArray1, csvArray2)
+		if err != nil {
+			return nil, nil, err
+		}
+	}
+
+	options := csvcheck.Options{
+		Method:        MethodMappings[*input.Method],
+		UseColumns:    columnsToUse,
+		IgnoreColumns: columnsToIgnore,
+	}
+
+	if *input.KeepIndex {
+		options.SortIndices = true
+	}
+
+	var (
+		res1, res2         [][]csvcheck.StringHashable
+		indices1, indices2 []int
+	)
+	switch *input.Function {
+	case FunctionStringCommon:
+		res1, res2, indices1, indices2, err = csvcheck.GetCommonRows(csvArray1, csvArray2, options)
+	case FunctionStringDifferent:
+		res1, res2, indices1, indices2, err = csvcheck.GetDifferentRows(csvArray1, csvArray2, options)
+	default:
+		return nil, nil, fmt.Errorf("unsupported function")
+	}
+
+	if err != nil {
+		return nil, nil, err
+	}
+
+	if *input.KeepIndex {
+		if err = appendIndexColumn(res1, indices1); err != nil {
+			return nil, nil, err
+		}
+		if err = appendIndexColumn(res2, indices2); err != nil {
+			return nil, nil, err
+		}
+	}
+
+	columnsToKeep := csvcheck.GetRowFromRow(*input.ColumnsToKeep)
+	columnsToDelete := csvcheck.GetRowFromRow(*input.ColumnsToDelete)
+	if columnsToKeep == nil {
+		// No explicit selection: keep every column of either result.
+		columnsToKeep = joinHeaders(res1, res2)
+	}
+	if columnsToDelete == nil {
+		columnsToDelete = []csvcheck.StringHashable{}
+	}
+
+	res1, err = csvcheck.KeepColumns(res1, columnsToKeep)
+	if err != nil {
+		return nil, nil, err
+	}
+	res2, err = csvcheck.KeepColumns(res2, columnsToKeep)
+	if err != nil {
+		return nil, nil, err
+	}
+
+	res1, err = csvcheck.IgnoreColumns(res1, columnsToDelete)
+	if err != nil {
+		return nil, nil, err
+	}
+	res2, err = csvcheck.IgnoreColumns(res2, columnsToDelete)
+	if err != nil {
+		return nil, nil, err
+	}
+
+	columnsArrangement1 := csvcheck.GetRowFromRow(*input.ColumnsArrangement1)
+	columnsArrangement2 := csvcheck.GetRowFromRow(*input.ColumnsArrangement2)
+	if *input.ColumnsArrangement1 != nil {
+		res1, err = csvcheck.RearrangeColumns(res1, columnsArrangement1)
+		if err != nil {
+			return nil, nil, err
+		}
+	}
+	if *input.ColumnsArrangement2 != nil {
+		res2, err = csvcheck.RearrangeColumns(res2, columnsArrangement2)
+		if err != nil {
+			return nil, nil, err
+		}
+	}
+
+	return res1, res2, nil
+}
+
+// appendIndexColumn appends the IndexColumnName header to rows[0] and, to each
+// following row i, the value indices[i]. The rows slice is updated in place,
+// but every changed row is a fresh copy, so appending can never write into a
+// backing array that rows shares with another slice.
+func appendIndexColumn(rows [][]csvcheck.StringHashable, indices []int) error {
+	if len(rows) == 0 {
+		return fmt.Errorf("cannot add %s column: result has no header row", IndexColumnName)
+	}
+	// indices is addressed with the row position, so it only has to be long
+	// enough when there is at least one data row.
+	if len(rows) > 1 && len(indices) < len(rows) {
+		return fmt.Errorf("cannot add %s column: %d rows but only %d indices", IndexColumnName, len(rows), len(indices))
+	}
+
+	header := make([]csvcheck.StringHashable, len(rows[0]), len(rows[0])+1)
+	copy(header, rows[0])
+	rows[0] = append(header, csvcheck.BasicStringHashable(IndexColumnName))
+	for i := 1; i < len(rows); i++ {
+		rows[i] = addIndexToRow(rows[i], indices[i])
+	}
+	return nil
+}
+
+// joinHeaders returns the header row of a followed by the header row of b in a
+// newly allocated slice. A result without any row contributes no columns.
+func joinHeaders(a, b [][]csvcheck.StringHashable) []csvcheck.StringHashable {
+	var headerA, headerB []csvcheck.StringHashable
+	if len(a) > 0 {
+		headerA = a[0]
+	}
+	if len(b) > 0 {
+		headerB = b[0]
+	}
+	joined := make([]csvcheck.StringHashable, 0, len(headerA)+len(headerB))
+	joined = append(joined, headerA...)
+	return append(joined, headerB...)
+}
+
+// ReadCsvFile reads the whole csv file at filePath and returns its records,
+// header row included, converted with csvcheck.GetRowFromRow.
+//
+// The file is parsed with the default encoding/csv reader settings, so every
+// record must have the same number of fields as the first one. The function
+// logs and panics when the file cannot be opened or parsed.
+func ReadCsvFile(filePath string) [][]csvcheck.StringHashable {
+	file, err := os.Open(filePath)
+	if err != nil {
+		log.Panic(err)
+	}
+	defer file.Close()
+
+	records, err := csv.NewReader(file).ReadAll()
+	if err != nil {
+		// csv errors carry no file name, so add the path for context.
+		log.Panicf("%s: %v", filePath, err)
+	}
+
+	res := make([][]csvcheck.StringHashable, len(records))
+	for i, record := range records {
+		res[i] = csvcheck.GetRowFromRow(record)
+	}
+
+	return res
+}
+
+// WriteString writes content to the file at filePath, creating it if needed
+// and truncating it otherwise. It panics when the file cannot be created,
+// written or closed.
+func WriteString(filePath string, content string) {
+	// os.WriteFile also reports a failing Close, which a deferred Close on the
+	// written file would silently drop. 0o666 is the mode os.Create uses.
+	if err := os.WriteFile(filePath, []byte(content), 0o666); err != nil {
+		panic(err)
+	}
+}
diff --git a/csvcheckcli/csvcheckcli_test.go b/csvcheckcli/csvcheckcli_test.go
new file mode 100644
index 0000000..e0799d0
--- /dev/null
+++ b/csvcheckcli/csvcheckcli_test.go
@@ -0,0 +1,875 @@
+package csvcheckcli_test
+
+import (
+	"csvcheckcli/csvcheckcli"
+	"encoding/csv"
+	"fmt"
+	"strings"
+	"testing"
+
+	"github.com/BrianWeiHaoMa/csvcheck"
+	"github.com/stretchr/testify/assert"
+)
+
+// Get2DArrayFromCsvString parses csvString with encoding/csv and returns the
+// records as rows of csvcheck.BasicStringHashable cells. It panics when the
+// string is not valid csv.
+func Get2DArrayFromCsvString(csvString string) [][]csvcheck.StringHashable {
+	reader := csv.NewReader(strings.NewReader(csvString))
+	records, err := reader.ReadAll()
+	if err != nil {
+		panic(err)
+	}
+
+	res := make([][]csvcheck.StringHashable, len(records))
+	for i, row := range records {
+		res[i] = make([]csvcheck.StringHashable, len(row))
+		for j, cell := range row {
+			res[i][j] = csvcheck.BasicStringHashable(cell)
+		}
+	}
+	return res
+}
+
+// userInputSolid mirrors csvcheckcli.UserInput with plain values instead of
+// pointers so that test fixtures can be written as literals.
+type userInputSolid struct {
+	inputDir            string
+	files               []string
+	method              string
+	function            string
+	keepIndex           bool
+	outputDir           string
+	addTimestamp        bool
+	columnsToUse        []string
+	columnsToIgnore     []string
+	autoAlign           bool
+	useCommonColumns    bool
+	ColumnsToKeep       []string
+	ColumnsToDelete     []string
+	ColumnsArrangement1 []string
+	ColumnsArrangement2 []string
+}
+
+// getUserInput converts the fixture into a csvcheckcli.UserInput whose fields
+// point at the fixture's values. PrintInCsvFormat and PrettyFormatMaxLength
+// are left nil.
+func (o userInputSolid) getUserInput() csvcheckcli.UserInput {
+	return csvcheckcli.UserInput{
+		InputDir:            &o.inputDir,
+		Files:               &o.files,
+		Method:              &o.method,
+		Function:            &o.function,
+		KeepIndex:           &o.keepIndex,
+		OutputDir:           &o.outputDir,
+		AddTimestamp:        &o.addTimestamp,
+		ColumnsToUse:        &o.columnsToUse,
+		ColumnsToIgnore:     &o.columnsToIgnore,
+		AutoAlign:           &o.autoAlign,
+		UseCommonColumns:    &o.useCommonColumns,
+		ColumnsToKeep:       &o.ColumnsToKeep,
+		ColumnsToDelete:     &o.ColumnsToDelete,
+		ColumnsArrangement1: &o.ColumnsArrangement1,
+		ColumnsArrangement2: &o.ColumnsArrangement2,
+	}
+}
+
+// TestParseUserInputProperAndImproperInputs checks, case by case, whether
+// ParseUserInput accepts or rejects a pre-built UserInput.
+func TestParseUserInputProperAndImproperInputs(t *testing.T) {
+	for i, data := range []struct {
+		input       csvcheckcli.UserInput
+		expectError bool
+	}{
+		{
+			input: userInputSolid{
+				inputDir:         "/path/to/input/dir",
+				files:            []string{"file1.csv", "file2.csv"},
+				method:           csvcheckcli.MethodStringMatch,
+				function:         csvcheckcli.FunctionStringCommon,
+				keepIndex:        true,
+				outputDir:        "/path/to/output/dir",
+				addTimestamp:     true,
+				columnsToUse:     []string{"column1", "column2"},
+				columnsToIgnore:  nil,
+				autoAlign:        true,
+				useCommonColumns: false,
+			}.getUserInput(),
+			expectError: false,
+		},
+		{
+			input: userInputSolid{
+				inputDir:         "/path/to/input/dir",
+				files:            []string{"file1.csv", "file2.csv"},
+				method:           csvcheckcli.MethodStringMatch,
+				function:         csvcheckcli.FunctionStringCommon,
+				keepIndex:        false,
+				outputDir:        "/path/to/output/dir",
+				addTimestamp:     true,
+				columnsToUse:     nil,
+				columnsToIgnore:  nil,
+				autoAlign:        true,
+				useCommonColumns: true,
+			}.getUserInput(),
+			expectError: false,
+		},
+		{
+			// Only one file.
+			input: userInputSolid{
+				inputDir:         "/path/to/input/dir",
+				files:            []string{"file2.csv"},
+				method:           csvcheckcli.MethodStringMatch,
+				function:         csvcheckcli.FunctionStringCommon,
+				keepIndex:        false,
+				outputDir:        "/path/to/output/dir",
+				addTimestamp:     true,
+				columnsToUse:     nil,
+				columnsToIgnore:  nil,
+				autoAlign:        true,
+				useCommonColumns: true,
+			}.getUserInput(),
+			expectError: true,
+		},
+		{
+			// Three files.
+			input: userInputSolid{
+				inputDir:         "/path/to/input/dir",
+				files:            []string{"file2.csv", "file2.csv", "file2.csv"},
+				method:           csvcheckcli.MethodStringMatch,
+				function:         csvcheckcli.FunctionStringCommon,
+				keepIndex:        false,
+				outputDir:        "/path/to/output/dir",
+				addTimestamp:     true,
+				columnsToUse:     nil,
+				columnsToIgnore:  nil,
+				autoAlign:        true,
+				useCommonColumns: true,
+			}.getUserInput(),
+			expectError: true,
+		},
+		{
+			// usecolumns together with usecommoncolumns.
+			input: userInputSolid{
+				inputDir:         "/path/to/input/dir",
+				files:            []string{"file2.csv", "file2.csv"},
+				method:           csvcheckcli.MethodStringMatch,
+				function:         csvcheckcli.FunctionStringCommon,
+				keepIndex:        false,
+				outputDir:        "/path/to/output/dir",
+				addTimestamp:     true,
+				columnsToUse:     []string{"column1", "column2"},
+				columnsToIgnore:  nil,
+				autoAlign:        true,
+				useCommonColumns: true,
+			}.getUserInput(),
+			expectError: true,
+		},
+		{
+			// usecolumns together with ignorecolumns.
+			input: userInputSolid{
+				inputDir:         "/path/to/input/dir",
+				files:            []string{"file2.csv", "file2.csv"},
+				method:           csvcheckcli.MethodStringMatch,
+				function:         csvcheckcli.FunctionStringCommon,
+				keepIndex:        false,
+				outputDir:        "/path/to/output/dir",
+				addTimestamp:     true,
+				columnsToUse:     []string{"column1", "column2"},
+				columnsToIgnore:  []string{"column1", "column2"},
+				autoAlign:        true,
+				useCommonColumns: false,
+			}.getUserInput(),
+			expectError: true,
+		},
+		{
+			// Unknown method.
+			input: userInputSolid{
+				inputDir:         "/path/to/input/dir",
+				files:            []string{"file2.csv", "file2.csv"},
+				method:           "subtract",
+				function:         csvcheckcli.FunctionStringCommon,
+				keepIndex:        false,
+				outputDir:        "/path/to/output/dir",
+				addTimestamp:     true,
+				columnsToUse:     nil,
+				columnsToIgnore:  nil,
+				autoAlign:        true,
+				useCommonColumns: true,
+			}.getUserInput(),
+			expectError: true,
+		},
+		{
+			// Unknown function.
+			input: userInputSolid{
+				inputDir:         "/path/to/input/dir",
+				files:            []string{"file2.csv", "file2.csv"},
+				method:           csvcheckcli.MethodStringMatch,
+				function:         "flip",
+				keepIndex:        false,
+				outputDir:        "/path/to/output/dir",
+				addTimestamp:     true,
+				columnsToUse:     nil,
+				columnsToIgnore:  nil,
+				autoAlign:        true,
+				useCommonColumns: true,
+			}.getUserInput(),
+			expectError: true,
+		},
+		{
+			input: userInputSolid{
+				inputDir:            "/path/to/input/dir",
+				files:               []string{"file2.csv", "file2.csv"},
+				method:              csvcheckcli.MethodStringMatch,
+				function:            "common",
+				keepIndex:           false,
+				outputDir:           "/path/to/output/dir",
+				addTimestamp:        true,
+				columnsToUse:        []string{"column1", "column2"},
+				columnsToIgnore:     nil,
+				autoAlign:           true,
+				useCommonColumns:    false,
+				ColumnsToKeep:       []string{"column1"},
+				ColumnsToDelete:     nil,
+				ColumnsArrangement1: nil,
+				ColumnsArrangement2: []string{"column1"},
+			}.getUserInput(),
+			expectError: false,
+		},
+		{
+			// keepcolumns together with deletecolumns.
+			input: userInputSolid{
+				inputDir:            "/path/to/input/dir",
+				files:               []string{"file2.csv", "file2.csv"},
+				method:              csvcheckcli.MethodStringMatch,
+				function:            "common",
+				keepIndex:           false,
+				outputDir:           "/path/to/output/dir",
+				addTimestamp:        true,
+				columnsToUse:        []string{"column1", "column2"},
+				columnsToIgnore:     nil,
+				autoAlign:           true,
+				useCommonColumns:    false,
+				ColumnsToKeep:       []string{"column1"},
+				ColumnsToDelete:     []string{"column1"},
+				ColumnsArrangement1: nil,
+				ColumnsArrangement2: []string{"column1"},
+			}.getUserInput(),
+			expectError: true,
+		},
+		{
+			input: userInputSolid{
+				inputDir:            "/path/to/input/dir",
+				files:               []string{"file2.csv", "file2.csv"},
+				method:              csvcheckcli.MethodStringMatch,
+				function:            "common",
+				keepIndex:           false,
+				outputDir:           "/path/to/output/dir",
+				addTimestamp:        true,
+				columnsToUse:        nil,
+				columnsToIgnore:     nil,
+				autoAlign:           true,
+				useCommonColumns:    true,
+				ColumnsToKeep:       nil,
+				ColumnsToDelete:     []string{"column1"},
+				ColumnsArrangement1: []string{"column1"},
+				ColumnsArrangement2: []string{"column1"},
+			}.getUserInput(),
+			expectError: false,
+		},
+		{
+			// Missing function.
+			input: userInputSolid{
+				inputDir:            "/path/to/input/dir",
+				files:               []string{"file2.csv", "file2.csv"},
+				method:              csvcheckcli.MethodStringMatch,
+				function:            "",
+				keepIndex:           false,
+				outputDir:           "/path/to/output/dir",
+				addTimestamp:        true,
+				columnsToUse:        nil,
+				columnsToIgnore:     nil,
+				autoAlign:           true,
+				useCommonColumns:    true,
+				ColumnsToKeep:       nil,
+				ColumnsToDelete:     []string{"column1"},
+				ColumnsArrangement1: []string{"column1"},
+				ColumnsArrangement2: []string{"column1"},
+			}.getUserInput(),
+			expectError: true,
+		},
+	} {
+		indexString := fmt.Sprintf("Test case index: %d", i)
+		_, err := csvcheckcli.ParseUserInput(&data.input)
+		if data.expectError {
+			assert.NotNil(t, err, indexString)
+		} else {
+			assert.Nil(t, err, indexString)
+		}
+	}
+}
+
+func TestGetResArraysCommonMatchKeepIndex(t *testing.T) {
+	input := userInputSolid{
+		inputDir:         "/path/to/input/dir",
+		files:            []string{"file2.csv", "file2.csv"},
+		method:           csvcheckcli.MethodStringMatch,
+		function:         csvcheckcli.FunctionStringCommon,
+		keepIndex:        true,
+		outputDir:        "/path/to/output/dir",
+		addTimestamp:     true,
+		columnsToUse:     nil,
+		columnsToIgnore:  nil,
+		autoAlign:        false,
+		useCommonColumns: true,
+	}.getUserInput()
+
+	arr1 := Get2DArrayFromCsvString(`
+a,b,c
+1,2,3
+4,5,6
+7,8,9
+7,8,9
+`)
+	arr2 := Get2DArrayFromCsvString(`
+a,b,c
+7,8,9
+1,2,3
+1,2,3
+4,5,6
+7,8,9
+7,8,9
+10,10,10
+`)
+	res1, res2, err := csvcheckcli.GetResArrays(arr1, arr2, input)
+
+	expected1 := Get2DArrayFromCsvString(fmt.Sprintf(`
+a,b,c,%s
+1,2,3,1
+4,5,6,2
+7,8,9,3
+7,8,9,4
+`, csvcheckcli.IndexColumnName))
+
+	expected2 := Get2DArrayFromCsvString(fmt.Sprintf(`
+a,b,c,%s
+7,8,9,1
+1,2,3,2
+4,5,6,4
+7,8,9,5
+`, csvcheckcli.IndexColumnName))
+
+	assert.Nil(t, err)
+	assert.Equal(t, expected1, res1)
+	assert.Equal(t, expected2, res2)
+}
+
+func TestGetResArraysDifferentSetKeepIndex(t *testing.T) {
+	input := userInputSolid{
+		inputDir:         "/path/to/input/dir",
+		files:            []string{"file2.csv", "file2.csv"},
+		method:           csvcheckcli.MethodStringSet,
+		function:         csvcheckcli.FunctionStringDifferent,
+		keepIndex:        true,
+		outputDir:        "/path/to/output/dir",
+		addTimestamp:     true,
+		columnsToUse:     nil,
+		columnsToIgnore:  nil,
+		autoAlign:        false,
+		useCommonColumns: true,
+	}.getUserInput()
+
+	arr1 := Get2DArrayFromCsvString(`
+a,b,c
+-1,-1,-1
+1,2,3
+4,5,6
+7,8,9
+7,8,9
+`)
+	arr2 := Get2DArrayFromCsvString(`
+a,b,c
+7,8,9
+1,2,3
+1,2,3
+4,5,6
+7,8,9
+7,8,9
+10,10,10
+`)
+	res1, res2, err := csvcheckcli.GetResArrays(arr1, arr2, input)
+
+	expected1 := Get2DArrayFromCsvString(fmt.Sprintf(`
+a,b,c,%s
+-1,-1,-1,1
+`, csvcheckcli.IndexColumnName))
+
+	expected2 := Get2DArrayFromCsvString(fmt.Sprintf(`
+a,b,c,%s
+10,10,10,7
+`, csvcheckcli.IndexColumnName))
+
+	assert.Nil(t, err)
+	assert.Equal(t, expected1, res1)
+	assert.Equal(t, expected2, res2)
+}
+
+func TestGetResArraysDifferentMatch(t *testing.T) {
+	input := userInputSolid{
+		inputDir:         "/path/to/input/dir",
+		files:            []string{"file2.csv", "file2.csv"},
+		method:           csvcheckcli.MethodStringMatch,
+		function:         csvcheckcli.FunctionStringDifferent,
+		keepIndex:        false,
+		outputDir:        "/path/to/output/dir",
+		addTimestamp:     true,
+		columnsToUse:     nil,
+		columnsToIgnore:  nil,
+		autoAlign:        false,
+		useCommonColumns: true,
+	}.getUserInput()
+
+	arr1 := Get2DArrayFromCsvString(`
+a,b,c
+1,2,3
+4,5,6
+7,8,9
+7,8,9
+`)
+	arr2 := Get2DArrayFromCsvString(`
+a,b,c
+7,8,9
+1,2,3
+1,2,3
+4,5,6
+7,8,9
+7,8,9
+10,10,10
+`)
+	res1, res2, err := csvcheckcli.GetResArrays(arr1, arr2, input)
+
+	expected1 := Get2DArrayFromCsvString(`
+a,b,c
+`)
+
+	expected2 := Get2DArrayFromCsvString(`
+a,b,c
+1,2,3
+7,8,9
+10,10,10
+`)
+
+	assert.Nil(t, err)
+	assert.Equal(t, expected1, res1)
+	assert.Equal(t, expected2, res2)
+}
+
+func TestGetResArraysDifferentMatchColumnsToUse(t *testing.T) {
+	input := userInputSolid{
+		inputDir:         "/path/to/input/dir",
+		files:            []string{"file2.csv", "file2.csv"},
+		method:           csvcheckcli.MethodStringMatch,
+		function:         csvcheckcli.FunctionStringDifferent,
+		keepIndex:        false,
+		outputDir:        "/path/to/output/dir",
+		addTimestamp:     true,
+		columnsToUse:     []string{"a"},
+		columnsToIgnore:  nil,
+		autoAlign:        false,
+		useCommonColumns: false,
+	}.getUserInput()
+
+	arr1 := Get2DArrayFromCsvString(`
+a,b,c
+1,2,3
+4,5,6
+7,8,9
+`)
+	arr2 := Get2DArrayFromCsvString(`
+d,a,z
+a,b,c
+z,1,z
+z,7,z
+z,7,z
+`)
+	res1, res2, err := csvcheckcli.GetResArrays(arr1, arr2, input)
+
+	expected1 := Get2DArrayFromCsvString(`
+a,b,c
+4,5,6
+`)
+
+	expected2 := Get2DArrayFromCsvString(`
+d,a,z
+a,b,c
+z,7,z
+`)
+
+	assert.Nil(t, err)
+	assert.Equal(t, expected1, res1)
+	assert.Equal(t, expected2, res2)
+}
+
+func TestGetResArraysDifferentMatchColumnsToUseAutoAlign(t *testing.T) {
+	input := userInputSolid{
+		inputDir:         "/path/to/input/dir",
+		files:            []string{"file2.csv", "file2.csv"},
+		method:           csvcheckcli.MethodStringMatch,
+		function:         csvcheckcli.FunctionStringDifferent,
+		keepIndex:        false,
+		outputDir:        "/path/to/output/dir",
+		addTimestamp:     true,
+		columnsToUse:     []string{"a"},
+		columnsToIgnore:  nil,
+		autoAlign:        true,
+		useCommonColumns: false,
+	}.getUserInput()
+
+	arr1 := Get2DArrayFromCsvString(`
+a,b,c
+1,2,3
+4,5,6
+7,8,9
+`)
+	arr2 := Get2DArrayFromCsvString(`
+d,a,z
+a,b,c
+z,1,z
+z,7,z
+z,7,z
+`)
+	res1, res2, err := csvcheckcli.GetResArrays(arr1, arr2, input)
+
+	expected1 := Get2DArrayFromCsvString(`
+a,b,c
+4,5,6
+`)
+
+	expected2 := Get2DArrayFromCsvString(`
+a,d,z
+b,a,c
+7,z,z
+`)
+
+	assert.Nil(t, err)
+	assert.Equal(t, expected1, res1)
+	assert.Equal(t, expected2, res2)
+}
+
+// NOTE(review): despite "UseCommonColumns" in its name, this test (like the
+// following ones with that word in their name) sets columnsToUse and leaves
+// useCommonColumns false, which makes it identical to
+// TestGetResArraysDifferentMatchColumnsToUseAutoAlign - confirm the intent.
+func TestGetResArraysDifferentMatchUseCommonColumnsAutoAlign(t *testing.T) {
+	input := userInputSolid{
+		inputDir:         "/path/to/input/dir",
+		files:            []string{"file2.csv", "file2.csv"},
+		method:           csvcheckcli.MethodStringMatch,
+		function:         csvcheckcli.FunctionStringDifferent,
+		keepIndex:        false,
+		outputDir:        "/path/to/output/dir",
+		addTimestamp:     true,
+		columnsToUse:     []string{"a"},
+		columnsToIgnore:  nil,
+		autoAlign:        true,
+		useCommonColumns: false,
+	}.getUserInput()
+
+	arr1 := Get2DArrayFromCsvString(`
+a,b,c
+1,2,3
+4,5,6
+7,8,9
+`)
+	arr2 := Get2DArrayFromCsvString(`
+d,a,z
+a,b,c
+z,1,z
+z,7,z
+z,7,z
+`)
+	res1, res2, err := csvcheckcli.GetResArrays(arr1, arr2, input)
+
+	expected1 := Get2DArrayFromCsvString(`
+a,b,c
+4,5,6
+`)
+
+	expected2 := Get2DArrayFromCsvString(`
+a,d,z
+b,a,c
+7,z,z
+`)
+
+	assert.Nil(t, err)
+	assert.Equal(t, expected1, res1)
+	assert.Equal(t, expected2, res2)
+}
+
+func TestGetResArraysDifferentMatchUseCommonColumnsAutoAlignKeepCommonAndDifferentColumns(t *testing.T) {
+	input := userInputSolid{
+		inputDir:         "/path/to/input/dir",
+		files:            []string{"file2.csv", "file2.csv"},
+		method:           csvcheckcli.MethodStringMatch,
+		function:         csvcheckcli.FunctionStringDifferent,
+		keepIndex:        false,
+		outputDir:        "/path/to/output/dir",
+		addTimestamp:     true,
+		columnsToUse:     []string{"a"},
+		columnsToIgnore:  nil,
+		autoAlign:        true,
+		useCommonColumns: false,
+		ColumnsToKeep:    []string{"a", "z", "c"},
+	}.getUserInput()
+
+	arr1 := Get2DArrayFromCsvString(`
+a,b,c
+1,2,3
+4,5,6
+7,8,9
+`)
+	arr2 := Get2DArrayFromCsvString(`
+d,a,z
+a,b,c
+z,1,z
+z,7,z
+z,7,z
+`)
+	res1, res2, err := csvcheckcli.GetResArrays(arr1, arr2, input)
+
+	expected1 := Get2DArrayFromCsvString(`
+a,c
+4,6
+`)
+
+	expected2 := Get2DArrayFromCsvString(`
+a,z
+b,c
+7,z
+`)
+
+	assert.Nil(t, err)
+	assert.Equal(t, expected1, res1)
+	assert.Equal(t, expected2, res2)
+}
+
+func TestGetResArraysDifferentMatchUseCommonColumnsAutoAlignKeepCommonAndDifferentColumnsAndRearrange(t *testing.T) {
+	input := userInputSolid{
+		inputDir:            "/path/to/input/dir",
+		files:               []string{"file2.csv", "file2.csv"},
+		method:              csvcheckcli.MethodStringMatch,
+		function:            csvcheckcli.FunctionStringDifferent,
+		keepIndex:           false,
+		outputDir:           "/path/to/output/dir",
+		addTimestamp:        true,
+		columnsToUse:        []string{"a"},
+		columnsToIgnore:     nil,
+		autoAlign:           true,
+		useCommonColumns:    false,
+		ColumnsToKeep:       []string{"a", "z", "c"},
+		ColumnsArrangement1: []string{"c", "a"},
+		ColumnsArrangement2: []string{"z", "a"},
+	}.getUserInput()
+
+	arr1 := Get2DArrayFromCsvString(`
+a,b,c
+1,2,3
+4,5,6
+7,8,9
+`)
+	arr2 := Get2DArrayFromCsvString(`
+d,a,z
+a,b,c
+z,1,z
+z,7,z
+z,7,z
+`)
+	res1, res2, err := csvcheckcli.GetResArrays(arr1, arr2, input)
+
+	expected1 := Get2DArrayFromCsvString(`
+c,a
+6,4
+`)
+
+	expected2 := Get2DArrayFromCsvString(`
+z,a
+c,b
+z,7
+`)
+
+	assert.Nil(t, err)
+	assert.Equal(t, expected1, res1)
+	assert.Equal(t, expected2, res2)
+}
+
+// The first arrangement names a column ("aaa") that the first output does not
+// have, so an error is expected.
+func TestGetResArraysDifferentMatchRearrangeWithNonExistentColumnsForColumnsArrangement1(t *testing.T) {
+	input := userInputSolid{
+		inputDir:            "/path/to/input/dir",
+		files:               []string{"file2.csv", "file2.csv"},
+		method:              csvcheckcli.MethodStringMatch,
+		function:            csvcheckcli.FunctionStringDifferent,
+		keepIndex:           false,
+		outputDir:           "/path/to/output/dir",
+		addTimestamp:        true,
+		columnsToUse:        []string{"a"},
+		columnsToIgnore:     nil,
+		autoAlign:           true,
+		useCommonColumns:    false,
+		ColumnsToKeep:       []string{"a", "z", "c"},
+		ColumnsArrangement1: []string{"c", "a", "aaa"},
+		ColumnsArrangement2: []string{"z", "a"},
+	}.getUserInput()
+
+	arr1 := Get2DArrayFromCsvString(`
+a,b,c
+1,2,3
+4,5,6
+7,8,9
+`)
+	arr2 := Get2DArrayFromCsvString(`
+d,a,z
+a,b,c
+z,1,z
+z,7,z
+z,7,z
+`)
+	_, _, err := csvcheckcli.GetResArrays(arr1, arr2, input)
+
+	assert.NotNil(t, err)
+}
+
+// The second arrangement names a column ("p") that the second output does not
+// have, so an error is expected.
+func TestGetResArraysDifferentMatchRearrangeWithNonExistentColumnsForColumnsArrangement2(t *testing.T) {
+	input := userInputSolid{
+		inputDir:            "/path/to/input/dir",
+		files:               []string{"file2.csv", "file2.csv"},
+		method:              csvcheckcli.MethodStringMatch,
+		function:            csvcheckcli.FunctionStringDifferent,
+		keepIndex:           false,
+		outputDir:           "/path/to/output/dir",
+		addTimestamp:        true,
+		columnsToUse:        []string{"a"},
+		columnsToIgnore:     nil,
+		autoAlign:           true,
+		useCommonColumns:    false,
+		ColumnsToKeep:       []string{"a", "z", "c"},
+		ColumnsArrangement1: []string{"c", "a"},
+		ColumnsArrangement2: []string{"z", "a", "p"},
+	}.getUserInput()
+
+	arr1 := Get2DArrayFromCsvString(`
+a,b,c
+1,2,3
+4,5,6
+7,8,9
+`)
+	arr2 := Get2DArrayFromCsvString(`
+d,a,z
+a,b,c
+z,1,z
+z,7,z
+z,7,z
+`)
+	_, _, err := csvcheckcli.GetResArrays(arr1, arr2, input)
+
+	assert.NotNil(t, err)
+}
+
+// The first arrangement leaves out a column of the first output, so an error
+// is expected.
+func TestGetResArraysDifferentMatchRearrangeWithMissingColumnsOnColumnsArrangement1(t *testing.T) {
+	input := userInputSolid{
+		inputDir:            "/path/to/input/dir",
+		files:               []string{"file2.csv", "file2.csv"},
+		method:              csvcheckcli.MethodStringMatch,
+		function:            csvcheckcli.FunctionStringDifferent,
+		keepIndex:           false,
+		outputDir:           "/path/to/output/dir",
+		addTimestamp:        true,
+		columnsToUse:        []string{"a"},
+		columnsToIgnore:     nil,
+		autoAlign:           true,
+		useCommonColumns:    false,
+		ColumnsToKeep:       []string{"a", "z", "c"},
+		ColumnsArrangement1: []string{"c"},
+		ColumnsArrangement2: []string{"z", "a"},
+	}.getUserInput()
+
+	arr1 := Get2DArrayFromCsvString(`
+a,b,c
+1,2,3
+4,5,6
+7,8,9
+`)
+	arr2 := Get2DArrayFromCsvString(`
+d,a,z
+a,b,c
+z,1,z
+z,7,z
+z,7,z
+`)
+	_, _, err := csvcheckcli.GetResArrays(arr1, arr2, input)
+
+	assert.NotNil(t, err)
+}
+
+// The second arrangement leaves out a column of the second output, so an error
+// is expected.
+func TestGetResArraysDifferentMatchRearrangeWithMissingColumnsOnColumnsArrangement2(t *testing.T) {
+	input := userInputSolid{
+		inputDir:            "/path/to/input/dir",
+		files:               []string{"file2.csv", "file2.csv"},
+		method:              csvcheckcli.MethodStringMatch,
+		function:            csvcheckcli.FunctionStringDifferent,
+		keepIndex:           false,
+		outputDir:           "/path/to/output/dir",
+		addTimestamp:        true,
+		columnsToUse:        []string{"a"},
+		columnsToIgnore:     nil,
+		autoAlign:           true,
+		useCommonColumns:    false,
+		ColumnsToKeep:       []string{"a", "z", "c"},
+		ColumnsArrangement1: []string{"c", "a"},
+		ColumnsArrangement2: []string{"a"},
+	}.getUserInput()
+
+	arr1 := Get2DArrayFromCsvString(`
+a,b,c
+1,2,3
+4,5,6
+7,8,9
+`)
+	arr2 := Get2DArrayFromCsvString(`
+d,a,z
+a,b,c
+z,1,z
+z,7,z
+z,7,z
+`)
+	_, _, err := csvcheckcli.GetResArrays(arr1, arr2, input)
+
+	assert.NotNil(t, err)
+}
+
+func TestGetResArraysDifferentMatchUseCommonColumnsAutoAlignDeleteCommonAndDifferentColumnsAndRearrange(t *testing.T) {
+	input := userInputSolid{
+		inputDir:            "/path/to/input/dir",
+		files:               []string{"file2.csv", "file2.csv"},
+		method:              csvcheckcli.MethodStringMatch,
+		function:            csvcheckcli.FunctionStringDifferent,
+		keepIndex:           false,
+		outputDir:           "/path/to/output/dir",
+		addTimestamp:        true,
+		columnsToUse:        []string{"a"},
+		columnsToIgnore:     nil,
+		autoAlign:           true,
+		useCommonColumns:    false,
+		ColumnsToKeep:       nil,
+		ColumnsToDelete:     []string{"a", "c"},
+		ColumnsArrangement1: nil,
+		ColumnsArrangement2: nil,
+	}.getUserInput()
+
+	arr1 := Get2DArrayFromCsvString(`
+a,b,c
+1,2,3
+4,5,6
+7,8,9
+`)
+	arr2 := Get2DArrayFromCsvString(`
+d,a,z
+a,b,c
+z,1,z
+z,7,z
+z,7,z
+`)
+	res1, res2, err := csvcheckcli.GetResArrays(arr1, arr2, input)
+
+	expected1 := Get2DArrayFromCsvString(`
+b
+5
+`)
+
+	expected2 := Get2DArrayFromCsvString(`
+d,z
+a,c
+z,z
+`)
+
+	assert.Nil(t, err)
+	assert.Equal(t, expected1, res1)
+	assert.Equal(t, expected2, res2)
+}
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..fd426d7
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,18 @@
+module csvcheckcli
+
+go 1.23.0
+
+require (
+	github.com/BrianWeiHaoMa/csvcheck v0.1.1
+	github.com/spf13/cobra v1.8.1
+	github.com/spf13/pflag v1.0.5
+	github.com/stretchr/testify v1.9.0
+)
+ +require ( + github.com/cespare/xxhash v1.1.0 // indirect + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..f65ed53 --- /dev/null +++ b/go.sum @@ -0,0 +1,26 @@ +github.com/BrianWeiHaoMa/csvcheck v0.1.1 h1:b8iKb68JzMa8IhqKO8OPTXZdOgRKmaEqZBKIlouKOHw= +github.com/BrianWeiHaoMa/csvcheck v0.1.1/go.mod h1:R8T6U3KaLtZ+6XkAVcKUc4IegkBSWRgc3StrUnEmEwY= +github.com/OneOfOne/xxhash v1.2.2 h1:KMrpdQIwFcEqXDklaen+P1axHaj9BSKzvpUUfnHldSE= +github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= +github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko= +github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= +github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= +github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72 h1:qLC7fQah7D6K1B0ujays3HV9gkFtllcxhzImRR7ArPQ= +github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= +github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM= +github.com/spf13/cobra v1.8.1/go.mod 
h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y=
+github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
+github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
+github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
+github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
diff --git a/main.go b/main.go
new file mode 100644
index 0000000..bbd0122
--- /dev/null
+++ b/main.go
@@ -0,0 +1,100 @@
+package main
+
+import (
+	"csvcheckcli/csvcheckcli"
+	"fmt"
+	"log"
+	"path/filepath"
+	"time"
+
+	"github.com/BrianWeiHaoMa/csvcheck"
+)
+
+// main parses the command-line input, compares the two csv files it names
+// and prints one result table per file. When an output directory is given,
+// each result is also written to a csv file in that directory.
+func main() {
+	input, err := csvcheckcli.ParseUserInput(nil)
+	if err != nil {
+		log.Fatalf("error parsing input:\n%s", err)
+	}
+
+	// Two files are indexed below; report a clear error instead of panicking
+	// with an index out of range when fewer were supplied.
+	if len(*input.Files) < 2 {
+		log.Fatalf("expected 2 input files, got %d", len(*input.Files))
+	}
+
+	csvPath1 := filepath.Join(*input.InputDir, (*input.Files)[0])
+	csvPath2 := filepath.Join(*input.InputDir, (*input.Files)[1])
+
+	csvArray1 := csvcheckcli.ReadCsvFile(csvPath1)
+	csvArray2 := csvcheckcli.ReadCsvFile(csvPath2)
+
+	fileName1 := filepath.Base(csvPath1)
+	fileName2 := filepath.Base(csvPath2)
+
+	currentTime := time.Now()
+	fmt.Printf("Start time: %s\n\n", currentTime.Format("2006-01-02 15:04:05"))
+
+	res1, res2, err := csvcheckcli.GetResArrays(csvArray1, csvArray2, input)
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	// The csv-formatted strings are needed for the output files even when the
+	// console output uses the pretty format, so they are always built.
+	resString1, err := csvcheck.StringFormatCsvArray(res1)
+	if err != nil {
+		log.Fatalf("error formatting results for file %s:\n%s", fileName1, err)
+	}
+	resString2, err := csvcheck.StringFormatCsvArray(res2)
+	if err != nil {
+		log.Fatalf("error formatting results for file %s:\n%s", fileName2, err)
+	}
+
+	if *input.PrintInCsvFormat {
+		fmt.Printf("Results for file %s:\n%s\n", fileName1, resString1)
+		fmt.Printf("Results for file %s:\n%s\n", fileName2, resString2)
+	} else {
+		prettyResString1, err := csvcheck.PrettyFormatCsvArray(res1, 2, *input.PrettyFormatMaxLength)
+		if err != nil {
+			log.Fatalf("error formatting results for file %s:\n%s", fileName1, err)
+		}
+		prettyResString2, err := csvcheck.PrettyFormatCsvArray(res2, 2, *input.PrettyFormatMaxLength)
+		if err != nil {
+			log.Fatalf("error formatting results for file %s:\n%s", fileName2, err)
+		}
+		fmt.Printf("Results for file %s:\n%s\n", fileName1, prettyResString1)
+		fmt.Printf("Results for file %s:\n%s\n", fileName2, prettyResString2)
+	}
+
+	if *input.OutputDir != "" {
+		fileNameNoExt1 := fileName1[:len(fileName1)-len(filepath.Ext(fileName1))]
+		fileNameNoExt2 := fileName2[:len(fileName2)-len(filepath.Ext(fileName2))]
+
+		// Inputs sharing a base name (e.g. the same file name in two
+		// sub-directories of the input directory) would map to one output
+		// path, so the second result would overwrite the first. Number them.
+		if fileNameNoExt1 == fileNameNoExt2 {
+			fileNameNoExt1 += "_1"
+			fileNameNoExt2 += "_2"
+		}
+
+		resFileName1 := fmt.Sprintf("csvcheck_%s.csv", fileNameNoExt1)
+		resFileName2 := fmt.Sprintf("csvcheck_%s.csv", fileNameNoExt2)
+		if *input.AddTimestamp {
+			timeString := currentTime.Format("2006_01_02_15_04_05")
+			resFileName1 = fmt.Sprintf("csvcheck_%s_%s.csv", fileNameNoExt1, timeString)
+			resFileName2 = fmt.Sprintf("csvcheck_%s_%s.csv", fileNameNoExt2, timeString)
+		}
+
+		outputPath1 := filepath.Join(*input.OutputDir, resFileName1)
+		outputPath2 := filepath.Join(*input.OutputDir, resFileName2)
+
+		csvcheckcli.WriteString(outputPath1, resString1)
+		csvcheckcli.WriteString(outputPath2, resString2)
+
+		fmt.Printf("Results written to %s and %s.\n", outputPath1, outputPath2)
+	}
+}