Skip to content

Commit

Permalink
Allow specifying backslash-escaped delimiters (#55)
Browse files Browse the repository at this point in the history
This commit makes two changes to how delimiters are specified:
1. It allows other backslash-escaped delimiters, such as those that
represent hexadecimal or unicode characters (like `\x01` per #54).
Previously we had a special case to handle the tab-delimiter `\t`.
2. It will now exit with an error if the provided delimiter does not
evaluate to exactly 1 rune. Previously it would silently accept it,
which is not great since it could lead to some strange behavior for
users.

Fixes #54
  • Loading branch information
aotimme authored Dec 15, 2023
1 parent bf5b2ef commit 52ab92e
Show file tree
Hide file tree
Showing 6 changed files with 58 additions and 17 deletions.
9 changes: 8 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -683,7 +683,7 @@ cat test-files/left-table.csv \
## Changing the Default Delimiter
While `gocsv` generally assumes standard CSVs (per [RFC 4180](https://tools.ietf.org/html/rfc4180)), you can specify a default delimiter other than `,` using the `GOCSV_DELIMITER` environment variable.
While `gocsv` generally assumes standard CSVs (per [RFC 4180](https://tools.ietf.org/html/rfc4180)), you can specify a default delimiter other than `,` using the `GOCSV_DELIMITER` environment variable. The delimiter _must_ evaluate to exactly 1 ["rune"](https://go.dev/doc/go1#rune). If it does not, `gocsv` will error.
For example, to use semicolon-delimited files:
Expand All @@ -699,6 +699,13 @@ export GOCSV_DELIMITER="\t"
gocsv select -c 1 tab-delimited.tsv
```
Or, for more exotic delimiters, you can use hexadecimal or Unicode escapes (e.g. `\x01` or `\u0001` for the SOH delimiter):
```shell
export GOCSV_DELIMITER="\x01"
gocsv select -c 1 soh-delimited.tsv
```
## Examples
##### Copy Values
Expand Down
4 changes: 2 additions & 2 deletions cmd/delimiter.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ func (sub *DelimiterSubcommand) Run(args []string) {
}

func ChangeDelimiter(inputCsv *InputCsv, inputDelimiter, outputDelimiter string) {
inputDelimiterRune := GetDelimiterFromString(inputDelimiter)
inputDelimiterRune := GetDelimiterFromStringOrPanic(inputDelimiter)
if inputDelimiterRune != rune(0) {
inputCsv.SetDelimiter(inputDelimiterRune)
}
Expand All @@ -41,7 +41,7 @@ func ChangeDelimiter(inputCsv *InputCsv, inputDelimiter, outputDelimiter string)
inputCsv.SetLazyQuotes(true)

outputCsv := NewOutputCsvFromInputCsv(inputCsv)
outputDelimiterRune := GetDelimiterFromString(outputDelimiter)
outputDelimiterRune := GetDelimiterFromStringOrPanic(outputDelimiter)
if outputDelimiterRune != rune(0) {
outputCsv.SetDelimiter(outputDelimiterRune)
}
Expand Down
2 changes: 1 addition & 1 deletion cmd/input_csv.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ func NewInputCsv(filename string) (ic *InputCsv, err error) {
ic.reader = csv.NewReader(ic.bufReader)
delimiter := os.Getenv("GOCSV_DELIMITER")
if delimiter != "" {
ic.reader.Comma = GetDelimiterFromString(delimiter)
ic.reader.Comma = GetDelimiterFromStringOrPanic(delimiter)
}
err = ic.handleBom()
return
Expand Down
2 changes: 1 addition & 1 deletion cmd/output_csv.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ func NewOutputCsvFromFile(file *os.File) (oc *OutputCsv) {
oc.csvWriter = csv.NewWriter(file)
delimiter := os.Getenv("GOCSV_DELIMITER")
if delimiter != "" {
oc.csvWriter.Comma = GetDelimiterFromString(delimiter)
oc.csvWriter.Comma = GetDelimiterFromStringOrPanic(delimiter)
}
return
}
Expand Down
30 changes: 22 additions & 8 deletions cmd/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,28 @@ const (
NUM_BOM_BYTES = 3
)

func GetDelimiterFromString(delimiter string) rune {
if delimiter == "\\t" {
return '\t'
} else if len(delimiter) > 0 {
delimiterRune, _ := utf8.DecodeRuneInString(delimiter)
return delimiterRune
}
return rune(0)
// GetDelimiterFromString converts a user-supplied delimiter specification
// into the single rune it denotes.
//
// The specification is interpreted as the contents of a Go double-quoted
// string literal, so backslash escapes such as `\t`, `\x01` and `\u0001` are
// expanded before the result is inspected.
//
// An error is returned, together with utf8.RuneError, when:
//   - the specification is not valid string-literal content (the underlying
//     strconv error is wrapped, so errors.Is still matches it),
//   - it does not evaluate to exactly one rune,
//   - the rune is the Unicode replacement character or the NUL character.
func GetDelimiterFromString(delimiter string) (rune, error) {
	unquoted, err := strconv.Unquote(`"` + delimiter + `"`)
	if err != nil {
		// strconv only reports "invalid syntax"; add the offending input so
		// the user can tell which value was rejected.
		return utf8.RuneError, fmt.Errorf("invalid delimiter \"%s\": %w", delimiter, err)
	}
	runeCount := utf8.RuneCountInString(unquoted)
	if runeCount != 1 {
		return utf8.RuneError, fmt.Errorf("delimiter \"%s\" must contain exactly 1 rune, but contains %d", delimiter, runeCount)
	}
	r, _ := utf8.DecodeRuneInString(unquoted)
	// rune(0) is used by callers as the "no delimiter given" sentinel, so a
	// specification such as `\x00` would otherwise be silently ignored.
	if r == utf8.RuneError || r == rune(0) {
		return utf8.RuneError, fmt.Errorf("invalid delimiter \"%s\"", delimiter)
	}
	return r, nil
}

// GetDelimiterFromStringOrPanic is a simple wrapper around
// GetDelimiterFromString that hands any error to ExitWithError instead of
// returning it to the caller.
func GetDelimiterFromStringOrPanic(delimiter string) rune {
	delimiterRune, parseErr := GetDelimiterFromString(delimiter)
	if parseErr == nil {
		return delimiterRune
	}
	ExitWithError(parseErr)
	// Only reached if ExitWithError returns; mirror the parsed value.
	return delimiterRune
}

// GetIndicesForColumnsOrPanic is a simple wrapper around GetIndicesForColumns
Expand Down
28 changes: 24 additions & 4 deletions cmd/utils_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,24 +58,44 @@ func TestGetBaseFilenameWithoutExtension(t *testing.T) {
}
}

func TestGetDelimiterFromString(t *testing.T) {
func TestValidGetDelimiterFromString(t *testing.T) {
testCases := []struct {
delimiter string
comma rune
}{
{",", ','},
{";", ';'},
{"\\t", '\t'},
{"", rune(0)},
{"|", '|'},
{"lolcats", 'l'},
{"\\x01", '\x01'},
{"\\u0001", '\x01'},
}
for _, tt := range testCases {
t.Run(tt.delimiter, func(t *testing.T) {
delimiterRune := GetDelimiterFromString(tt.delimiter)
delimiterRune, err := GetDelimiterFromString(tt.delimiter)
if err != nil {
t.Errorf("Expected \"%#U\" but instead got an error: %v", tt.comma, err)
}
if delimiterRune != tt.comma {
t.Errorf("Expected \"%#U\" but got \"%#U\"", tt.comma, delimiterRune)
}
})
}
}

// TestInvalidGetDelimiterFromString checks that GetDelimiterFromString
// reports an error for each delimiter listed below.
func TestInvalidGetDelimiterFromString(t *testing.T) {
	invalidDelimiters := []string{
		"",
		"lolcats",
	}
	for _, delimiter := range invalidDelimiters {
		t.Run(delimiter, func(t *testing.T) {
			got, err := GetDelimiterFromString(delimiter)
			if err != nil {
				return
			}
			t.Errorf("Expected an error for delimiter \"%s\" but instead got rune \"%#U\"", delimiter, got)
		})
	}
}

0 comments on commit 52ab92e

Please sign in to comment.