Skip to content

Commit

Permalink
make quote escape character configurable
Browse files Browse the repository at this point in the history
  • Loading branch information
Stefan Tudose committed Nov 15, 2020
1 parent 25dc33e commit 0fba587
Show file tree
Hide file tree
Showing 8 changed files with 149 additions and 9 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added

- allow client to configure the quote escape character

### Changed

### Deprecated
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ The behavior of the decoder can be configured by passing one of following option
- Comma: the character that separates values. Default value is comma.
- IgnoreHeaders: if set to true, the first line will be ignored. This is useful when the CSV file contains a header line.
- IgnoreUnmatchingFields: if set to true, the number of fields and scan targets are allowed to be different. By default, if they don't match exactly it will cause an error.
- EscapeChar: the character used to escape the quote character in quoted fields. The default is the quote itself as used by the `encoding/csv` reader.

```golang
decoder, err := csvdecoder.NewWithConfig(file, csvdecoder.Config{Comma: ';', IgnoreHeaders: true})
Expand Down
13 changes: 12 additions & 1 deletion decoder.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ type Config struct {
Comma rune // the character that separates values. Default value is comma.
IgnoreHeaders bool // if set to true, the first line will be ignored
IgnoreUnmatchingFields bool // if set to true, the number of fields and scan targets are allowed to be different
EscapeChar rune // the character used to escape the quote character in quoted fields. The default is the quote itself.
}

// New returns a new CSV decoder that reads from r.
Expand All @@ -29,10 +30,20 @@ func NewWithConfig(r io.Reader, config Config) (*Decoder, error) {

// New returns a new CSV decoder that reads from r
func New(r io.Reader) (*Decoder, error) {
return newDecoder(r, Config{})
return newDecoder(r, Config{
EscapeChar: defaultEscapeChar,
})
}

func newDecoder(reader io.Reader, config Config) (*Decoder, error) {
if config.EscapeChar != defaultEscapeChar {
var err error
reader, err = NewReaderWithCustomEscape(reader, config.EscapeChar)
if err != nil {
return nil, err
}
}

p := &Decoder{
reader: csv.NewReader(reader),
config: config,
Expand Down
8 changes: 4 additions & 4 deletions decoder_interface_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ func TestDecoderStruct(t *testing.T) {
t.Run(tc.name, func(t *testing.T) {
d, err := NewWithConfig(strings.NewReader(tc.data), Config{IgnoreHeaders: false, Comma: '\t'})
if err != nil {
t.Fatalf("could not create d: %w", err)
t.Fatalf("could not create d: %s", err)
}

for d.Next() {
Expand Down Expand Up @@ -77,7 +77,7 @@ func TestDecoderPointer(t *testing.T) {
t.Run(tc.name, func(t *testing.T) {
d, err := NewWithConfig(strings.NewReader(tc.data), Config{IgnoreHeaders: false, Comma: '\t'})
if err != nil {
t.Fatalf("could not create d: %w", err)
t.Fatalf("could not create d: %s", err)
}

for d.Next() {
Expand Down Expand Up @@ -114,7 +114,7 @@ func TestDecoderDoublePointer(t *testing.T) {
t.Run(tc.name, func(t *testing.T) {
d, err := NewWithConfig(strings.NewReader(tc.data), Config{IgnoreHeaders: false, Comma: '\t'})
if err != nil {
t.Fatalf("could not create d: %w", err)
t.Fatalf("could not create d: %s", err)
}

for d.Next() {
Expand Down Expand Up @@ -150,7 +150,7 @@ func TestDecoderInterface(t *testing.T) {
t.Run(tc.name, func(t *testing.T) {
d, err := NewWithConfig(strings.NewReader(tc.data), Config{IgnoreHeaders: false})
if err != nil {
t.Fatalf("could not create d: %w", err)
t.Fatalf("could not create d: %s", err)
}

for d.Next() {
Expand Down
6 changes: 3 additions & 3 deletions decoder_slice_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ func TestIntSlice(t *testing.T) {
t.Run(tc.name, func(t *testing.T) {
d, err := NewWithConfig(strings.NewReader(tc.data), Config{IgnoreHeaders: false, Comma: '\t'})
if err != nil {
t.Fatalf("could not create d: %w", err)
t.Fatalf("could not create d: %s", err)
}

for d.Next() {
Expand Down Expand Up @@ -106,7 +106,7 @@ func TestMultiLevelIntSlice(t *testing.T) {
t.Run(tc.name, func(t *testing.T) {
d, err := NewWithConfig(strings.NewReader(tc.data), Config{IgnoreHeaders: false, Comma: '\t'})
if err != nil {
t.Fatalf("could not create d: %w", err)
t.Fatalf("could not create d: %s", err)
}

for d.Next() {
Expand Down Expand Up @@ -191,7 +191,7 @@ func TestStructSlice(t *testing.T) {
t.Run(tc.name, func(t *testing.T) {
d, err := NewWithConfig(strings.NewReader(tc.data), Config{IgnoreHeaders: false, Comma: '\t'})
if err != nil {
t.Fatalf("could not create d: %w", err)
t.Fatalf("could not create d: %s", err)
}

for d.Next() {
Expand Down
11 changes: 10 additions & 1 deletion decoder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,15 @@ func TestIgnoreUnmatchingFields(t *testing.T) {
scanTargets: []interface{}{&strVal, &intVal},
expectedError: nil,
},
{
name: "should work for a string containing a quote",
config: Config{
IgnoreUnmatchingFields: true,
},
data: "rec,2\"\n",
scanTargets: []interface{}{&strVal, &strVal},
expectedError: nil,
},
{
name: "should work when numbers match with default config",
config: Config{},
Expand Down Expand Up @@ -98,7 +107,7 @@ func TestIgnoreUnmatchingFields(t *testing.T) {
t.Run(tc.name, func(t *testing.T) {
d, err := NewWithConfig(strings.NewReader(tc.data), tc.config)
if err != nil {
t.Fatalf("could not create d: %w", err)
t.Fatalf("could not create d: %s", err)
}

for d.Next() {
Expand Down
59 changes: 59 additions & 0 deletions escape_reader.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package csvdecoder

import (
"io"
"io/ioutil"
"strings"
"unicode"
)

// readerCustomEscape is an io.Reader over CSV data in which quotes escaped
// with a custom character have already been rewritten to the doubled-quote
// form understood by the encoding/csv Reader.
type readerCustomEscape struct {
	reader io.Reader
}

const (
	// defaultEscapeChar is the character used by the encoding/csv package to escape a quote
	defaultEscapeChar = '"'
	// quote is the character that delimits quoted fields.
	quote = '"'
)

// invalidEscapeCharError is returned when the requested escape character
// cannot be represented as a valid UTF-8 sequence of its own.
type invalidEscapeCharError struct{}

// Error implements the error interface.
func (invalidEscapeCharError) Error() string {
	return "csvdecoder: invalid escape character"
}

// NewReaderWithCustomEscape creates a reader that uses a custom character as escape character
// instead of the quote used by the encoding/csv Reader.
//
// Every "escapeChar quote" pair is rewritten to two quotes. A doubled
// escapeChar is an escaped escape character: it is copied through unchanged
// and never combines with a quote that follows it. All other input, including
// any U+FFFD already present in the data, is left untouched.
//
// A zero escapeChar is treated as "not configured" and, like the quote itself,
// needs no rewriting; in both cases r is wrapped directly and streamed instead
// of being buffered. For any other escapeChar the whole input is read up
// front, and a read error from r is returned.
//
// U+FFFD, or a rune that cannot be encoded (Go substitutes U+FFFD for it), is
// rejected with an error, in line with how encoding/csv validates delimiters.
func NewReaderWithCustomEscape(r io.Reader, escapeChar rune) (*readerCustomEscape, error) {
	if escapeChar == 0 || escapeChar == defaultEscapeChar {
		return &readerCustomEscape{reader: r}, nil
	}

	esc := string(escapeChar)
	if esc == string(unicode.ReplacementChar) {
		return nil, invalidEscapeCharError{}
	}

	b, err := ioutil.ReadAll(r)
	if err != nil {
		return nil, err
	}

	return &readerCustomEscape{
		reader: strings.NewReader(rewriteEscapedQuotes(string(b), esc)),
	}, nil
}

// rewriteEscapedQuotes converts every esc+quote pair in s to the standard
// encoding/csv escape sequence in a single left-to-right pass.
func rewriteEscapedQuotes(s, esc string) string {
	if !strings.Contains(s, esc) {
		return s // nothing to rewrite, avoid copying
	}

	var (
		escapedEscape = esc + esc
		escapedQuote  = esc + string(quote)
		stdQuote      = string([]rune{defaultEscapeChar, quote})
	)

	var out strings.Builder
	out.Grow(len(s))

	for {
		i := strings.Index(s, esc)
		if i < 0 {
			break
		}
		out.WriteString(s[:i])
		s = s[i:]

		switch {
		case strings.HasPrefix(s, escapedEscape):
			// An escaped escape character must not influence a following quote,
			// so consume both characters at once.
			out.WriteString(escapedEscape)
			s = s[len(escapedEscape):]
		case strings.HasPrefix(s, escapedQuote):
			out.WriteString(stdQuote)
			s = s[len(escapedQuote):]
		default:
			// A lone escape character that escapes nothing is ordinary data.
			out.WriteString(esc)
			s = s[len(esc):]
		}
	}
	out.WriteString(s)

	return out.String()
}

// Read implements io.Reader by delegating to the wrapped reader.
func (r readerCustomEscape) Read(p []byte) (n int, err error) {
	return r.reader.Read(p)
}
58 changes: 58 additions & 0 deletions escape_reader_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
package csvdecoder

import (
"io/ioutil"
"strings"
"testing"
)

// TestEscapeReader feeds fixed inputs through NewReaderWithCustomEscape with
// '_' as the escape character and compares everything read back from the
// returned reader against the expected text.
func TestEscapeReader(t *testing.T) {
	type scenario struct {
		name           string
		input          string
		escapeChar     rune
		expectedResult string
	}

	scenarios := []scenario{
		{
			name:           "should work without anything to escape",
			input:          "my example string",
			escapeChar:     '_',
			expectedResult: "my example string",
		},
		{
			name:           "should replace escaping quotes",
			input:          `my _"example_" string`,
			escapeChar:     '_',
			expectedResult: `my ""example"" string`,
		},
		{
			name:           "should not replace escaping chars without quotes",
			input:          "my _example_ string",
			escapeChar:     '_',
			expectedResult: "my _example_ string",
		},
		{
			name:           "should ignore escaped escaped chars",
			input:          `my example string__"`,
			escapeChar:     '_',
			expectedResult: `my example string__"`,
		},
	}

	for i := range scenarios {
		// Copy the case so the subtest closure does not share the loop variable.
		sc := scenarios[i]
		t.Run(sc.name, func(t *testing.T) {
			source := strings.NewReader(sc.input)

			escaped, err := NewReaderWithCustomEscape(source, sc.escapeChar)
			if err != nil {
				t.Fatal(err)
			}

			got, err := ioutil.ReadAll(escaped)
			if err != nil {
				t.Fatal(err)
			}

			if want := sc.expectedResult; want != string(got) {
				t.Errorf("expected value '%s' got '%s'", want, got)
			}
		})
	}
}

0 comments on commit 0fba587

Please sign in to comment.