-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
make quote escape character configurable
- Loading branch information
Stefan Tudose
committed
Nov 15, 2020
1 parent
25dc33e
commit 0fba587
Showing
8 changed files
with
149 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
package csvdecoder | ||
|
||
import ( | ||
"io" | ||
"io/ioutil" | ||
"strings" | ||
"unicode" | ||
) | ||
|
||
// readerCustomEscape is an io.Reader over CSV data in which every custom
// escaped quote has already been rewritten to the `""` sequence understood by
// the encoding/csv Reader.
type readerCustomEscape struct {
	reader io.Reader
}

const (
	// defaultEscapeChar is the character used by the encoding/csv package to escape a quote
	defaultEscapeChar = '"'
	// quote is the quote character that the escape character applies to.
	quote = '"'
)

// NewReaderWithCustomEscape creates a reader that uses a custom character as escape character
// instead of the quote used by the encoding/csv Reader.
//
// r is consumed completely before the function returns; an error from reading
// it is returned unchanged. In the resulting stream every escapeChar followed
// by a quote is replaced with the `""` sequence. A doubled escapeChar is an
// escaped escape character: it is copied through untouched and never combines
// with a quote that follows it.
func NewReaderWithCustomEscape(r io.Reader, escapeChar rune) (*readerCustomEscape, error) {
	b, err := ioutil.ReadAll(r)
	if err != nil {
		return nil, err
	}

	// Converting an out-of-range code point to a string yields U+FFFD. Apply
	// that substitution explicitly so the effective escape character is
	// visible here rather than hidden inside the conversions below.
	if escapeChar < 0 || escapeChar > unicode.MaxRune {
		escapeChar = unicode.ReplacementChar
	}

	escapedEscape := string([]rune{escapeChar, escapeChar})
	escapedQuote := string([]rune{escapeChar, quote})
	csvEscapedQuote := string([]rune{defaultEscapeChar, quote})

	// A Replacer walks the input once, left to right, and at each position
	// tries the pairs in argument order. Listing the escaped escape first
	// (mapped to itself) consumes it before the escaped-quote rule can see its
	// second character. Unlike a temporary placeholder, this cannot collide
	// with any character that is already present in the input.
	rewrite := strings.NewReplacer(
		escapedEscape, escapedEscape,
		escapedQuote, csvEscapedQuote,
	)

	return &readerCustomEscape{
		reader: strings.NewReader(rewrite.Replace(string(b))),
	}, nil
}

// Read implements io.Reader by reading from the rewritten data.
func (r readerCustomEscape) Read(p []byte) (n int, err error) {
	return r.reader.Read(p)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
package csvdecoder | ||
|
||
import ( | ||
"io/ioutil" | ||
"strings" | ||
"testing" | ||
) | ||
|
||
func TestEscapeReader(t *testing.T) { | ||
for _, tc := range []struct { | ||
name string | ||
input string | ||
escapeChar rune | ||
expectedResult string | ||
}{ | ||
{ | ||
name: "should work without anything to escape", | ||
input: "my example string", | ||
escapeChar: '_', | ||
expectedResult: "my example string", | ||
}, | ||
{ | ||
name: "should replace escaping quotes", | ||
input: `my _"example_" string`, | ||
escapeChar: '_', | ||
expectedResult: `my ""example"" string`, | ||
}, | ||
{ | ||
name: "should not replace escaping chars without quotes", | ||
input: "my _example_ string", | ||
escapeChar: '_', | ||
expectedResult: "my _example_ string", | ||
}, | ||
{ | ||
name: "should ignore escaped escaped chars", | ||
input: `my example string__"`, | ||
escapeChar: '_', | ||
expectedResult: `my example string__"`, | ||
}, | ||
} { | ||
tc := tc | ||
t.Run(tc.name, func(t *testing.T) { | ||
r, err := NewReaderWithCustomEscape(strings.NewReader(tc.input), tc.escapeChar) | ||
if err != nil { | ||
t.Fatal(err) | ||
} | ||
|
||
result, err := ioutil.ReadAll(r) | ||
if err != nil { | ||
t.Fatal(err) | ||
} | ||
|
||
if string(result) != tc.expectedResult { | ||
t.Errorf("expected value '%s' got '%s'", tc.expectedResult, result) | ||
} | ||
}) | ||
} | ||
} |