Skip to content

Commit

Permalink
Merge pull request #14 from wanglei-ok/skip_utf8bom
Browse files Browse the repository at this point in the history
ReadFileInto to skip a single leading UTF8 BOM sequence if it exists.
  • Loading branch information
speter authored May 17, 2018
2 parents f21d161 + 99a6611 commit 61b2c08
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 0 deletions.
20 changes: 20 additions & 0 deletions read.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package gcfg

import (
"bytes"
"fmt"
"io"
"io/ioutil"
Expand All @@ -13,6 +14,7 @@ import (
)

// unescape maps the characters backslash, double quote, 'n' and 't' to
// backslash, double quote, newline and tab respectively.
// NOTE(review): presumably the escape table consulted by unquote — its body
// is not visible here, confirm.
var unescape = map[rune]rune{'\\': '\\', '"': '"', 'n': '\n', 't': '\t'}
// utf8Bom is the UTF-8 encoding of U+FEFF (the byte order mark): the three
// bytes 0xEF 0xBB 0xBF. skipLeadingUtf8Bom compares input against it.
var utf8Bom = []byte("\ufeff")

// no error: invalid literals should be caught by scanner
func unquote(s string) string {
Expand Down Expand Up @@ -221,6 +223,9 @@ func ReadStringInto(config interface{}, str string) error {

// ReadFileInto reads gcfg formatted data from the file filename and sets the
// values into the corresponding fields in config.
//
// For compatibility with files created on Windows, ReadFileInto skips a
// single leading UTF-8 BOM sequence if one is present.
func ReadFileInto(config interface{}, filename string) error {
f, err := os.Open(filename)
if err != nil {
Expand All @@ -231,7 +236,22 @@ func ReadFileInto(config interface{}, filename string) error {
if err != nil {
return err
}

// Skip a single leading UTF-8 BOM sequence, if present.
src = skipLeadingUtf8Bom(src)

fset := token.NewFileSet()
file := fset.AddFile(filename, fset.Base(), len(src))
return readInto(config, fset, file, src)
}

// skipLeadingUtf8Bom returns src without a leading UTF-8 byte order mark.
//
// At most one BOM is removed. If src does not begin with the BOM (including
// when src is shorter than the BOM), src is returned unchanged. The result is
// a sub-slice of src; no bytes are copied.
func skipLeadingUtf8Bom(src []byte) []byte {
	// bytes.TrimPrefix copes with inputs shorter than the prefix and strips
	// the prefix at most once, which is exactly the contract above.
	return bytes.TrimPrefix(src, utf8Bom)
}
37 changes: 37 additions & 0 deletions read_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package gcfg

import (
"bytes"
"encoding"
"fmt"
"math/big"
Expand Down Expand Up @@ -339,6 +340,17 @@ func TestReadFileIntoUnicode(t *testing.T) {
}
}

// TestReadFileIntoNotepad reads testdata/notepad.ini through ReadFileInto and
// checks that the X甲.X乙 field of the target struct is set to "丁".
// NOTE(review): the fixture is presumably saved by Windows Notepad with a
// leading UTF-8 BOM — confirm against the file's raw bytes.
func TestReadFileIntoNotepad(t *testing.T) {
	const want = "丁"
	var cfg struct{ X甲 struct{ X乙 string } }
	if err := ReadFileInto(&cfg, "testdata/notepad.ini"); err != nil {
		// Report the read error but still run the value check below.
		t.Error(err)
	}
	if got := cfg.X甲.X乙; got != want {
		t.Errorf("got %q, wanted %q", got, want)
	}
}

func TestReadStringIntoSubsectDefaults(t *testing.T) {
type subsect struct {
Color string
Expand Down Expand Up @@ -404,3 +416,28 @@ func TestPanics(t *testing.T) {
testPanic(t, tt.id, tt.config, tt.gcfg)
}
}

// utf8bomtests lists inputs for skipLeadingUtf8Bom together with the bytes it
// is expected to return. Besides the plain with/without-BOM inputs, it covers
// inputs shorter than the 3-byte BOM, a doubled BOM (only the first one is
// skipped) and a BOM that is not at the start of the input.
var utf8bomtests = []struct {
	id  string // case description, printed when the case fails
	in  []byte // bytes handed to skipLeadingUtf8Bom
	out []byte // bytes expected back
}{
	{"0 bytes input", []byte{}, []byte{}},
	{"1 byte input (shorter than BOM)", []byte(";"), []byte(";")},
	{"2 bytes input (truncated BOM)", []byte("\xef\xbb"), []byte("\xef\xbb")},
	{"3 bytes input (BOM only)", []byte("\ufeff"), []byte{}},
	{"3 bytes input (comment only, without BOM)", []byte(";c\n"), []byte(";c\n")},
	{"normal input with BOM", []byte("\ufeff[section]\nname=value"), []byte("[section]\nname=value")},
	{"normal input without BOM", []byte("[section]\nname=value"), []byte("[section]\nname=value")},
	{"double BOM (only the first is skipped)", []byte("\ufeff\ufeff[section]\nname=value"), []byte("\ufeff[section]\nname=value")},
	{"BOM not at start", []byte("[section]\ufeff\nname=value"), []byte("[section]\ufeff\nname=value")},
}

// testUtf8Bom runs skipLeadingUtf8Bom on in and reports a test error, labelled
// with id, when the result differs from out.
func testUtf8Bom(t *testing.T, id string, in, out []byte) {
	// in is already a []byte, so it is passed through without a conversion.
	got := skipLeadingUtf8Bom(in)
	if !bytes.Equal(got, out) {
		// Print both values so a failure can be diagnosed from the log alone.
		t.Errorf("%s: got %q, wanted %q", id, got, out)
	}
}

// TestUtf8Boms feeds every entry of utf8bomtests, in order, to testUtf8Bom.
func TestUtf8Boms(t *testing.T) {
	for i := range utf8bomtests {
		tc := utf8bomtests[i]
		testUtf8Bom(t, tc.id, tc.in, tc.out)
	}
}
3 changes: 3 additions & 0 deletions testdata/notepad.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
; Comment line
[甲]
乙=丁 # Update 乙 to 丁 by notepad on windows

0 comments on commit 61b2c08

Please sign in to comment.