diff --git a/read.go b/read.go index 5b840a9..0679665 100644 --- a/read.go +++ b/read.go @@ -1,6 +1,7 @@ package gcfg import ( + "bytes" "fmt" "io" "io/ioutil" @@ -13,6 +14,7 @@ import ( ) var unescape = map[rune]rune{'\\': '\\', '"': '"', 'n': '\n', 't': '\t'} +var utf8Bom = []byte("\ufeff") // no error: invalid literals should be caught by scanner func unquote(s string) string { @@ -221,6 +223,9 @@ func ReadStringInto(config interface{}, str string) error { // ReadFileInto reads gcfg formatted data from the file filename and sets the // values into the corresponding fields in config. +// +// For compatibility with files created on Windows, ReadFileInto skips a +// single leading UTF-8 BOM sequence if it exists. func ReadFileInto(config interface{}, filename string) error { f, err := os.Open(filename) if err != nil { @@ -231,7 +236,22 @@ func ReadFileInto(config interface{}, filename string) error { if err != nil { return err } + + // Skips a single leading UTF-8 BOM sequence if it exists. 
+ src = skipLeadingUtf8Bom(src) + fset := token.NewFileSet() file := fset.AddFile(filename, fset.Base(), len(src)) return readInto(config, fset, file, src) } + +func skipLeadingUtf8Bom(src []byte) []byte { + lengthUtf8Bom := len(utf8Bom) + + if len(src) >= lengthUtf8Bom { + if bytes.Equal(src[:lengthUtf8Bom], utf8Bom) { + return src[lengthUtf8Bom:] + } + } + return src +} diff --git a/read_test.go b/read_test.go index 6cfeaa1..6d3eecc 100644 --- a/read_test.go +++ b/read_test.go @@ -1,6 +1,7 @@ package gcfg import ( + "bytes" "encoding" "fmt" "math/big" @@ -339,6 +340,17 @@ func TestReadFileIntoUnicode(t *testing.T) { } } +func TestReadFileIntoNotepad(t *testing.T) { + res := &struct{ X甲 struct{ X乙 string } }{} + err := ReadFileInto(res, "testdata/notepad.ini") + if err != nil { + t.Error(err) + } + if "丁" != res.X甲.X乙 { + t.Errorf("got %q, wanted %q", res.X甲.X乙, "丁") + } +} + func TestReadStringIntoSubsectDefaults(t *testing.T) { type subsect struct { Color string @@ -404,3 +416,28 @@ func TestPanics(t *testing.T) { testPanic(t, tt.id, tt.config, tt.gcfg) } } + +var utf8bomtests = []struct { + id string + in []byte + out []byte +}{ + {"0 bytes input", []byte{}, []byte{}}, + {"3 bytes input (BOM only)", []byte("\ufeff"), []byte{}}, + {"3 bytes input (comment only, without BOM)", []byte(";c\n"), []byte(";c\n")}, + {"normal input with BOM", []byte("\ufeff[section]\nname=value"), []byte("[section]\nname=value")}, + {"normal input without BOM", []byte("[section]\nname=value"), []byte("[section]\nname=value")}, +} + +func testUtf8Bom(t *testing.T, id string, in, out []byte) { + got := skipLeadingUtf8Bom([]byte(in)) + if !bytes.Equal(got, out) { + t.Errorf("%s.", id) + } +} + +func TestUtf8Boms(t *testing.T) { + for _, tt := range utf8bomtests { + testUtf8Bom(t, tt.id, tt.in, tt.out) + } +} diff --git a/testdata/notepad.ini b/testdata/notepad.ini new file mode 100644 index 0000000..08673b8 --- /dev/null +++ b/testdata/notepad.ini @@ -0,0 +1,3 @@ +; Comment line +[甲] +乙=丁 
# Update 乙 to 丁 by notepad on windows