Skip to content

Commit

Permalink
Add Type Parsing For Most Formats (#17)
Browse files Browse the repository at this point in the history
* add type parsing for csv, gron, xml, ini, line
* type parsing method covers most cases, including int, float, bool, datetime, and string
* multi delimiter support for csv
  • Loading branch information
JFryy authored Jul 30, 2024
1 parent 5d29278 commit 737afdc
Show file tree
Hide file tree
Showing 10 changed files with 193 additions and 54 deletions.
21 changes: 10 additions & 11 deletions codec/codec_test.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
package codec

import (
"encoding/json"
"fmt"
"github.com/goccy/go-json"
"reflect"
"testing"
)
Expand All @@ -22,7 +22,7 @@ func TestGetEncodingType(t *testing.T) {
{"xml", XML},
{"ini", INI},
{"gron", GRON},
// {"html", HTML},
// {"html", HTML},
}

for _, tt := range tests {
Expand Down Expand Up @@ -63,9 +63,9 @@ func TestUnmarshal(t *testing.T) {
tomlData := "key = \"value\""
gronData := `key = "value";`
tfData := `key = "value"`
// note: html and csv tests are not yet functional
// htmlData := `<html><body><key>value</key></body></html>`
// csvData := "key1,key2\nvalue1,value2\nvalue3,value4"
// note: html and csv tests are not yet functional
// htmlData := `<html><body><key>value</key></body></html>`
// csvData := "key1,key2\nvalue1,value2\nvalue3,value4"

tests := []struct {
input []byte
Expand All @@ -78,11 +78,11 @@ func TestUnmarshal(t *testing.T) {
{[]byte(tomlData), TOML, map[string]interface{}{"key": "value"}},
{[]byte(gronData), GRON, map[string]interface{}{"key": "value"}},
{[]byte(tfData), TF, map[string]interface{}{"key": "value"}},
// {[]byte(htmlData), HTML, map[string]interface{}{"html": map[string]interface{}{"body": map[string]interface{}{"key": "value"}}}},
// {[]byte(csvData), CSV, []map[string]interface{}{
// {"key1": "value1", "key2": "value2"},
// {"key1": "value3", "key2": "value4"},
// }},
// {[]byte(htmlData), HTML, map[string]interface{}{"html": map[string]interface{}{"body": map[string]interface{}{"key": "value"}}}},
// {[]byte(csvData), CSV, []map[string]interface{}{
// {"key1": "value1", "key2": "value2"},
// {"key1": "value3", "key2": "value4"},
// }},
}

for _, tt := range tests {
Expand All @@ -102,4 +102,3 @@ func TestUnmarshal(t *testing.T) {
}
}
}

44 changes: 38 additions & 6 deletions codec/csv_codec.go
Original file line number Diff line number Diff line change
@@ -1,34 +1,66 @@
package codec

import (
"bytes"
"encoding/csv"
"fmt"
"github.com/goccy/go-json"
"io"
"strings"
)

// detectDelimiter guesses the field separator of CSV-like input by counting
// candidate separators on the first line.
//
// The candidates are tried in the order comma, semicolon, tab, pipe, space;
// the one occurring most often wins, and on a tie the earlier candidate is
// kept. A comma is returned when the input contains no newline (there is only
// a single line to look at) or when the first line contains none of the
// candidates.
func detectDelimiter(input []byte) rune {
	const fallback = ','

	// No newline means fewer than two lines: do not attempt detection.
	eol := bytes.IndexByte(input, '\n')
	if eol < 0 {
		return fallback
	}
	header := string(input[:eol])

	best, bestHits := rune(fallback), 0
	for _, candidate := range [...]rune{',', ';', '\t', '|', ' '} {
		// Strictly greater, so earlier candidates win ties.
		if hits := strings.Count(header, string(candidate)); hits > bestHits {
			best, bestHits = candidate, hits
		}
	}
	return best
}

func csvUnmarshal(input []byte, v interface{}) error {
delimiter := detectDelimiter(input)
r := csv.NewReader(strings.NewReader(string(input)))

// Read the first row for headers
r.Comma = delimiter
r.TrimLeadingSpace = true
headers, err := r.Read()
if err != nil {
return fmt.Errorf("error reading CSV headers: %v", err)
}

var records []map[string]string
var records []map[string]interface{}
for {
record, err := r.Read()
if err != nil {
if err == io.EOF {
break
}
if err != nil {
return fmt.Errorf("error reading CSV record: %v", err)
}

rowMap := make(map[string]string)
rowMap := make(map[string]interface{})
for i, header := range headers {
rowMap[header] = record[i]
rowMap[header] = parseValue(record[i])
}
records = append(records, rowMap)
}

jsonData, err := json.Marshal(records)
if err != nil {
return fmt.Errorf("error marshaling to JSON: %v", err)
Expand Down
59 changes: 48 additions & 11 deletions codec/gron.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,27 +10,51 @@ import (
)

func gronUnmarshal(data []byte, v interface{}) error {
d := make(map[string]interface{})

lines := strings.Split(string(data), "\n")
var isArray bool
dataMap := make(map[string]interface{})

for _, line := range lines {
if len(line) == 0 {
continue
}
parts := strings.SplitN(line, " = ", 2)
if len(parts) != 2 {
return nil
return fmt.Errorf("invalid line format: %s", line)
}

key := strings.TrimSpace(parts[0])
value := strings.Trim(parts[1], `";`)
parsedValue := parseValue(value)

if strings.HasPrefix(key, "[") && strings.Contains(key, "]") {
isArray = true
}

setValueJSON(d, key, value)
setValueJSON(dataMap, key, parsedValue)
}

*v.(*interface{}) = d
return nil
if isArray {
var arrayData []interface{}
for i := 0; i < len(dataMap); i++ {
if val, ok := dataMap[fmt.Sprintf("[%d]", i)]; ok {
arrayData = append(arrayData, val)
}
}
vv := reflect.ValueOf(v)
if vv.Kind() != reflect.Ptr || vv.IsNil() {
return fmt.Errorf("provided value must be a non-nil pointer")
}
vv.Elem().Set(reflect.ValueOf(arrayData))
} else {
vv := reflect.ValueOf(v)
if vv.Kind() != reflect.Ptr || vv.IsNil() {
return fmt.Errorf("provided value must be a non-nil pointer")
}
vv.Elem().Set(reflect.ValueOf(dataMap))
}

return nil
}

func gronMarshal(v interface{}) ([]byte, error) {
Expand All @@ -49,7 +73,7 @@ func traverseJSON(prefix string, v interface{}, buf *bytes.Buffer) {
}
case reflect.Slice:
for i := 0; i < rv.Len(); i++ {
traverseJSON(fmt.Sprintf("%s[%d]", prefix, i), rv.Index(i).Interface(), buf)
traverseJSON(fmt.Sprintf("[%d]", i), rv.Index(i).Interface(), buf)
}
default:
buf.WriteString(fmt.Sprintf("%s = %s;\n", prefix, formatJSONValue(v)))
Expand Down Expand Up @@ -80,18 +104,25 @@ func formatJSONValue(v interface{}) string {
}
}

func setValueJSON(data map[string]interface{}, key, value string) {
func setValueJSON(data map[string]interface{}, key string, value interface{}) {
parts := strings.Split(key, ".")
var m = data
for i, part := range parts {
if i == len(parts)-1 {
if strings.Contains(part, "[") && strings.Contains(part, "]") {
k := strings.Split(part, "[")[0]
index := parseArrayIndex(part)
if _, ok := m[k]; !ok {
m[k] = make([]interface{}, 0)
m[k] = make([]interface{}, index+1)
}

m[k] = append(m[k].([]interface{}), value)
arr := m[k].([]interface{})
if len(arr) <= index {
for len(arr) <= index {
arr = append(arr, nil)
}
m[k] = arr
}
arr[index] = value
} else {
m[part] = value
}
Expand All @@ -103,3 +134,9 @@ func setValueJSON(data map[string]interface{}, key, value string) {
}
}
}

// parseArrayIndex extracts the integer between the first "[" and the next
// "]" of a path segment such as "items[3]".
//
// Spaces around the number are ignored. The function never panics: it returns
// 0 when the brackets are missing or out of order, when the text between them
// is not a decimal integer, or when the integer is negative (a negative value
// cannot be used as a slice index by the caller).
func parseArrayIndex(part string) int {
	open := strings.Index(part, "[")
	if open < 0 {
		return 0
	}
	// Search for the closing bracket only after the opening one so that a
	// stray "]" earlier in the segment cannot produce an inverted slice range.
	inner := part[open+1:]
	closing := strings.Index(inner, "]")
	if closing < 0 {
		return 0
	}

	index, err := strconv.Atoi(strings.Trim(inner[:closing], " "))
	if err != nil || index < 0 {
		return 0
	}
	return index
}
29 changes: 14 additions & 15 deletions codec/html.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ import (
"bytes"
"github.com/goccy/go-json"
"golang.org/x/net/html"
"regexp"
"strconv"
"strings"
"regexp"
"strconv"
)

/*
Expand All @@ -15,15 +15,15 @@ This implementation may have some limitations and may not cover all edge cases.
*/

// unicodeEscapeRe matches a literal backslash followed by 'u' and exactly
// four hexadecimal digits. It is compiled once instead of on every call.
var unicodeEscapeRe = regexp.MustCompile(`\\u([0-9a-fA-F]{4})`)

// decodeUnicodeEscapes replaces every literal \uXXXX sequence in s with the
// character whose code point is the hexadecimal value XXXX. Text that does
// not match the pattern is copied through unchanged.
//
// Each escape is decoded on its own, so two consecutive escapes are not
// combined into a single character. The returned error is always nil; it is
// kept so the signature stays compatible with existing callers.
func decodeUnicodeEscapes(s string) (string, error) {
	decoded := unicodeEscapeRe.ReplaceAllStringFunc(s, func(match string) string {
		// match is `\uXXXX`; skip the two-byte `\u` prefix.
		codePoint, err := strconv.ParseInt(match[2:], 16, 32)
		if err != nil {
			return match
		}
		return string(rune(codePoint))
	})
	return decoded, nil
}

func htmlUnmarshal(data []byte, v interface{}) error {
Expand Down Expand Up @@ -64,15 +64,14 @@ func HTMLToMap(htmlBytes []byte) (map[string]interface{}, error) {
return nil, nil
}


func nodeToMap(node *html.Node) interface{} {
m := make(map[string]interface{})

// Process attributes if present for node
if node.Attr != nil {
for _, attr := range node.Attr {
// Decode Unicode escape sequences and HTML entities
v, _ := decodeUnicodeEscapes(attr.Val)
v, _ := decodeUnicodeEscapes(attr.Val)
m["@"+attr.Key] = v
}
}
Expand All @@ -88,15 +87,15 @@ func nodeToMap(node *html.Node) interface{} {
if text != "" && !(strings.TrimSpace(text) == "" && strings.ContainsAny(text, "\n\r")) {
text, _ = strings.CutSuffix(text, "\n\r")
text, _ = strings.CutPrefix(text, "\n")
text, _ = decodeUnicodeEscapes(text)
text, _ = decodeUnicodeEscapes(text)
childTexts = append(childTexts, text)
}
case html.CommentNode:
text := strings.TrimSpace(child.Data)
if text != "" && !(strings.TrimSpace(text) == "" && strings.ContainsAny(text, "\n\r")) {
text, _ = strings.CutSuffix(text, "\n\r")
text, _ = strings.CutPrefix(text, "\n")
text = html.UnescapeString(text)
text = html.UnescapeString(text)
comments = append(comments, text)
}
case html.ElementNode:
Expand Down
2 changes: 1 addition & 1 deletion codec/ini_codec.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ func iniUnmarshal(input []byte, v interface{}) error {
}
sectionMap := make(map[string]interface{})
for _, key := range section.Keys() {
sectionMap[key.Name()] = key.Value()
sectionMap[key.Name()] = parseValue(key.Value())
}
data[section.Name()] = sectionMap
}
Expand Down
18 changes: 15 additions & 3 deletions codec/line_codec.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,31 @@ package codec
import (
"fmt"
"github.com/goccy/go-json"
"reflect"
"strings"
)

func lineUnmarshal(input []byte, v interface{}) error {
lines := strings.Split(strings.TrimSpace(string(input)), "\n")
var parsedLines []interface{}

// Marshal the lines to JSON and then unmarshal into the provided interface
jsonData, err := json.Marshal(lines)
for _, line := range lines {
trimmedLine := strings.TrimSpace(line)
parsedValue := parseValue(trimmedLine)
parsedLines = append(parsedLines, parsedValue)
}

jsonData, err := json.Marshal(parsedLines)
if err != nil {
return fmt.Errorf("error marshaling to JSON: %v", err)
}

if err := json.Unmarshal(jsonData, v); err != nil {
rv := reflect.ValueOf(v)
if rv.Kind() != reflect.Ptr || rv.IsNil() {
return fmt.Errorf("provided value must be a non-nil pointer")
}

if err := json.Unmarshal(jsonData, rv.Interface()); err != nil {
return fmt.Errorf("error unmarshaling JSON: %v", err)
}

Expand Down
23 changes: 23 additions & 0 deletions codec/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@ import (
"github.com/alecthomas/chroma/styles"
"github.com/mattn/go-isatty"
"os"
"strconv"
"strings"
"time"
)

func PrettyFormat(s string, fileType EncodingType, raw bool) (string, error) {
Expand Down Expand Up @@ -63,3 +65,24 @@ func PrettyFormat(s string, fileType EncodingType, raw bool) (string, error) {

return buffer.String(), nil
}

// parseValue infers a typed Go value from the textual form of a scalar.
//
// After trimming surrounding whitespace the text is tried, in order, as:
//
//   - int (strconv.Atoi; note that leading zeros are dropped, "007" -> 7)
//   - float64 (finite values only)
//   - bool (only the spellings true/TRUE/True and false/FALSE/False)
//   - time.Time in RFC 3339 form, then in date-only "2006-01-02" form
//
// If nothing matches, the trimmed string itself is returned.
func parseValue(value string) interface{} {
	value = strings.TrimSpace(value)

	if intValue, err := strconv.Atoi(value); err == nil {
		return intValue
	}

	// strconv.ParseFloat also accepts "NaN", "Inf" and "Infinity" (any case,
	// optionally signed). Those words are far more likely to be plain text,
	// and the resulting non-finite floats cannot be represented in JSON, so
	// only consider input that contains at least one digit. Out-of-range
	// literals are rejected by ParseFloat itself with a range error.
	if strings.ContainsAny(value, "0123456789") {
		if floatValue, err := strconv.ParseFloat(value, 64); err == nil {
			return floatValue
		}
	}

	// strconv.ParseBool would also turn the single letters t/f/T/F into
	// booleans, silently corrupting ordinary text columns; accept only the
	// spelled-out forms.
	switch value {
	case "true", "TRUE", "True":
		return true
	case "false", "FALSE", "False":
		return false
	}

	for _, layout := range []string{time.RFC3339, "2006-01-02"} {
		if dateValue, err := time.Parse(layout, value); err == nil {
			return dateValue
		}
	}
	return value
}
Loading

0 comments on commit 737afdc

Please sign in to comment.