-
Notifications
You must be signed in to change notification settings - Fork 103
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
6d40d54
commit 9b277e8
Showing
7 changed files
with
355 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
package data_table | ||
|
||
import ( | ||
"bufio" | ||
"fmt" | ||
"io" | ||
"strings" | ||
) | ||
|
||
// Parser configuration and the functional-option type used to populate it.
type (
	// parser is a general parser for input laid out as columns and rows. It
	// turns each data line into a map keyed by column header.
	parser struct {
		// skipLines is the number of initial lines of input to discard. The
		// zero value skips nothing. Useful when consistent unwanted output
		// is printed before the data to parse.
		skipLines uint
		// headers is the set of column names. When left empty, the headers
		// are taken from the first line of data by splitting that line.
		headers []string
		// delimiter is the string fields are split on. When left empty,
		// fields are assumed to be whitespace separated and strings.Fields
		// is used to split them.
		delimiter string
	}

	// parserOpt is a functional option that adjusts a parser's settings.
	parserOpt func(*parser)
)
|
||
func WithSkipLines(skipLines uint) parserOpt { | ||
return func(p *parser) { | ||
p.skipLines = skipLines | ||
} | ||
} | ||
|
||
func WithHeaders(headers []string) parserOpt { | ||
return func(p *parser) { | ||
p.headers = headers | ||
} | ||
} | ||
|
||
func WithDelimiter(delimiter string) parserOpt { | ||
return func(p *parser) { | ||
p.delimiter = delimiter | ||
} | ||
} | ||
|
||
func NewParser(opts ...parserOpt) *parser { | ||
p := &parser{} | ||
|
||
for _, opt := range opts { | ||
opt(p) | ||
} | ||
|
||
return p | ||
} | ||
|
||
func (p parser) Parse(reader io.Reader) (any, error) { | ||
return p.parseLines(reader) | ||
} | ||
|
||
// parseLines scans a reader line by line and splits it into fields based on a delimiter. | ||
// The line fields are paired with a header, which is defined by an input array, or the first line of data. | ||
func (p parser) parseLines(reader io.Reader) ([]map[string]string, error) { | ||
results := make([]map[string]string, 0) | ||
scanner := bufio.NewScanner(reader) | ||
|
||
// Skip first N lines due to provided headers or otherwise. | ||
// This would likely only ever be 1 or 0, but we may want more. | ||
for p.skipLines > 0 { | ||
p.skipLines-- | ||
|
||
if !scanner.Scan() { | ||
return results, fmt.Errorf("skipped past all lines of data") | ||
} | ||
} | ||
|
||
headerCount := len(p.headers) | ||
|
||
for scanner.Scan() { | ||
line := scanner.Text() | ||
|
||
// headers weren't provided, so retrieve them from the first available line. | ||
if headerCount == 0 { | ||
p.headers = p.lineSplit(line, headerCount) | ||
headerCount = len(p.headers) | ||
continue | ||
} | ||
|
||
row := make(map[string]string) | ||
fields := p.lineSplit(line, headerCount) | ||
// It's possible we don't have the same number of fields to headers, so use | ||
// min here to avoid a possible array out-of-bounds exception. | ||
min := min(headerCount, len(fields)) | ||
|
||
// For each header, add the corresponding line field to the result row. | ||
// Duplicate headers overwrite the set value. | ||
for i := 0; i < min; i++ { | ||
row[strings.TrimSpace(p.headers[i])] = strings.TrimSpace(fields[i]) | ||
} | ||
|
||
results = append(results, row) | ||
} | ||
|
||
return results, nil | ||
} | ||
|
||
// lineSplit switches to the appropriate splitting method to return the current line's fields. | ||
// Delimiter often might be a comma or similar single character. | ||
func (p parser) lineSplit(line string, headerCount int) []string { | ||
switch p.delimiter { | ||
case "": | ||
// Delimiter wasn't provided, assume whitespace separated fields. | ||
return strings.Fields(line) | ||
default: | ||
// If we have a count of the headers, split the current line to N fields. | ||
// Otherwise assume headers weren't provided and split the initial line to set them. | ||
if headerCount > 0 { | ||
return strings.SplitN(line, p.delimiter, headerCount) | ||
} else { | ||
return strings.Split(line, p.delimiter) | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,203 @@ | ||
package data_table | ||
|
||
import ( | ||
"bytes" | ||
_ "embed" | ||
"testing" | ||
|
||
"github.com/stretchr/testify/require" | ||
) | ||
|
||
//go:embed test-data/test.csv | ||
var csv []byte | ||
|
||
//go:embed test-data/top.txt | ||
var top []byte | ||
|
||
//go:embed test-data/snap.txt | ||
var snap []byte | ||
|
||
func TestParse(t *testing.T) { | ||
t.Parallel() | ||
|
||
var tests = []struct { | ||
name string | ||
input []byte | ||
skipLines uint | ||
headers []string | ||
delimiter string | ||
expected []map[string]string | ||
}{ | ||
{ | ||
name: "empty input", | ||
expected: make([]map[string]string, 0), | ||
}, | ||
{ | ||
name: "snap", | ||
input: snap, | ||
expected: []map[string]string{ | ||
{ | ||
"Name": "core22", | ||
"Version": "20240111", | ||
"Rev": "1122", | ||
"Size": "77MB", | ||
"Publisher": "canonical**", | ||
"Notes": "base", | ||
}, | ||
}, | ||
}, | ||
{ | ||
name: "csv", | ||
input: csv, | ||
headers: []string{"name", "age", "date", "street", "city", "state", "zip"}, | ||
delimiter: ",", | ||
expected: []map[string]string{ | ||
{ | ||
"name": "Sara Walton", | ||
"age": "19", | ||
"date": "07/10/2010", | ||
"street": "Tagka Manor", | ||
"city": "Kedevwir", | ||
"state": "WV", | ||
"zip": "40036", | ||
}, | ||
{ | ||
"name": "Martin Powers", | ||
"age": "23", | ||
"date": "06/23/1942", | ||
"street": "Eror Parkway", | ||
"city": "Masuzose", | ||
"state": "ID", | ||
"zip": "92375", | ||
}, | ||
{ | ||
"name": "Sara Porter", | ||
"age": "53", | ||
"date": "01/12/1942", | ||
"street": "Ipsuj Path", | ||
"city": "Kikvitud", | ||
"state": "GA", | ||
"zip": "26070", | ||
}, | ||
{ | ||
"name": "Jayden Riley", | ||
"age": "41", | ||
"date": "11/30/2008", | ||
"street": "Rahef Point", | ||
"city": "Sirunu", | ||
"state": "UT", | ||
"zip": "21076", | ||
}, | ||
{ | ||
"name": "Genevieve Greene", | ||
"age": "58", | ||
"date": "04/07/1976", | ||
"street": "Camguf Terrace", | ||
"city": "Cunule", | ||
"state": "KS", | ||
"zip": "40733", | ||
}, | ||
}, | ||
}, | ||
{ | ||
name: "top", | ||
input: top, | ||
skipLines: 11, | ||
expected: []map[string]string{ | ||
{ | ||
"PID": "3210", | ||
"#TH": "29", | ||
"MEM": "2552M", | ||
"PGRP": "3210", | ||
"PPID": "1", | ||
"UID": "501", | ||
}, | ||
{ | ||
"PID": "4933", | ||
"#TH": "19/1", | ||
"MEM": "1266M", | ||
"PGRP": "4930", | ||
"PPID": "4930", | ||
"UID": "501", | ||
}, | ||
{ | ||
"PID": "400", | ||
"#TH": "20", | ||
"MEM": "1021M", | ||
"PGRP": "400", | ||
"PPID": "1", | ||
"UID": "88", | ||
}, | ||
{ | ||
"PID": "67777", | ||
"#TH": "5", | ||
"MEM": "824M", | ||
"PGRP": "4930", | ||
"PPID": "67536", | ||
"UID": "501", | ||
}, | ||
{ | ||
"PID": "1265", | ||
"#TH": "26", | ||
"MEM": "631M", | ||
"PGRP": "1258", | ||
"PPID": "1258", | ||
"UID": "501", | ||
}, | ||
{ | ||
"PID": "87436", | ||
"#TH": "25", | ||
"MEM": "511M", | ||
"PGRP": "84083", | ||
"PPID": "84083", | ||
"UID": "501", | ||
}, | ||
{ | ||
"PID": "67534", | ||
"#TH": "21", | ||
"MEM": "420M", | ||
"PGRP": "4930", | ||
"PPID": "4930", | ||
"UID": "501", | ||
}, | ||
{ | ||
"PID": "3189", | ||
"#TH": "37", | ||
"MEM": "403M", | ||
"PGRP": "3189", | ||
"PPID": "1", | ||
"UID": "501", | ||
}, | ||
{ | ||
"PID": "579", | ||
"#TH": "23", | ||
"MEM": "352M", | ||
"PGRP": "579", | ||
"PPID": "1", | ||
"UID": "0", | ||
}, | ||
{ | ||
"PID": "4936", | ||
"#TH": "22", | ||
"MEM": "312M", | ||
"PGRP": "4930", | ||
"PPID": "4930", | ||
"UID": "501", | ||
}, | ||
}, | ||
}, | ||
} | ||
|
||
for _, tt := range tests { | ||
tt := tt | ||
t.Run(tt.name, func(t *testing.T) { | ||
t.Parallel() | ||
|
||
p := NewParser(WithSkipLines(tt.skipLines), WithHeaders(tt.headers), WithDelimiter(tt.delimiter)) | ||
result, err := p.Parse(bytes.NewReader(tt.input)) | ||
|
||
require.NoError(t, err, "unexpected error parsing input") | ||
require.ElementsMatch(t, tt.expected, result) | ||
}) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
Name Version Rev Size Publisher Notes | ||
core22 20240111 1122 77MB canonical** base |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
Sara Walton,19,07/10/2010,Tagka Manor,Kedevwir,WV,40036 | ||
Martin Powers,23,06/23/1942,Eror Parkway,Masuzose,ID,92375 | ||
Sara Porter,53,01/12/1942,Ipsuj Path,Kikvitud,GA,26070 | ||
Jayden Riley,41,11/30/2008,Rahef Point,Sirunu,UT,21076 | ||
Genevieve Greene,58,04/07/1976,Camguf Terrace,Cunule,KS,40733 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
Processes: 720 total, 3 running, 717 sleeping, 3841 threads | ||
2024/03/11 12:18:30 | ||
Load Avg: 1.79, 1.52, 1.67 | ||
CPU usage: 13.43% user, 10.55% sys, 76.0% idle | ||
SharedLibs: 810M resident, 151M data, 106M linkedit. | ||
MemRegions: 586100 total, 10G resident, 533M private, 5265M shared. | ||
PhysMem: 30G used (2057M wired, 2316M compressor), 1124M unused. | ||
VM: 321T vsize, 4773M framework vsize, 0(0) swapins, 0(0) swapouts. | ||
Networks: packets: 27837831/21G in, 15638815/5658M out. | ||
Disks: 30011035/222G read, 15760164/231G written. | ||
|
||
PID #TH MEM PGRP PPID UID | ||
3210 29 2552M 3210 1 501 | ||
4933 19/1 1266M 4930 4930 501 | ||
400 20 1021M 400 1 88 | ||
67777 5 824M 4930 67536 501 | ||
1265 26 631M 1258 1258 501 | ||
87436 25 511M 84083 84083 501 | ||
67534 21 420M 4930 4930 501 | ||
3189 37 403M 3189 1 501 | ||
579 23 352M 579 1 0 | ||
4936 22 312M 4930 4930 501 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters