diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml index 803fadafd..7ab1dbd2a 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/lint.yaml @@ -29,3 +29,8 @@ jobs: go install honnef.co/go/tools/cmd/staticcheck@latest staticcheck ./... go vet ./... + + - name: revive + run: | + go install github.com/mgechev/revive@latest + revive -config .revive.toml ./... diff --git a/revive.toml b/.revive.toml similarity index 100% rename from revive.toml rename to .revive.toml diff --git a/.testcoverage.yml b/.testcoverage.yml index bdf99451a..be730bb4b 100644 --- a/.testcoverage.yml +++ b/.testcoverage.yml @@ -24,27 +24,3 @@ threshold: # (optional; default 0) # The minimum total coverage project should have total: 0 - -# Holds regexp rules which will override thresholds for matched files or packages -# using their paths. -# -# First rule from this list that matches file or package is going to apply -# new threshold to it. If project has multiple rules that match same path, -# override rules should be listed in order from specific to more general rules. 
-override: - # Increase coverage threshold to 100% for `foo` package - # (default is 80, as configured above in this example) - - threshold: 100 - path: ^pkg/lib/foo$ - -# Holds regexp rules which will exclude matched files or packages -# from coverage statistics -exclude: - # Exclude files or packages matching their paths - paths: - - \.pb\.go$ # excludes all protobuf generated files - - ^pkg/bar # exclude package `pkg/bar` - -# NOTES: -# - symbol `/` in all path regexps will be replaced by current OS file path separator -# to properly work on Windows diff --git a/cmd/cmds/docs.go b/cmd/cmds/docs.go deleted file mode 100644 index a83b56b07..000000000 --- a/cmd/cmds/docs.go +++ /dev/null @@ -1,2 +0,0 @@ -// Package cmds contains the commands for the application -package cmds diff --git a/cmd/cmds/gen.go b/cmd/cmds/gen.go deleted file mode 100644 index bb768c490..000000000 --- a/cmd/cmds/gen.go +++ /dev/null @@ -1,26 +0,0 @@ -package cmds - -import ( - "fmt" - - "github.com/spf13/cobra" -) - -// NewGenCmd returns a new cobra command for the gen subcommand -func NewGenCmd() *cobra.Command { - return &cobra.Command{ - Use: "gen", - Short: "Generates a scraper utilizing seltabl", - Long: ` -Subcommand to generate a scraper with a given file name. 
- -Usage: - - $ seltabl gen -`, - RunE: func(cmd *cobra.Command, args []string) error { - fmt.Println("gen called") - return nil - }, - } -} diff --git a/cmd/doc.go b/cmd/doc.go deleted file mode 100644 index f89d95e57..000000000 --- a/cmd/doc.go +++ /dev/null @@ -1,2 +0,0 @@ -// Package cmd is the command package for the application -package cmd diff --git a/cmd/root.go b/cmd/root.go deleted file mode 100644 index c23b5056b..000000000 --- a/cmd/root.go +++ /dev/null @@ -1,33 +0,0 @@ -package cmd - -import ( - "fmt" - - "github.com/spf13/cobra" -) - -// rootCmd represents the base command when called without any subcommands -var rootCmd = &cobra.Command{ - Use: "seltabl", - Short: "AI driven table parsing code generator for Go", - Long: ` -seltabl: A golang library for configurably parsing html sequences into stucts originally built for html tables, but can be used for any html sequence. - -Command allows you to generate a golang struct from a html table given the data selectors for the table and the data selectors for the types of fields in the struct. -`, -} - -// Execute adds all child commands to the root command and sets flags appropriately. -// This is called by main.main(). It only needs to happen once to the rootCmd. 
-func Execute() error { - err := rootCmd.Execute() - if err != nil { - return fmt.Errorf("failed to execute command: %w", err) - } - return nil -} - -func init() { - // rootCmd.PersistentFlags().StringVar(&cfgFile, "config", "", "config file (default is $HOME/.seltabl.yaml)") - rootCmd.Flags().BoolP("toggle", "t", false, "Help message for toggle") -} diff --git a/cmd/routes.go b/cmd/routes.go deleted file mode 100644 index 7c6a2b580..000000000 --- a/cmd/routes.go +++ /dev/null @@ -1,10 +0,0 @@ -package cmd - -import ( - "github.com/conneroisu/seltabl/cmd/cmds" - "github.com/spf13/cobra" -) - -func AddRoutes(rootCmd *cobra.Command) { - rootCmd.AddCommand(cmds.NewGenCmd()) -} diff --git a/decoder.go b/decoder.go index 1caa274d0..ab2db9f88 100644 --- a/decoder.go +++ b/decoder.go @@ -1,6 +1,9 @@ package seltabl -import "io" +import ( + "fmt" + "io" +) // Decoder is a struct for decoding a reader into a slice of structs. // @@ -72,7 +75,12 @@ func NewDecoder[T any](r io.ReadCloser) *Decoder[T] { // This allows for decoding a reader into a slice of structs. // // Similar to the json.Decoder for brevity. 
-func (d *Decoder[T]) Decode(value *T) ([]T, error) { +func (d *Decoder[T]) Decode() ([]T, error) { defer d.reader.Close() - return NewFromReader[T](d.reader) + var result []T + result, err := NewFromReader[T](d.reader) + if err != nil { + return nil, fmt.Errorf("failed to decode: %w", err) + } + return result, nil } diff --git a/decoder_test.go b/decoder_test.go index 6add15c83..4bcb30caf 100644 --- a/decoder_test.go +++ b/decoder_test.go @@ -1 +1,139 @@ package seltabl + +import ( + "io" + "strings" + "testing" +) + +// DecodeExStruct is a test struct +type DecodeExStruct struct { + A string `json:"a" seltabl:"a" hSel:"tr:nth-child(1) td:nth-child(1)" dSel:"tr td:nth-child(1)" cSel:"$text"` + B string `json:"b" seltabl:"b" hSel:"tr:nth-child(1) td:nth-child(2)" dSel:"tr td:nth-child(2)" cSel:"$text"` +} + +// TestDecoder_Decode tests the Decoder.Decode function +func TestDecoder_Decode(t *testing.T) { + testCases := []struct { + name string + input string + expected []DecodeExStruct + hasError bool + }{ + { + name: "Valid input", + input: ` + + + + + + + + + + + + + +
ab
12
34
+ `, + expected: []DecodeExStruct{ + {A: "1", B: "2"}, + {A: "3", B: "4"}, + }, + hasError: false, + }, + { + name: "Invalid input", + input: ` + + + + + + + + +
ab
1
+ `, + expected: nil, + hasError: true, + }, + { + name: "Invalid input with invalid html", + input: ` + + + + + + + + +
ab
1
+ `, + expected: nil, + hasError: true, + }, + { + name: "Invalid input with invalid json", + input: ` + + + + + + + + +
ab
1
+ `, + expected: nil, + hasError: true, + }, + { + name: "Invalid input with invalid json", + input: ` + + + + + + +
ab1
+ `, + expected: nil, + hasError: true, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + r := io.NopCloser(strings.NewReader(tc.input)) + decoder := NewDecoder[DecodeExStruct](r) + result, err := decoder.Decode() + + if tc.hasError { + if err == nil { + t.Errorf("Expected an error, but got none") + } + return + } + + if err != nil { + t.Errorf("Unexpected error: %v", err) + } + + if len(result) != len(tc.expected) { + t.Errorf("Expected %d results, but got %d", len(tc.expected), len(result)) + } + + for i, expected := range tc.expected { + if result[i].A != expected.A || result[i].B != expected.B { + t.Errorf("Expected %+v, but got %+v", expected, result[i]) + } + } + }) + } +} diff --git a/examples/example2/example2.go b/examples/example2/example2.go deleted file mode 100644 index 376c890c8..000000000 --- a/examples/example2/example2.go +++ /dev/null @@ -1,7 +0,0 @@ -package main - -import "fmt" - -func main() { - fmt.Println("Hello, World from Example2!") -} diff --git a/examples/example3/example3.go b/examples/example3/example3.go deleted file mode 100644 index f9bab67c6..000000000 --- a/examples/example3/example3.go +++ /dev/null @@ -1,7 +0,0 @@ -package main - -import "fmt" - -func main() { - fmt.Println("Hello, World from Example3!") -} diff --git a/examples/example4/example4.go b/examples/example4/example4.go deleted file mode 100644 index 12b552e03..000000000 --- a/examples/example4/example4.go +++ /dev/null @@ -1,7 +0,0 @@ -package main - -import "fmt" - -func main() { - fmt.Println("Hello, World from Example4!") -} diff --git a/examples/example5/example5.go b/examples/example5/example5.go deleted file mode 100644 index b5973180c..000000000 --- a/examples/example5/example5.go +++ /dev/null @@ -1,7 +0,0 @@ -package main - -import "fmt" - -func main() { - fmt.Println("Hello, World from Example5!") -} diff --git a/examples/huggingface-leader-board/main.go b/examples/huggingface-leader-board/main.go new file mode 100644 index 
000000000..735225663 --- /dev/null +++ b/examples/huggingface-leader-board/main.go @@ -0,0 +1,22 @@ +// Package main shows how to use the seltabl package to scrape a table from a given url. +// The table used in this example is from the huggingface llm leader board. +package main + +import ( + "fmt" + "os" +) + +// main scrapes from: https://huggingface.co/spaces/HuggingFaceH4/LLM-Leaderboard +func main() { + if err := run(); err != nil { + fmt.Println(err) + os.Exit(1) + } +} + +// run runs the example +func run() error { + fmt.Println("Hello, World from llm leader board!") + return nil +} diff --git a/examples/huggingface-leader-board/main_test.go b/examples/huggingface-leader-board/main_test.go new file mode 100644 index 000000000..2fdc16763 --- /dev/null +++ b/examples/huggingface-leader-board/main_test.go @@ -0,0 +1,13 @@ +package main + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +// TestRun tests the run function +func TestRun(t *testing.T) { + err := run() + assert.Nil(t, err) +} diff --git a/examples/ncaa/doc.go b/examples/ncaa/doc.go new file mode 100644 index 000000000..cbf63346a --- /dev/null +++ b/examples/ncaa/doc.go @@ -0,0 +1,3 @@ +// Package main is an example of how to use +// the seltabl package. +package main diff --git a/examples/example1/example1.go b/examples/ncaa/example1.go similarity index 100% rename from examples/example1/example1.go rename to examples/ncaa/example1.go diff --git a/examples/penguins-wikipedia/main.go b/examples/penguins-wikipedia/main.go new file mode 100644 index 000000000..2131f268c --- /dev/null +++ b/examples/penguins-wikipedia/main.go @@ -0,0 +1,24 @@ +// Package main is an example of how to use the seltabl package +// to scrape an html table from a given url. +// The table used in this example is from the wikipedia page for +// penguins. 
+package main + +import ( + "fmt" + "os" +) + +// main scrapes from: https://en.wikipedia.org/wiki/List_of_penguins +func main() { + if err := run(); err != nil { + fmt.Println(err) + os.Exit(1) + } +} + +// run runs the example +func run() error { + fmt.Println("Hello, World from list of penguins!") + return nil +} diff --git a/examples/penguins-wikipedia/main_test.go b/examples/penguins-wikipedia/main_test.go new file mode 100644 index 000000000..2fdc16763 --- /dev/null +++ b/examples/penguins-wikipedia/main_test.go @@ -0,0 +1,13 @@ +package main + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +// TestRun tests the run function +func TestRun(t *testing.T) { + err := run() + assert.Nil(t, err) +} diff --git a/go.mod b/go.mod index 1c2066bfd..7ef36f296 100644 --- a/go.mod +++ b/go.mod @@ -5,20 +5,13 @@ go 1.22.3 require ( github.com/PuerkitoBio/goquery v1.9.2 github.com/brianvoe/gofakeit v3.18.0+incompatible + github.com/pmezard/go-difflib v1.0.0 github.com/stretchr/testify v1.9.0 ) require ( github.com/andybalholm/cascadia v1.3.2 // indirect github.com/davecgh/go-spew v1.1.1 // indirect - github.com/fatih/color v1.17.0 // indirect - github.com/inconshreveable/mousetrap v1.1.0 // indirect - github.com/mattn/go-colorable v0.1.13 // indirect - github.com/mattn/go-isatty v0.0.20 // indirect - github.com/pmezard/go-difflib v1.0.0 // indirect - github.com/spf13/cobra v1.8.0 // indirect - github.com/spf13/pflag v1.0.5 // indirect golang.org/x/net v0.24.0 // indirect - golang.org/x/sys v0.19.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index d9fc019c5..f011a0d24 100644 --- a/go.sum +++ b/go.sum @@ -4,25 +4,10 @@ github.com/andybalholm/cascadia v1.3.2 h1:3Xi6Dw5lHF15JtdcmAHD3i1+T8plmv7BQ/nsVi github.com/andybalholm/cascadia v1.3.2/go.mod h1:7gtRlve5FxPPgIgX36uWBX58OdBsSS6lUvCFb+h7KvU= github.com/brianvoe/gofakeit v3.18.0+incompatible h1:wDOmHc9DLG4nRjUVVaxA+CEglKOW72Y5+4WNxUIkjM8= github.com/brianvoe/gofakeit 
v3.18.0+incompatible/go.mod h1:kfwdRA90vvNhPutZWfH7WPaDzUjz+CZFqG+rPkOjGOc= -github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/fatih/color v1.17.0 h1:GlRw1BRJxkpqUCBKzKOw098ed57fEsKeNjpTe3cSjK4= -github.com/fatih/color v1.17.0/go.mod h1:YZ7TlrGPkiz6ku9fK3TLD/pl3CpsiFyu8N92HLgmosI= -github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= -github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= -github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= -github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= -github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= -github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= -github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= -github.com/spf13/cobra v1.8.0 h1:7aJaZx1B85qltLMc546zn58BxxfZdR/W22ej9CFoEf0= -github.com/spf13/cobra v1.8.0/go.mod h1:WXLWApfZ71AjXPya3WOlMsY9yMs7YeiHhFVlvLyhcho= -github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= -github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/yuin/goldmark v1.4.13/go.mod 
h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= @@ -45,12 +30,8 @@ golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.19.0 h1:q5f1RH2jigJ1MoAWp2KTp3gm5zAGFUTarQZ5U386+4o= -golang.org/x/sys v0.19.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= diff --git a/internal/errors/doc.go b/internal/errors/doc.go new file mode 100644 index 000000000..ca3ef7ccc --- /dev/null +++ b/internal/errors/doc.go @@ -0,0 +1,6 @@ +// Package errors is the errors package for the application. +// +// It contains the errors for the application. +// +// It is used to return comprehensible errors to the user of the package. +package errors diff --git a/parse.go b/parse.go index 083b7f176..b20cc9f22 100644 --- a/parse.go +++ b/parse.go @@ -12,27 +12,20 @@ import ( ) const ( + // innerTextSelector is the selector used to extract text from a cell. 
innerTextSelector = "$text" - attrSelector = "@" - - headerTag = "seltabl" - dataSelectorTag = "dSel" + // attrSelector is the selector used to extract attributes from a cell. + attrSelector = "@" + // headerTag is the tag used to mark a header cell. + headerTag = "seltabl" + // dataSelectorTag is the tag used to mark a data cell. + dataSelectorTag = "dSel" + // headerSelectorTag is the tag used to mark a header selector. headerSelectorTag = "hSel" - cellSelectorTag = "cSel" + // cellSelectorTag is the tag used to mark a data selector. + cellSelectorTag = "cSel" ) -// NewFromString parses a string into a slice of structs. -// -// The struct must have a field with the tag seltabl, a header selector with -// the tag hSel, and a data selector with the tag dSel. -func NewFromString[T any](htmlInput string) ([]T, error) { - doc, err := goquery.NewDocumentFromReader(strings.NewReader(htmlInput)) - if err != nil { - return nil, fmt.Errorf("failed to parse html: %w", err) - } - return New[T](doc) -} - // New parses a goquery doc into a slice of structs. // // The struct given as an argument must have a field with the tag seltabl, a header selector with @@ -49,6 +42,8 @@ func NewFromString[T any](htmlInput string) ([]T, error) { // // Example: // +// package main +// // var fixture = ` // // @@ -57,20 +52,20 @@ func NewFromString[T any](htmlInput string) ([]T, error) { // // // -// +// // // // -// -// +// +// // // -// -// +// +// // // -// -// +// +// // // //
b
1 12
3 434
5 656
7 878
@@ -92,9 +87,9 @@ func NewFromString[T any](htmlInput string) ([]T, error) { // } func New[T any](doc *goquery.Document) ([]T, error) { dType := reflect.TypeOf((*T)(nil)).Elem() - // if dType.Kind() != reflect.Struct { - // return nil, fmt.Errorf("expected struct, got %s", dType.Kind()) - // } + if dType.Kind() != reflect.Struct && dType.Kind() != reflect.Ptr { + return nil, fmt.Errorf("expected struct, got %s", dType.Kind()) + } results := make([]T, 0) for i := 0; i < dType.NumField(); i++ { field := dType.Field(i) @@ -145,10 +140,10 @@ func New[T any](doc *goquery.Document) ([]T, error) { } for j := 0; j < dataRows.Length(); j++ { if err := SetStructField( - &results[j], // result row for this data row - field.Name, // name of the field to set - dataRows.Eq(j), // goquery selection for cell - cellSelector, // selector for the inner cell + &results[j], // result row for this data row + field.Name, // name of the field to set + dataRows.Eq(j), // goquery selection for cell + &selector{cellSelector}, // selector for the inner cell ); err != nil { return nil, fmt.Errorf( "failed to set field %s: %s", @@ -164,12 +159,123 @@ func New[T any](doc *goquery.Document) ([]T, error) { return results, nil } +// NewFromString parses a string into a slice of structs. +// +// The struct must have a field with the tag seltabl, a header selector with +// the tag hSel, and a data selector with the tag dSel. +// +// Example: +// +// package main +// +// import ( +// "fmt" +// "github.com/conneroisu/seltabl" +// ) +// +// type TableStruct struct { +// A string `json:"a" seltabl:"a" hSel:"tr:nth-child(1) td:nth-child(1)" dSel:"tr td:nth-child(1)" cSel:"$text"` +// B string `json:"b" seltabl:"b" hSel:"tr:nth-child(1) td:nth-child(2)" dSel:"tr td:nth-child(2)" cSel:"$text"` +// } +// +// func main() { +// p, err := seltabl.NewFromString[TableStruct](` +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +//
ab
12
34
56
78
+// `) +// if err != nil { +// panic(err) +// } +// for _, pp := range p { +// fmt.Printf("pp %+v\n", pp) +// } +// } +func NewFromString[T any](htmlInput string) ([]T, error) { + reader := strings.NewReader( + htmlInput, + ) + doc, err := goquery.NewDocumentFromReader( + reader, + ) + if err != nil { + return nil, fmt.Errorf("failed to parse html: %w", err) + } + return New[T](doc) +} + // NewFromReader parses a reader into a slice of structs. // // The reader must be a valid html page with a single table. // // The passed in generic type must be a struct with valid selectors for the // table and data (hSel, dSel, cSel). +// +// Example: +// +// package main +// +// import ( +// "fmt" +// "github.com/conneroisu/seltabl" +// ) +// +// type TableStruct struct { +// A string `json:"a" seltabl:"a" hSel:"tr:nth-child(1) td:nth-child(1)" dSel:"tr td:nth-child(1)" cSel:"$text"` +// B string `json:"b" seltabl:"b" hSel:"tr:nth-child(1) td:nth-child(2)" dSel:"tr td:nth-child(2)" cSel:"$text"` +// } +// +// func main() { +// p, err := seltabl.NewFromReader[TableStruct](strings.NewReader(` +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +//
ab
12
34
56
78
+// `)) +// if err != nil { +// panic(err) +// } +// for _, pp := range p { +// fmt.Printf("pp %+v\n", pp) +// } +// } func NewFromReader[T any](r io.Reader) ([]T, error) { doc, err := goquery.NewDocumentFromReader(r) if err != nil { @@ -178,12 +284,37 @@ func NewFromReader[T any](r io.Reader) ([]T, error) { return New[T](doc) } -// NewFromURL parses a URL into a slice of structs. +// NewFromURL parses a given URL's html into a slice of structs adhering to the +// given generic type. // // The URL must be a valid html page with a single table. // // The passed in generic type must be a struct with valid selectors for the // table and data (hSel, dSel, cSel). +// +// Example: +// +// package main +// +// import ( +// "fmt" +// "github.com/conneroisu/seltabl" +// ) +// +// type TableStruct struct { +// A string `json:"a" seltabl:"a" hSel:"tr:nth-child(1) td:nth-child(1)" dSel:"tr td:nth-child(1)" cSel:"$text"` +// B string `json:"b" seltabl:"b" hSel:"tr:nth-child(1) td:nth-child(2)" dSel:"tr td:nth-child(2)" cSel:"$text"` +// } +// +// func main() { +// p, err := seltabl.NewFromURL[TableStruct]("https://github.com/conneroisu/seltabl/blob/main/testdata/ab_num_table.html") +// if err != nil { +// panic(err) +// } +// for _, pp := range p { +// fmt.Printf("pp %+v\n", pp) +// } +// } func NewFromURL[T any](url string) ([]T, error) { client := &http.Client{} resp, err := client.Get(url) diff --git a/reflect.go b/reflect.go index c023f0350..6c40c24e0 100644 --- a/reflect.go +++ b/reflect.go @@ -4,19 +4,20 @@ import ( "fmt" "reflect" "strconv" - "strings" "github.com/PuerkitoBio/goquery" ) // SetStructField sets a struct field to a value // It uses generics to specify the type of the struct -// and the field name. +// +// It is called by the New function to populate each result row. 
func SetStructField[T any]( structPtr *T, fieldName string, cellValue *goquery.Selection, - selector string, + selector SelectorInferface, ) error { v := reflect.ValueOf(structPtr).Elem() field := v.FieldByName(fieldName) @@ -24,113 +25,93 @@ func SetStructField[T any]( return fmt.Errorf("no such field: %s in struct", fieldName) } if !field.CanSet() { - return fmt.Errorf("cannot set field: %s", fieldName) + return fmt.Errorf("cannot change the value of field: %s", fieldName) } fieldType := field.Type().Kind() - var exists bool - var cellText string - - switch selector { - case innerTextSelector: - cellText = cellValue.Text() - cellText = strings.TrimSpace(cellText) - if cellValue.Length() == 0 { - return fmt.Errorf("failed to find selector: %s", selector) - } - case attrSelector: - cellText, exists = cellValue.Attr(selector) - if !exists { - return fmt.Errorf("failed to find selector: %s", selector) - } - default: - print("default") + if !isTypeSupported(field.Type()) { + return fmt.Errorf("unsupported type: %s", field.Type()) + } + var cellText *string + cellText, err := selector.Run(cellValue) + if err != nil { + return fmt.Errorf("failed to run selector: %w", err) } switch fieldType { case reflect.String: - field.SetString(cellText) + field.SetString(*cellText) case reflect.Int: - in, err := strconv.Atoi(cellText) + in, err := strconv.Atoi(*cellText) if err != nil { return fmt.Errorf("failed to parse int: %s", err) } field.SetInt(int64(in)) case reflect.Int8: - in, err := strconv.Atoi(cellText) + in, err := strconv.Atoi(*cellText) if err != nil { return fmt.Errorf("failed to parse int: %s", err) } field.SetInt(int64(in)) case reflect.Int16: - in, err := strconv.Atoi(cellText) + in, err := strconv.Atoi(*cellText) if err != nil { return fmt.Errorf("failed to parse int: %s", err) } field.SetInt(int64(in)) case reflect.Int32: - in, err := strconv.Atoi(cellText) + in, err := strconv.Atoi(*cellText) if err != nil { return fmt.Errorf("failed to parse int: %s", err) } 
field.SetInt(int64(in)) case reflect.Int64: - in, err := strconv.ParseInt(cellText, 10, 64) + in, err := strconv.ParseInt(*cellText, 10, 64) if err != nil { return fmt.Errorf("failed to parse int: %s", err) } field.SetInt(in) case reflect.Uint: - in, err := strconv.ParseUint(cellText, 10, 64) + in, err := strconv.ParseUint(*cellText, 10, 64) if err != nil { return fmt.Errorf("failed to parse uint: %s", err) } field.SetUint(in) case reflect.Uint8: - in, err := strconv.ParseUint(cellText, 10, 64) + in, err := strconv.ParseUint(*cellText, 10, 64) if err != nil { return fmt.Errorf("failed to parse uint: %s", err) } field.SetUint(in) case reflect.Uint16: - in, err := strconv.ParseUint(cellText, 10, 64) + in, err := strconv.ParseUint(*cellText, 10, 64) if err != nil { return fmt.Errorf("failed to parse uint: %s", err) } field.SetUint(in) case reflect.Uint32: - in, err := strconv.ParseUint(cellText, 10, 64) + in, err := strconv.ParseUint(*cellText, 10, 64) if err != nil { return fmt.Errorf("failed to parse uint: %s", err) } field.SetUint(in) case reflect.Uint64: - in, err := strconv.ParseUint(cellText, 10, 64) + in, err := strconv.ParseUint(*cellText, 10, 64) if err != nil { return fmt.Errorf("failed to parse uint: %s", err) } field.SetUint(in) case reflect.Float32: - in, err := strconv.ParseFloat(cellText, 32) + in, err := strconv.ParseFloat(*cellText, 32) if err != nil { return fmt.Errorf("failed to parse float: %s", err) } field.SetFloat(in) case reflect.Float64: - in, err := strconv.ParseFloat(cellText, 64) + in, err := strconv.ParseFloat(*cellText, 64) if err != nil { return fmt.Errorf("failed to parse float: %s", err) } field.SetFloat(in) - case reflect.Struct: - fieldStructPtr := field.Addr().Interface().(*T) - if err := SetStructField( - fieldStructPtr, - fieldName, - cellValue, - selector, - ); err != nil { - return fmt.Errorf("failed to set field %s: %s", fieldName, err) - } default: return fmt.Errorf("unsupported type: %s", fieldType) } diff --git 
a/reflect_test.go b/reflect_test.go index 97d7c76ec..55504fbe5 100644 --- a/reflect_test.go +++ b/reflect_test.go @@ -23,6 +23,10 @@ type TestStruct struct { Uint16Field uint16 Uint32Field uint32 Uint64Field uint64 + Int8Field int8 + Int16Field int16 + Int32Field int32 + Int64Field int64 } // TestSetStructField tests the SetStructField function @@ -33,7 +37,7 @@ func TestSetStructField(t *testing.T) { structPtr *TestStruct fieldName string cellHTML string - selector string + selector SelectorInferface wantErr bool expected interface{} }{ @@ -42,7 +46,7 @@ func TestSetStructField(t *testing.T) { structPtr: &TestStruct{}, fieldName: "Name", cellHTML: `
John Doe
`, - selector: innerTextSelector, + selector: selector{query: innerTextSelector}, wantErr: false, expected: "John Doe", }, @@ -51,7 +55,7 @@ func TestSetStructField(t *testing.T) { structPtr: &TestStruct{}, fieldName: "UIntField", cellHTML: `
123
`, - selector: innerTextSelector, + selector: selector{query: innerTextSelector}, wantErr: false, expected: uint(123), }, @@ -60,7 +64,7 @@ func TestSetStructField(t *testing.T) { structPtr: &TestStruct{}, fieldName: "Uint8Field", cellHTML: `
123
`, - selector: innerTextSelector, + selector: selector{query: innerTextSelector}, wantErr: false, expected: uint8(123), }, @@ -69,7 +73,7 @@ func TestSetStructField(t *testing.T) { structPtr: &TestStruct{}, fieldName: "Uint16Field", cellHTML: `
123
`, - selector: innerTextSelector, + selector: selector{innerTextSelector}, wantErr: false, expected: uint16(123), }, @@ -78,7 +82,7 @@ func TestSetStructField(t *testing.T) { structPtr: &TestStruct{}, fieldName: "Uint32Field", cellHTML: `
123
`, - selector: innerTextSelector, + selector: selector{innerTextSelector}, wantErr: false, expected: uint32(123), }, @@ -87,7 +91,7 @@ func TestSetStructField(t *testing.T) { structPtr: &TestStruct{}, fieldName: "Uint64Field", cellHTML: `
123
`, - selector: innerTextSelector, + selector: selector{innerTextSelector}, wantErr: false, expected: uint64(123), }, @@ -96,7 +100,7 @@ func TestSetStructField(t *testing.T) { structPtr: &TestStruct{}, fieldName: "Age", cellHTML: `
30
`, - selector: innerTextSelector, + selector: selector{innerTextSelector}, wantErr: false, expected: 30, }, @@ -105,7 +109,7 @@ func TestSetStructField(t *testing.T) { structPtr: &TestStruct{}, fieldName: "Score", cellHTML: `
99.5
`, - selector: innerTextSelector, + selector: selector{innerTextSelector}, wantErr: false, expected: 99.5, }, @@ -114,7 +118,7 @@ func TestSetStructField(t *testing.T) { structPtr: &TestStruct{}, fieldName: "Score", cellHTML: `
99.5
`, - selector: innerTextSelector, + selector: selector{innerTextSelector}, wantErr: false, expected: 99.5, }, @@ -123,7 +127,7 @@ func TestSetStructField(t *testing.T) { structPtr: &TestStruct{}, fieldName: "InvalidField", cellHTML: `
Invalid
`, - selector: innerTextSelector, + selector: selector{innerTextSelector}, wantErr: true, expected: nil, }, @@ -132,7 +136,7 @@ func TestSetStructField(t *testing.T) { structPtr: &TestStruct{}, fieldName: "Age", cellHTML: `
invalid
`, - selector: innerTextSelector, + selector: selector{innerTextSelector}, wantErr: true, expected: nil, }, @@ -141,7 +145,7 @@ func TestSetStructField(t *testing.T) { structPtr: &TestStruct{}, fieldName: "UIntField", cellHTML: `
invalid
`, - selector: innerTextSelector, + selector: selector{innerTextSelector}, wantErr: true, expected: nil, }, @@ -150,7 +154,7 @@ func TestSetStructField(t *testing.T) { structPtr: &TestStruct{}, fieldName: "Score", cellHTML: `
invalid
`, - selector: innerTextSelector, + selector: selector{innerTextSelector}, wantErr: true, expected: nil, }, @@ -159,7 +163,7 @@ func TestSetStructField(t *testing.T) { structPtr: &TestStruct{}, fieldName: "Uint8Field", cellHTML: `
invalid
`, - selector: innerTextSelector, + selector: selector{innerTextSelector}, wantErr: true, expected: nil, }, @@ -168,7 +172,7 @@ func TestSetStructField(t *testing.T) { structPtr: &TestStruct{}, fieldName: "FloatField2", cellHTML: `
invalid
`, - selector: innerTextSelector, + selector: selector{innerTextSelector}, wantErr: true, expected: nil, }, @@ -177,7 +181,7 @@ func TestSetStructField(t *testing.T) { structPtr: &TestStruct{}, fieldName: "Uint16Field", cellHTML: `
invalid
`, - selector: innerTextSelector, + selector: selector{innerTextSelector}, wantErr: true, expected: nil, }, @@ -186,7 +190,7 @@ func TestSetStructField(t *testing.T) { structPtr: &TestStruct{}, fieldName: "Uint32Field", cellHTML: `
invalid
`, - selector: innerTextSelector, + selector: selector{innerTextSelector}, wantErr: true, expected: nil, }, @@ -195,7 +199,7 @@ func TestSetStructField(t *testing.T) { structPtr: &TestStruct{}, fieldName: "Uint64Field", cellHTML: `
invalid
`, - selector: innerTextSelector, + selector: selector{innerTextSelector}, wantErr: true, expected: nil, }, @@ -204,7 +208,7 @@ func TestSetStructField(t *testing.T) { structPtr: &TestStruct{}, fieldName: "StringField", cellHTML: "
Test String
", - selector: innerTextSelector, + selector: selector{innerTextSelector}, wantErr: false, expected: "Test String", }, @@ -213,7 +217,7 @@ func TestSetStructField(t *testing.T) { structPtr: &TestStruct{}, fieldName: "IntField", cellHTML: "
123
", - selector: innerTextSelector, + selector: selector{innerTextSelector}, wantErr: false, expected: 123, }, @@ -222,7 +226,7 @@ func TestSetStructField(t *testing.T) { structPtr: &TestStruct{}, fieldName: "FloatField", cellHTML: "
123.45
", - selector: innerTextSelector, + selector: selector{innerTextSelector}, wantErr: false, expected: 123.45, }, @@ -231,7 +235,7 @@ func TestSetStructField(t *testing.T) { structPtr: &TestStruct{}, fieldName: "FloatField2", cellHTML: "
1.45
", - selector: innerTextSelector, + selector: selector{innerTextSelector}, wantErr: false, expected: float32(1.45), }, @@ -240,7 +244,7 @@ func TestSetStructField(t *testing.T) { structPtr: &TestStruct{}, fieldName: "NonExistentField", cellHTML: "
Test
", - selector: innerTextSelector, + selector: selector{innerTextSelector}, wantErr: true, expected: nil, }, @@ -249,7 +253,7 @@ func TestSetStructField(t *testing.T) { structPtr: &TestStruct{}, fieldName: "UnexportedField", cellHTML: "
456
", - selector: innerTextSelector, + selector: selector{innerTextSelector}, wantErr: true, expected: nil, }, @@ -258,7 +262,7 @@ func TestSetStructField(t *testing.T) { structPtr: &TestStruct{}, fieldName: "aa", cellHTML: "
", - selector: innerTextSelector, + selector: selector{innerTextSelector}, wantErr: true, expected: nil, }, @@ -267,7 +271,7 @@ func TestSetStructField(t *testing.T) { structPtr: &TestStruct{}, fieldName: "StringField", cellHTML: "
", - selector: attrSelector, + selector: selector{attrSelector}, wantErr: true, expected: nil, }, @@ -276,7 +280,7 @@ func TestSetStructField(t *testing.T) { structPtr: &TestStruct{}, fieldName: "IntField", cellHTML: "
invalid
", - selector: innerTextSelector, + selector: selector{innerTextSelector}, wantErr: true, expected: nil, }, @@ -285,7 +289,7 @@ func TestSetStructField(t *testing.T) { structPtr: &TestStruct{}, fieldName: "FloatField", cellHTML: "
invalid
", - selector: innerTextSelector, + selector: selector{innerTextSelector}, wantErr: true, expected: nil, }, @@ -294,12 +298,83 @@ func TestSetStructField(t *testing.T) { structPtr: &TestStruct{}, fieldName: "FloatField2", cellHTML: "
1.23.45
", - selector: innerTextSelector, + selector: selector{innerTextSelector}, wantErr: true, expected: nil, }, + { + name: "Invalid int8 value", + structPtr: &TestStruct{}, + fieldName: "Int8Field", + cellHTML: "
invalid
", + selector: selector{innerTextSelector}, + wantErr: true, + expected: nil, + }, + { + name: "Invalid int16 value", + structPtr: &TestStruct{}, + fieldName: "Int16Field", + cellHTML: "
invalid
", + selector: selector{innerTextSelector}, + wantErr: true, + expected: nil, + }, + { + name: "Invalid int32 value", + structPtr: &TestStruct{}, + fieldName: "Int32Field", + cellHTML: "
invalid
", + selector: selector{innerTextSelector}, + wantErr: true, + expected: nil, + }, + { + name: "Invalid int64 value", + structPtr: &TestStruct{}, + fieldName: "Int64Field", + cellHTML: "
invalid
", + selector: selector{innerTextSelector}, + wantErr: true, + expected: nil, + }, + { + name: "Valid int8 value", + structPtr: &TestStruct{}, + fieldName: "Int8Field", + cellHTML: "
123
", + selector: selector{innerTextSelector}, + wantErr: false, + expected: int8(123), + }, + { + name: "Valid int16 value", + structPtr: &TestStruct{}, + fieldName: "Int16Field", + cellHTML: "
123
", + selector: selector{innerTextSelector}, + wantErr: false, + expected: int16(123), + }, + { + name: "Valid int32 value", + structPtr: &TestStruct{}, + fieldName: "Int32Field", + cellHTML: "
456
", + selector: selector{innerTextSelector}, + wantErr: false, + expected: int32(456), + }, + { + name: "Valid int64 value", + structPtr: &TestStruct{}, + fieldName: "Int64Field", + cellHTML: "
25565
", + selector: selector{innerTextSelector}, + wantErr: false, + expected: int64(25565), + }, } - for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { tt := tt @@ -309,7 +384,310 @@ func TestSetStructField(t *testing.T) { t.Fatalf("failed to create document: %v", err) } cellValue := doc.Find("div") + err = SetStructField(tt.structPtr, tt.fieldName, cellValue, tt.selector) + if (err != nil) != tt.wantErr { + t.Errorf("SetStructField() error = %v, wantErr %v", err, tt.wantErr) + return + } + if err == nil { + v := reflect.ValueOf(tt.structPtr).Elem().FieldByName(tt.fieldName).Interface() + if !reflect.DeepEqual(v, tt.expected) { + t.Errorf("SetStructField() = %v, expected %v", v, tt.expected) + } + } + }) + } +} + +// BenchStruct is a test struct +// fields in this struct are used in the tests +type BenchStruct struct { + Name string + Age int + Score float64 + IntField int + FloatField float64 + FloatField2 float32 + StringField string + UIntField uint + Uint8Field uint8 + Uint16Field uint16 + Uint32Field uint32 + Uint64Field uint64 +} +// BenchSetStructField tests the SetStructField function +func BenchSetStructField(t *testing.B) { + Benchs := []struct { + name string + structPtr *BenchStruct + fieldName string + cellHTML string + selector SelectorInferface + wantErr bool + expected interface{} + }{ + { + name: "Set string field", + structPtr: &BenchStruct{}, + fieldName: "Name", + cellHTML: `
John Doe
`, + selector: selector{innerTextSelector}, + wantErr: false, + expected: "John Doe", + }, + { + name: "Set uint field", + structPtr: &BenchStruct{}, + fieldName: "UIntField", + cellHTML: `
123
`, + selector: selector{innerTextSelector}, + wantErr: false, + expected: uint(123), + }, + { + name: "Set uint8 field", + structPtr: &BenchStruct{}, + fieldName: "Uint8Field", + cellHTML: `
123
`, + selector: selector{innerTextSelector}, + wantErr: false, + expected: uint8(123), + }, + { + name: "Set uint16 field", + structPtr: &BenchStruct{}, + fieldName: "Uint16Field", + cellHTML: `
123
`, + selector: selector{innerTextSelector}, + wantErr: false, + expected: uint16(123), + }, + { + name: "Set uint32 field", + structPtr: &BenchStruct{}, + fieldName: "Uint32Field", + cellHTML: `
123
`, + selector: selector{innerTextSelector}, + wantErr: false, + expected: uint32(123), + }, + { + name: "Set uint64 field", + structPtr: &BenchStruct{}, + fieldName: "Uint64Field", + cellHTML: `
123
`, + selector: selector{innerTextSelector}, + wantErr: false, + expected: uint64(123), + }, + { + name: "Set int field", + structPtr: &BenchStruct{}, + fieldName: "Age", + cellHTML: `
30
`, + selector: selector{innerTextSelector}, + wantErr: false, + expected: 30, + }, + { + name: "Set float field (float64)", + structPtr: &BenchStruct{}, + fieldName: "Score", + cellHTML: `
99.5
`, + selector: selector{innerTextSelector}, + wantErr: false, + expected: 99.5, + }, + { + name: "Set float field (float32)", + structPtr: &BenchStruct{}, + fieldName: "Score", + cellHTML: `
99.5
`, + selector: selector{innerTextSelector}, + wantErr: false, + expected: 99.5, + }, + { + name: "Invalid field name", + structPtr: &BenchStruct{}, + fieldName: "InvalidField", + cellHTML: `
Invalid
`, + selector: selector{innerTextSelector}, + wantErr: true, + expected: nil, + }, + { + name: "Invalid int value", + structPtr: &BenchStruct{}, + fieldName: "Age", + cellHTML: `
invalid
`, + selector: selector{innerTextSelector}, + wantErr: true, + expected: nil, + }, + { + name: "Invalid uint value", + structPtr: &BenchStruct{}, + fieldName: "UIntField", + cellHTML: `
invalid
`, + selector: selector{innerTextSelector}, + wantErr: true, + expected: nil, + }, + { + name: "Invalid float value", + structPtr: &BenchStruct{}, + fieldName: "Score", + cellHTML: `
invalid
`, + selector: selector{innerTextSelector}, + wantErr: true, + expected: nil, + }, + { + name: "Invalid Uint8", + structPtr: &BenchStruct{}, + fieldName: "Uint8Field", + cellHTML: `
invalid
`, + selector: selector{innerTextSelector}, + wantErr: true, + expected: nil, + }, + { + name: "Invalid Float32", + structPtr: &BenchStruct{}, + fieldName: "FloatField2", + cellHTML: `
invalid
`, + selector: selector{innerTextSelector}, + wantErr: true, + expected: nil, + }, + { + name: "Invalid Uint16", + structPtr: &BenchStruct{}, + fieldName: "Uint16Field", + cellHTML: `
invalid
`, + selector: selector{innerTextSelector}, + wantErr: true, + expected: nil, + }, + { + name: "Invalid Uint32", + structPtr: &BenchStruct{}, + fieldName: "Uint32Field", + cellHTML: `
invalid
`, + selector: selector{innerTextSelector}, + wantErr: true, + expected: nil, + }, + { + name: "Invalid Uint64", + structPtr: &BenchStruct{}, + fieldName: "Uint64Field", + cellHTML: `
invalid
`, + selector: selector{innerTextSelector}, + wantErr: true, + expected: nil, + }, + { + name: "Set string field", + structPtr: &BenchStruct{}, + fieldName: "StringField", + cellHTML: "
Bench String
", + selector: selector{innerTextSelector}, + wantErr: false, + expected: "Bench String", + }, + { + name: "Set int field", + structPtr: &BenchStruct{}, + fieldName: "IntField", + cellHTML: "
123
", + selector: selector{innerTextSelector}, + wantErr: false, + expected: 123, + }, + { + name: "Set float field", + structPtr: &BenchStruct{}, + fieldName: "FloatField", + cellHTML: "
123.45
", + selector: selector{innerTextSelector}, + wantErr: false, + expected: 123.45, + }, + { + name: "set float32 field", + structPtr: &BenchStruct{}, + fieldName: "FloatField2", + cellHTML: "
1.45
", + selector: selector{innerTextSelector}, + wantErr: false, + expected: float32(1.45), + }, + { + name: "Field does not exist", + structPtr: &BenchStruct{}, + fieldName: "NonExistentField", + cellHTML: "
Bench
", + selector: selector{innerTextSelector}, + wantErr: true, + expected: nil, + }, + { + name: "Field cannot be set", + structPtr: &BenchStruct{}, + fieldName: "UnexportedField", + cellHTML: "
456
", + selector: selector{innerTextSelector}, + wantErr: true, + expected: nil, + }, + { + name: "Selector not found (innerText)", + structPtr: &BenchStruct{}, + fieldName: "aa", + cellHTML: "
", + selector: selector{innerTextSelector}, + wantErr: true, + expected: nil, + }, + { + name: "Selector not found (attr)", + structPtr: &BenchStruct{}, + fieldName: "StringField", + cellHTML: "
", + selector: selector{attrSelector}, + wantErr: true, + expected: nil, + }, + { + name: "Invalid int value", + structPtr: &BenchStruct{}, + fieldName: "IntField", + cellHTML: "
invalid
", + selector: selector{innerTextSelector}, + wantErr: true, + expected: nil, + }, + { + name: "Invalid float value", + structPtr: &BenchStruct{}, + fieldName: "FloatField", + cellHTML: "
invalid
", + selector: selector{innerTextSelector}, + wantErr: true, + expected: nil, + }, + } + for _, tt := range Benchs { + t.Run(tt.name, func(t *testing.B) { + tt := tt + doc, err := goquery.NewDocumentFromReader(strings.NewReader(tt.cellHTML)) + if err != nil { + t.Fatalf("failed to create document: %v", err) + } + cellValue := doc.Find("div") err = SetStructField(tt.structPtr, tt.fieldName, cellValue, tt.selector) if (err != nil) != tt.wantErr { t.Errorf("SetStructField() error = %v, wantErr %v", err, tt.wantErr) diff --git a/scripts/makefile.coverage.sh b/scripts/makefile.coverage.sh index 7736de0b2..963104d3d 100644 --- a/scripts/makefile.coverage.sh +++ b/scripts/makefile.coverage.sh @@ -1,9 +1,11 @@ #!/bin/bash -# file: makefile.coverage.sh -# url: https://github.com/conneroisu/seltabl/scripts/makefile.coverage.sh -# title: Coverage Script -# description: This script runs the coverage testing for the project. +# file: makefile.test.sh +# url: https://github.com/conneroisu/seltabl/scripts/makefile.test.sh +# title: Test Script +# description: This script runs the test for the project. +# +# usage: make test -go test -coverprofile=coverage.out ./... +go test -race -timeout 30s ./... -gocovsh +go test -coverprofile=coverage.out ./... diff --git a/scripts/makefile.dev.sh b/scripts/makefile.dev.sh deleted file mode 100644 index 6d605f93a..000000000 --- a/scripts/makefile.dev.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash -# file: makefile.dev.sh -# url: https://github.com/conneroisu/seltabl/scripts/makefile.dev.sh -# title: Running Development Scripts - -shopt -s globstar - -templ generate --watch --proxy="http://localhost:8080" --cmd="doppler run -- air" diff --git a/scripts/makefile.fmt.sh b/scripts/makefile.fmt.sh index 97fc6abcd..a0f2aae13 100644 --- a/scripts/makefile.fmt.sh +++ b/scripts/makefile.fmt.sh @@ -6,31 +6,6 @@ # # Usage: make fmt +gofmt -w . 
-targets=( - "*.go" - "**/*.go" - "**/**/*.go" - "**/**/**/*.go" - "**/**/**/**/*.go" - "**/**/**/**/**/*.go" -) - -# For each of the targets, run gofmt and goline. -for target in "${targets[@]}"; do - if ls "$target" &>/dev/null; then - if ! command -v gum &>/dev/null; then - echo "formatting $target with gofmt" - gofmt -w "$target" - echo "formatting $target with golines" - goline -w --max-len=79 "$target" - else - gum spin --spinner dot --title "Formatting Go Files with 'go fmt' in $target" --show-output -- \ - go fmt "$target" - gum spin --spinner dot --title "Formatting Go Files with 'golines' in $target" --show-output -- \ - golines -w --max-len=79 "$target" - fi - else - continue - fi -done +goline -w --max-len=79 . diff --git a/scripts/makefile.lint.sh b/scripts/makefile.lint.sh index 90c2a196d..bf7e45904 100644 --- a/scripts/makefile.lint.sh +++ b/scripts/makefile.lint.sh @@ -6,11 +6,10 @@ # # Usage: make js -gum spin --spinner dot --title "Running Static Check" --show-output -- \ - staticcheck ./... +staticcheck ./... -gum spin --spinner dot --title "Running GolangCI Lint" --show-output -- \ - golangci-lint run +golangci-lint run -gum spin --spinner dot --title "Running GoVet" --show-output -- \ - go vet ./... +go vet ./... + +revive -config .revive.toml ./... diff --git a/scripts/makefile.test.sh b/scripts/makefile.test.sh index 692f28df5..cb86e116d 100644 --- a/scripts/makefile.test.sh +++ b/scripts/makefile.test.sh @@ -3,25 +3,9 @@ # url: https://github.com/conneroisu/seltabl/scripts/makefile.test.sh # title: Test Script # description: This script runs the test for the project. -# +# # usage: make test -gum spin --spinner dot --title "Running Go Test With Race" --show-output -- \ - go test -race -v -timeout 30s ./... +go test -race -v -timeout 30s ./... -go test -coverprofile=coverage.out ./... 
- -# if gocovsh is executable -if [ -x "$(command -v gocovsh)" ]; then - # if gocovsh is not empty - if [ -s coverage.out ]; then - # run gocovsh - gocovsh - else - # if coverage.out is empty/not found - echo "No coverage.out file found." - fi -else - # if gocovsh is not executable - echo "gocovsh is not executable." -fi +go test -coverprofile=coverage.out ./... diff --git a/scripts/makefile.tidy.sh b/scripts/makefile.tidy.sh index 97f975887..b0f31e20e 100644 --- a/scripts/makefile.tidy.sh +++ b/scripts/makefile.tidy.sh @@ -1,3 +1,9 @@ #!/bin/bash -gum spin --spinner dot --title "Running Go Mod Tidy" --show-output -- \ - go mod tidy +# file: makefile.tidy.sh +# url: https://github.com/conneroisu/seltabl/scripts/makefile.tidy.sh +# title: Running Go Mod Tidy +# description: This script runs go mod tidy to clean up the go.mod and go.sum files. +# +# Usage: make tidy + +go mod tidy diff --git a/scripts/makefile.vet.sh b/scripts/makefile.vet.sh deleted file mode 100644 index f882e1db9..000000000 --- a/scripts/makefile.vet.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash - -go vet ./... -sqlc vet -cd .. 
diff --git a/scripts/taskfile.clean.sh b/scripts/taskfile.clean.sh
new file mode 100644
index 000000000..03a67d9bb
--- /dev/null
+++ b/scripts/taskfile.clean.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+# file: taskfile.clean.sh
+# url: https://github.com/conneroisu/seltabl/scripts/taskfile.clean.sh
+# title: Cleaning Script
+# description: This script cleans the project
+
+task install
+
+# if there is a tmp folder, delete it
+if [ -d "tmp" ]; then
+    rm -rf tmp
+fi
+
+# if there is a bin folder, delete it
+if [ -d "bin" ]; then
+    rm -rf bin
+fi
+
+# if there is a node_modules folder, delete it
+if [ -d "node_modules" ]; then
+    rm -rf node_modules
+fi
+
+# if there is a node_modules in a subfolder, delete it
+if [ -d "data/javascript/node_modules" ]; then
+    rm -rf data/javascript/node_modules
+fi
+
+# if there is a coverage.out file, delete it
+if [ -f "coverage.out" ]; then
+    rm -rf coverage.out
+fi
diff --git a/scripts/taskfile.coverage.sh b/scripts/taskfile.coverage.sh
new file mode 100644
index 000000000..b954e6426
--- /dev/null
+++ b/scripts/taskfile.coverage.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+# file: taskfile.coverage.sh
+# url: https://github.com/conneroisu/seltabl/scripts/taskfile.coverage.sh
+# title: Running GoCovSh
+# description: This script runs gocovsh to generate a coverage report.
+#
+# Usage: make coverage
+
+task install
+
+# if gocovsh is executable
+if [ -x "$(command -v gocovsh)" ]; then
+    # if coverage.out exists and is not empty
+    if [ -s coverage.out ]; then
+        # run gocovsh
+        gocovsh
+    else
+        # if coverage.out is empty/not found
+        echo "No coverage.out file found."
+    fi
+else
+    # if gocovsh is not executable
+    echo "gocovsh is not executable."
+fi
diff --git a/scripts/taskfile.docs.sh b/scripts/taskfile.docs.sh
new file mode 100644
index 000000000..e399d7639
--- /dev/null
+++ b/scripts/taskfile.docs.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+# Name: taskfile.docs.sh
+# Url:
+# https://github.com/conneroisu/seltabl/main/scripts/taskfile.docs.sh
+#
+# Description: A script to generate the go docs for the project.
+#
+# Usage: make docs
+
+gum spin --spinner dot --title "Making Docs Folder" --show-output -- \
+    mkdir -p docs
+
+gum spin --spinner dot --title "Generating Docs" --show-output -- \
+    golds -s -gen -wdpkgs-listing=promoted -dir=./docs -footer=verbose+qrcode
+
+gum spin --spinner dot --title "Opening Docs Folder" --show-output -- \
+    xdg-open ./docs/index.html
diff --git a/scripts/taskfile.fmt.sh b/scripts/taskfile.fmt.sh
new file mode 100644
index 000000000..059f6d105
--- /dev/null
+++ b/scripts/taskfile.fmt.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+# file: taskfile.fmt.sh
+# url: https://github.com/conneroisu/seltabl/scripts/taskfile.fmt.sh
+# title: Formatting Go Files
+# description: This script formats the Go files using gofmt and golines.
+#
+# Usage: make fmt
+
+gum spin --spinner dot --title "Formatting Go Files with 'go fmt' in ." --show-output -- \
+    gofmt -w .
+
+gum spin --spinner dot --title "Formatting Go Files with 'golines' in ." --show-output -- \
+    golines -w --max-len=79 .
diff --git a/scripts/taskfile.install.sh b/scripts/taskfile.install.sh
new file mode 100644
index 000000000..200e2d7d9
--- /dev/null
+++ b/scripts/taskfile.install.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+# file: taskfile.install.sh
+# url: https://github.com/conneroisu/seltabl/scripts/taskfile.install.sh
+# title: Installing Development Requirements
+# description: This script installs the required development tools for the project.
+
+# Check if the command, brew, exists, if not install it
+command -v brew >/dev/null 2>&1 || /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
+
+# Check if the command, go, exists, if not install it
+command -v go >/dev/null 2>&1 || brew install go
+
+# Check if the command, gum, exists, if not install it
+command -v gum >/dev/null 2>&1 || go install github.com/charmbracelet/gum@latest
+
+# `command` is a shell builtin, so the existence checks below run in this
+# shell and only the actual installation is wrapped in `gum spin`.
+
+# Check if the command, protoc, exists, if not install the Go protobuf plugins
+# NOTE(review): this installs protoc-gen-go and protoc-gen-go-grpc, not protoc itself; confirm that is intended.
+if ! command -v protoc >/dev/null 2>&1; then
+    gum spin --spinner dot --title "Installing Protoc" --show-output -- \
+        go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.28
+    gum spin --spinner dot --title "Installing Protoc" --show-output -- \
+        go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@v1.2
+fi
+
+# Check if the command, sqlite3, exists, if not install it
+if ! command -v sqlite3 >/dev/null 2>&1; then
+    gum spin --spinner dot --title "Installing SQLite3" --show-output -- \
+        brew install sqlite
+fi
+
+# Check if the command, sqldiff, exists, if not install it
+if ! command -v sqldiff >/dev/null 2>&1; then
+    gum spin --spinner dot --title "Installing SQLDiff" --show-output -- \
+        brew install sqldiff
+fi
+
+# Check if the command, sqlc, exists, if not install it
+if ! command -v sqlc >/dev/null 2>&1; then
+    gum spin --spinner dot --title "Installing SQLC" --show-output -- \
+        brew install sqlc
+fi
diff --git a/scripts/taskfile.lint.sh b/scripts/taskfile.lint.sh
new file mode 100644
index 000000000..47c7022de
--- /dev/null
+++ b/scripts/taskfile.lint.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+# file: taskfile.lint.sh
+# url: https://github.com/conneroisu/seltabl/scripts/taskfile.lint.sh
+# title: Lint Script
+# description: This script runs the linters for the project.
+
+gum spin --spinner dot --title "Running Static Check" --show-output -- \
+    staticcheck ./...
+
+gum spin --spinner dot --title "Running GolangCI Lint" --show-output -- \
+    golangci-lint run
+
+gum spin --spinner dot --title "Running GoVet" --show-output -- \
+    go vet ./...
+
+gum spin --spinner dot --title "Running Revive" --show-output -- \
+    revive -config .revive.toml ./...
diff --git a/scripts/taskfile.test.sh b/scripts/taskfile.test.sh
new file mode 100644
index 000000000..38b472f05
--- /dev/null
+++ b/scripts/taskfile.test.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+# file: taskfile.test.sh
+# url: https://github.com/conneroisu/seltabl/scripts/taskfile.test.sh
+# title: Test Script
+# description: This script runs the test for the project.
+#
+# usage: make test
+
+gum spin --spinner dot --title "Running Tests" --show-output -- \
+    go test -race -timeout 30s ./...
+
+gum spin --spinner dot --title "Generating Coverage" --show-output -- \
+    go test -coverprofile=coverage.out ./...
diff --git a/scripts/taskfile.tidy.sh b/scripts/taskfile.tidy.sh
new file mode 100644
index 000000000..6a88d4ed4
--- /dev/null
+++ b/scripts/taskfile.tidy.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+# file: taskfile.tidy.sh
+# url: https://github.com/conneroisu/seltabl/scripts/taskfile.tidy.sh
+# title: Running Go Mod Tidy
+# description: This script runs go mod tidy to clean up the go.mod and go.sum files.
+#
+# Usage: make tidy
+
+gum spin --spinner dot --title "Running Go Mod Tidy" --show-output -- \
+    go mod tidy
diff --git a/select.go b/select.go
new file mode 100644
index 000000000..ce2801e68
--- /dev/null
+++ b/select.go
@@ -0,0 +1,50 @@
+package seltabl
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/PuerkitoBio/goquery"
+)
+
+// SelectorInferface is implemented by types that extract the text of a
+// cell from a goquery selection.
+//
+// NOTE(review): the name looks like a misspelling of "SelectorInterface"; it
+// is left as-is because renaming an exported type would break callers.
+type SelectorInferface interface {
+	// Run extracts the cell text from cellValue, or reports why it could not.
+	Run(cellValue *goquery.Selection) (*string, error)
+}
+
+// selector is a SelectorInferface whose behavior is chosen by query.
+type selector struct {
+	// query is compared against innerTextSelector and attrSelector in Run.
+	query string
+}
+
+// Run extracts the cell text from cellValue according to s.query.
+//
+// For innerTextSelector it returns the whitespace-trimmed text of the
+// selection and fails when the selection is empty. For attrSelector it
+// returns the attribute named by s.query and fails when that attribute is
+// absent. Any other query is rejected with an error instead of silently
+// yielding an empty string.
+func (s selector) Run(cellValue *goquery.Selection) (*string, error) {
+	switch s.query {
+	case innerTextSelector:
+		if cellValue.Length() == 0 {
+			return nil, fmt.Errorf("failed to find selector: %s", s.query)
+		}
+		text := strings.TrimSpace(cellValue.Text())
+		return &text, nil
+	case attrSelector:
+		attr, ok := cellValue.Attr(s.query)
+		if !ok {
+			return nil, fmt.Errorf("failed to find selector: %s", s.query)
+		}
+		return &attr, nil
+	default:
+		return nil, fmt.Errorf("unsupported selector: %s", s.query)
+	}
+}
diff --git a/select_test.go b/select_test.go
new file mode 100644
index 000000000..6add15c83
--- /dev/null
+++ b/select_test.go
@@ -0,0 +1 @@
+package seltabl
diff --git a/valid.go b/valid.go
new file mode 100644
index 000000000..3dc315173
--- /dev/null
+++ b/valid.go
@@ -0,0 +1,19 @@
+package seltabl
+
+import "reflect"
+
+// isTypeSupported reports whether typ's kind is one of the supported kinds:
+// string, the signed integers (int, int8..int64), the unsigned integers
+// (uint, uint8..uint64) and the floats (float32, float64). Every other
+// kind, including struct, is reported as unsupported.
+func isTypeSupported(typ reflect.Type) bool {
+	switch typ.Kind() {
+	case reflect.String,
+		reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64,
+		reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64,
+		reflect.Float32, reflect.Float64:
+		return true
+	default:
+		return false
+	}
+}
diff --git a/valid_test.go b/valid_test.go
new file mode 100644
index 000000000..f641da242
--- /dev/null
+++ b/valid_test.go
@@ -0,0 +1,67 @@
+package seltabl
+
+import (
+	"reflect"
+	"testing"
+)
+
+// TestIsTypeSupported checks that isTypeSupported accepts the scalar kinds
+// it lists and rejects everything else.
+func TestIsTypeSupported(t *testing.T) {
+	t.Parallel()
+	tests := []struct {
+		name        string
+		reflectType reflect.Type
+		want        bool
+	}{
+		{
+			name:        "Invalid type (struct)",
+			reflectType: reflect.TypeOf(TestStruct{}),
+			want:        false,
+		},
+		{
+			name:        "Invalid type",
+			reflectType: reflect.TypeOf(testing.T{}),
+			want:        false,
+		},
+		{
+			name:        "Invalid type InternalTest",
+			reflectType: reflect.TypeOf(testing.InternalTest{}),
+			want:        false,
+		},
+		{
+			name:        "Invalid type (slice)",
+			reflectType: reflect.TypeOf([]int{}),
+			want:        false,
+		},
+		{
+			name:        "Valid type (string)",
+			reflectType: reflect.TypeOf(""),
+			want:        true,
+		},
+		{
+			name:        "Valid type (int)",
+			reflectType: reflect.TypeOf(int(0)),
+			want:        true,
+		},
+		{
+			name:        "Valid type (uint8)",
+			reflectType: reflect.TypeOf(uint8(0)),
+			want:        true,
+		},
+		{
+			name:        "Valid type (float32)",
+			reflectType: reflect.TypeOf(float32(0)),
+			want:        true,
+		},
+	}
+	for _, tt := range tests {
+		tt := tt // capture the current case for the parallel subtest
+		t.Run(tt.name, func(t *testing.T) {
+			t.Parallel()
+			if got := isTypeSupported(tt.reflectType); got != tt.want {
+				t.Errorf("isTypeSupported() = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}