Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dev #2

Merged
merged 7 commits into from
Jun 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .github/workflows/lint.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,8 @@ jobs:
go install honnef.co/go/tools/cmd/staticcheck@latest
staticcheck ./...
go vet ./...

- name: revive
run: |
go install github.com/mgechev/revive@latest
revive -config .revive.toml ./...
File renamed without changes.
24 changes: 0 additions & 24 deletions .testcoverage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,27 +24,3 @@ threshold:
# (optional; default 0)
# The minimum total coverage project should have
total: 0

# Holds regexp rules which will override thresholds for matched files or packages
# using their paths.
#
# First rule from this list that matches file or package is going to apply
# new threshold to it. If project has multiple rules that match same path,
# override rules should be listed in order from specific to more general rules.
override:
# Increase coverage threshold to 100% for `foo` package
# (default is 80, as configured above in this example)
- threshold: 100
path: ^pkg/lib/foo$

# Holds regexp rules which will exclude matched files or packages
# from coverage statistics
exclude:
# Exclude files or packages matching their paths
paths:
- \.pb\.go$ # excludes all protobuf generated files
- ^pkg/bar # exclude package `pkg/bar`

# NOTES:
# - symbol `/` in all path regexps will be replaced by current OS file path separator
# to properly work on Windows
2 changes: 0 additions & 2 deletions cmd/cmds/docs.go

This file was deleted.

26 changes: 0 additions & 26 deletions cmd/cmds/gen.go

This file was deleted.

2 changes: 0 additions & 2 deletions cmd/doc.go

This file was deleted.

33 changes: 0 additions & 33 deletions cmd/root.go

This file was deleted.

10 changes: 0 additions & 10 deletions cmd/routes.go

This file was deleted.

14 changes: 11 additions & 3 deletions decoder.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
package seltabl

import "io"
import (
"fmt"
"io"
)

// Decoder is a struct for decoding a reader into a slice of structs.
//
Expand Down Expand Up @@ -72,7 +75,12 @@ func NewDecoder[T any](r io.ReadCloser) *Decoder[T] {
// This allows for decoding a reader into a slice of structs.
//
// Similar to the json.Decoder for brevity.
func (d *Decoder[T]) Decode(value *T) ([]T, error) {
func (d *Decoder[T]) Decode() ([]T, error) {
defer d.reader.Close()
return NewFromReader[T](d.reader)
var result []T
result, err := NewFromReader[T](d.reader)
if err != nil {
return nil, fmt.Errorf("failed to decode: %w", err)
}
return result, nil
}
138 changes: 138 additions & 0 deletions decoder_test.go
Original file line number Diff line number Diff line change
@@ -1 +1,139 @@
package seltabl

import (
"io"
"strings"
"testing"
)

// DecodeExStruct is the row type that TestDecoder_Decode decodes HTML tables
// into.
//
// Each field is bound to one table column through its struct tags: the
// selectors on A address the first td of a row and the selectors on B the
// second td.
//
// NOTE(review): the exact meaning of the seltabl, hSel, dSel and cSel tags is
// defined by the seltabl package, which is outside this view — presumably
// header selector, data selector and cell-content selector; confirm.
type DecodeExStruct struct {
	// A receives the value of the first column (td:nth-child(1)).
	A string `json:"a" seltabl:"a" hSel:"tr:nth-child(1) td:nth-child(1)" dSel:"tr td:nth-child(1)" cSel:"$text"`
	// B receives the value of the second column (td:nth-child(2)).
	B string `json:"b" seltabl:"b" hSel:"tr:nth-child(1) td:nth-child(2)" dSel:"tr td:nth-child(2)" cSel:"$text"`
}

// TestDecoder_Decode exercises Decoder.Decode with table-driven cases: one
// well-formed table that must decode into the expected rows, and several
// malformed tables for which Decode must return an error.
func TestDecoder_Decode(t *testing.T) {
	testCases := []struct {
		name     string           // subtest name; must be unique so -run can target it
		input    string           // HTML document handed to the decoder
		expected []DecodeExStruct // rows expected on success; nil for error cases
		hasError bool             // true when Decode must return a non-nil error
	}{
		{
			name: "Valid input",
			input: `
<table>
<tr>
<td>a</td>
<td>b</td>
</tr>
<tr>
<td>1</td>
<td>2</td>
</tr>
<tr>
<td>3</td>
<td>4</td>
</tr>
</table>
`,
			expected: []DecodeExStruct{
				{A: "1", B: "2"},
				{A: "3", B: "4"},
			},
			hasError: false,
		},
		// NOTE(review): the next three cases share the same input (a data row
		// that is missing its second cell). The "invalid html" and "invalid
		// json" variants were presumably meant to use different documents —
		// confirm and differentiate them.
		{
			name: "Invalid input",
			input: `
<table>
<tr>
<td>a</td>
<td>b</td>
</tr>
<tr>
<td>1</td>
</tr>
</table>
`,
			expected: nil,
			hasError: true,
		},
		{
			name: "Invalid input with invalid html",
			input: `
<table>
<tr>
<td>a</td>
<td>b</td>
</tr>
<tr>
<td>1</td>
</tr>
</table>
`,
			expected: nil,
			hasError: true,
		},
		{
			name: "Invalid input with invalid json",
			input: `
<table>
<tr>
<td>a</td>
<td>b</td>
</tr>
<tr>
<td>1</td>
</tr>
</table>
`,
			expected: nil,
			hasError: true,
		},
		{
			// Renamed from a duplicate of the previous case's name so that
			// every subtest has a distinct, descriptive identifier.
			name: "Invalid input with a single three-cell row",
			input: `
<table>
<tr>
<td>a</td>
<td>b</td>
<td>1</td>
</tr>
</table>
`,
			expected: nil,
			hasError: true,
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			r := io.NopCloser(strings.NewReader(tc.input))
			result, err := NewDecoder[DecodeExStruct](r).Decode()

			if tc.hasError {
				if err == nil {
					t.Errorf("Expected an error, but got none")
				}
				return
			}

			// Stop immediately on these failures: continuing would index
			// into a result slice that may be shorter than tc.expected and
			// panic instead of reporting a clean test failure.
			if err != nil {
				t.Fatalf("Unexpected error: %v", err)
			}
			if len(result) != len(tc.expected) {
				t.Fatalf("Expected %d results, but got %d", len(tc.expected), len(result))
			}

			for i, expected := range tc.expected {
				if result[i] != expected {
					t.Errorf("Expected %+v, but got %+v", expected, result[i])
				}
			}
		})
	}
}
7 changes: 0 additions & 7 deletions examples/example2/example2.go

This file was deleted.

7 changes: 0 additions & 7 deletions examples/example3/example3.go

This file was deleted.

7 changes: 0 additions & 7 deletions examples/example4/example4.go

This file was deleted.

7 changes: 0 additions & 7 deletions examples/example5/example5.go

This file was deleted.

22 changes: 22 additions & 0 deletions examples/huggingface-leader-board/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// Package main shows how to use the seltabl package to scrape a table from a given url.

Check failure on line 1 in examples/huggingface-leader-board/main.go

View workflow job for this annotation

GitHub Actions / Update coverage badge

File test coverage below threshold

File test coverage below threshold: coverage: 40%; threshold: 80%
// The table used in this example is from the huggingface llm leader board.
package main

import (
"fmt"
"os"
)

// main runs the example for the table at:
// https://huggingface.co/spaces/HuggingFaceH4/LLM-Leaderboard
//
// If run returns an error, it is written to standard error (so it does not
// mix with the example's regular output) and the process exits with status 1.
func main() {
	if err := run(); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}

// run executes the example: it prints a greeting line to standard output and
// always reports success by returning nil.
func run() error {
	const greeting = "Hello, World from llm leader board!"

	fmt.Println(greeting)
	return nil
}
13 changes: 13 additions & 0 deletions examples/huggingface-leader-board/main_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package main

import (
"testing"

"github.com/stretchr/testify/assert"
)

// TestRun tests the run function
func TestRun(t *testing.T) {
err := run()
assert.Nil(t, err)
}
3 changes: 3 additions & 0 deletions examples/ncaa/doc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Package main is an example of how to use the seltabl package.
package main
File renamed without changes.
24 changes: 24 additions & 0 deletions examples/penguins-wikipedia/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Package main is an example of how to use the seltabl package.

Check failure on line 1 in examples/penguins-wikipedia/main.go

View workflow job for this annotation

GitHub Actions / Update coverage badge

File test coverage below threshold

File test coverage below threshold: coverage: 40%; threshold: 80%
// It uses the seltabl package to scrape an HTML table from a given URL.
// The table used in this example is from the wikipedia page for
// penguins.
package main

import (
"fmt"
"os"
)

// main runs the example for the table at:
// https://en.wikipedia.org/wiki/List_of_penguins
//
// If run returns an error, it is written to standard error (so it does not
// mix with the example's regular output) and the process exits with status 1.
func main() {
	if err := run(); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}

// run executes the example: it prints a greeting line to standard output and
// always reports success by returning nil.
func run() error {
	const greeting = "Hello, World from list of penguins!"

	fmt.Println(greeting)
	return nil
}
13 changes: 13 additions & 0 deletions examples/penguins-wikipedia/main_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package main

import (
"testing"

"github.com/stretchr/testify/assert"
)

// TestRun tests the run function
func TestRun(t *testing.T) {
err := run()
assert.Nil(t, err)
}
Loading
Loading