Refactored to one content file provider
Mike Savage committed Jul 3, 2022
1 parent 39855a2 commit 866198b
Showing 15 changed files with 455 additions and 338 deletions.
4 changes: 2 additions & 2 deletions .vscode/launch.json
@@ -11,8 +11,8 @@
"cwd": "${workspaceFolder}/",
"args": ["-t", "golang", "-x", "1", "--safe-mode"]
// "args": ["-t", "golang", "-x", "1", "-bl", "./bl.txt", "--safe-mode"]
// "args": ["--pr", "csvfile", "-x", "1", "-r", "9", "--cf", "./test.csv", "--safe-mode", "--sh", "--topic", "csv"]
// "args": ["--pr", "jsonfile", "-x", "1", "-r", "9", "--cf", "./test.json", "--safe-mode", "--topic", "json"]
// "args": ["--pr", "contentfile", "-x", "1", "-r", "9", "--cf", "./test.csv", "--safe-mode", "--sh", "--topic", "csv"]
// "args": ["--pr", "contentfile", "-x", "1", "-r", "9", "--cf", "./test.json", "--safe-mode", "--topic", "json"]
}
]
}
20 changes: 10 additions & 10 deletions README.md
@@ -131,7 +131,7 @@ GLOBAL OPTIONS:
--tweet-language, --tl bool for allowing tweeting the language of the repo (default: false)
--safe-mode, --sf bool for safe mode. If safe mode is enabled, no repository is published (default: false)
--provider value, --pr value provider where publishable content comes from (default: "github")
--publisher value, --pub value list of comma separared publishers (default: "twitter")
--publisher value, --pub value list of comma separated publishers (default: "twitter")
--content-file value, --cf value file containing content to publish
--skip-csv-header, --sh bool to skip CSV file header. If true, then first record of CSV file is skipped (default: false)
--blacklist value, --bl value optional file containing blacklisted repository Ids
@@ -150,22 +150,22 @@ For running the bot for Golang every 15 minutes and specifying a blacklist file

  `larry --topic golang --time 15 --blacklist ./blacklist.txt`

For running the bot every 60 minutes using the "jsonfile" provider and JSON file for content
For running the bot every 60 minutes using the "contentfile" provider and JSON file for content

  `larry --time 60 --provider jsonfile --content-file ./content.json`
  `larry --time 60 --provider contentfile --content-file ./content.json`

For running the bot every 60 minutes using the "csvfile" provider to read CSV file for content and skipping the header record
For running the bot every 60 minutes using the "contentfile" provider to read CSV file for content and skipping the header record

  `larry --time 60 --provider csvfile --content-file ./content.csv --skip-csv-header`
  `larry --time 60 --provider contentfile --content-file ./content.csv --skip-csv-header`


## Content Files

Two providers, `jsonfile` and `csvfile`, publish content from files.
The `contentfile` provider serves content from CSV and JSON files.

### JSON Content File

The `jsonfile` provider publishes random content from a JSON file. This file consists of an array of objects in the following format. ExtraData is an array of strings.
When the `contentfile` provider receives a `content-file` filename with a `.json` extension, the provider serves random content from the JSON file. This file consists of an array of objects in the following format. ExtraData is an array of strings.

```
[{
```
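For illustration, a complete file in that shape could look like the sketch below. The values are invented, and the field names simply mirror the provider's `domain.Content` struct (Title, Subtitle, URL, ExtraData), assuming no renaming JSON tags; check the repository's `domain` package for the exact keys it expects.

```
[{
    "Title": "My first title",
    "Subtitle": "A short subtitle",
    "URL": "https://example.com/first",
    "ExtraData": ["#hashtag1", "#hashtag2"]
}, {
    "Title": "My second title",
    "Subtitle": "Another subtitle",
    "URL": "https://example.com/second",
    "ExtraData": []
}]
```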
@@ -178,7 +178,7 @@ The `jsonfile` provider publishes random content from a JSON file. This file con

### CSV Content File

The `csvfile` provider publishes random content from a comma separated values (CSV) file. Each field may or may not be enclosed in double quotes. The ExtraData strings start at field 4 of the record and a record can contain any number of elements.
When the `contentfile` provider receives a `content-file` filename with a `.csv` extension, the provider serves random content from the CSV file. Each field may or may not be enclosed in double quotes. The ExtraData strings start at field 4 of the record and a record can contain any number of elements.

The following file has one record with three ExtraData strings.
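A record in that shape (values invented for illustration) keeps the title, subtitle, and URL in the first three fields and the ExtraData strings in the remaining ones:

```
My first title,A short subtitle,https://example.com/first,#hashtag1,#hashtag2,#hashtag3
```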

@@ -209,9 +209,9 @@ An example blacklist file containing GitHub repository IDs. The file can contain
456
```

### Jsonfile & Csvfile Providers
### Contentfile Provider

For the `jsonfile` and `csvfile` providers, the optional blacklist file consists of titles to exclude from the publishing process.
For the `contentfile` provider, the optional blacklist file consists of content titles to exclude from the publishing process.
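For instance, assuming the same one-entry-per-line layout as the repository-ID example above, a hypothetical blacklist for the `contentfile` provider would simply list the titles to skip:

```
My first title
Some other already-published title
```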

## Have questions? Need help with the bot?

12 changes: 4 additions & 8 deletions cmd/larry/main.go
@@ -12,9 +12,8 @@ import (
"github.com/ezeoleaf/larry/config"
"github.com/ezeoleaf/larry/larry"
"github.com/ezeoleaf/larry/provider"
"github.com/ezeoleaf/larry/provider/csvfile"
"github.com/ezeoleaf/larry/provider/contentfile"
"github.com/ezeoleaf/larry/provider/github"
"github.com/ezeoleaf/larry/provider/jsonfile"
"github.com/ezeoleaf/larry/publisher"
githubPub "github.com/ezeoleaf/larry/publisher/github"
"github.com/ezeoleaf/larry/publisher/twitter"
@@ -104,12 +103,9 @@ func getProvider(cfg config.Config) (larry.Provider, error) {
if cfg.Provider == provider.Github {
np := github.NewProvider(githubAccessToken, cfg, cacheClient)
return np, nil
} else if cfg.Provider == provider.Jsonfile {
np := jsonfile.NewProvider(cfg, cacheClient)
return np, nil
} else if cfg.Provider == provider.Csvfile {
np := csvfile.NewProvider(cfg, cacheClient)
return np, nil
} else if cfg.Provider == provider.Contentfile {
np, err := contentfile.NewProvider(cfg, cacheClient)
return np, err
}

return nil, nil
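The unified branch above hands everything off to `contentfile.NewProvider`, which now returns an error alongside the provider. The constructor itself is not part of the hunks shown here; the sketch below is a hypothetical illustration (helper name and error message invented) of the extension-based dispatch this refactor implies, given that the README ties `.csv` and `.json` files to the two readers.

```
package contentfile

import (
    "fmt"
    "path/filepath"
    "strings"
)

// newFileReader is a hypothetical helper (not the committed code): it picks a
// ContentFileReader for the configured content file based on its extension,
// mirroring the old split between the csvfile and jsonfile providers.
func newFileReader(contentFile string, skipCsvHeader bool) (ContentFileReader, error) {
    switch strings.ToLower(filepath.Ext(contentFile)) {
    case ".csv":
        return NewCsvFileReader(skipCsvHeader), nil
    case ".json":
        return NewJsonFileReader(), nil
    default:
        return nil, fmt.Errorf("unsupported content file %q: expected a .csv or .json extension", contentFile)
    }
}
```

Keeping the format choice inside the provider is what lets `getProvider` shrink to a single `Contentfile` branch and surface a clear error for unsupported extensions.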
1 change: 0 additions & 1 deletion config/config.go
@@ -18,7 +18,6 @@ type Config struct {
Publishers string
BlacklistFile string
ContentFile string
FileFormat string
SkipCsvHeader bool
}

3 changes: 1 addition & 2 deletions provider/const.go
@@ -2,5 +2,4 @@ package provider

// Github is the value of the valid provider
const Github = "github"
const Jsonfile = "jsonfile"
const Csvfile = "csvfile"
const Contentfile = "contentfile"
96 changes: 96 additions & 0 deletions provider/contentfile/csvfile.go
@@ -0,0 +1,96 @@
package contentfile

import (
"encoding/csv"
"io"
"log"
"math/rand"
"time"

"github.com/ezeoleaf/larry/domain"
)

type CsvFileReader struct {
skipHeader bool
}

func NewCsvFileReader(skipHeader bool) ContentFileReader {
return CsvFileReader{skipHeader: skipHeader}
}

func (r CsvFileReader) getContentFromReader(handle io.Reader, skip func(string) bool) (*domain.Content, error) {
size := 1
var reservoir []string
rand.Seed(time.Now().UnixNano())

count := 0
skipHeader := r.skipHeader
csvReader := csv.NewReader(handle)
for {
rec, err := csvReader.Read()
if err == io.EOF {
break
}
if err != nil {
return nil, err
}

// skip header line
if skipHeader {
skipHeader = false
continue
}

if rec[0] == "" {
log.Println("content missing title, skipping record")
continue
}

if skip(rec[0]) {
continue
}

// reservoir sampling technique
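// with size == 1 the first record is always taken, and each later record i
// (0-based) replaces the current pick with probability 1/(i+1), so every
// non-skipped record is equally likely to be the one returned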
if count < size {
reservoir = rec
} else {
j := rand.Intn(count + 1)
if j < size {
reservoir = rec
}
}

count++
}

if count > 0 {
if content, err := convertCsvToContent(reservoir); err != nil {
return nil, err
} else {
return content, nil
}
}

return nil, nil
}

func convertCsvToContent(rec []string) (*domain.Content, error) {
content := domain.Content{ExtraData: []string{}}
if len(rec) > 0 {
content.Title = StringToPointer(rec[0])
}
if len(rec) > 1 {
content.Subtitle = StringToPointer(rec[1])
}
if len(rec) > 2 {
content.URL = StringToPointer(rec[2])
}
if len(rec) > 3 {
// number of extra data fields is variable for CSV
content.ExtraData = make([]string, len(rec)-3)
for i := 3; i < len(rec); i++ {
content.ExtraData[i-3] = rec[i]
}
}
return &content, nil
}
@@ -1,4 +1,4 @@
package csvfile
package contentfile

import (
"encoding/json"
@@ -13,7 +13,7 @@ import (
"github.com/go-redis/redis/v8"
)

func TestGetContentFromReader(t *testing.T) {
func TestGetCsvContentFromReader(t *testing.T) {
for _, tc := range []struct {
Name string
CachedItems []string
@@ -159,10 +159,14 @@ title-1,subtitle-1,url-1,extradata-1-1,extradata-1-2
cc.Set("blacklist-"+item, "1", 0)
}

cfg := config.Config{SkipCsvHeader: tc.SkipHeader}
p := Provider{Config: cfg, CacheClient: cc}
cfg := config.Config{SkipCsvHeader: tc.SkipHeader, ContentFile: "./test.csv"}
p, err := NewProvider(cfg, cc)
if err != nil {
fmt.Println(err)
t.Error(err)
}

if content, err := p.getContentFromReader(strings.NewReader(tc.ContentFile)); err != nil {
if content, err := p.FileReader.getContentFromReader(strings.NewReader(tc.ContentFile), p.skipCachedRecord); err != nil {
if tc.ExpectedError != err.Error() {
fmt.Println(err)
t.Error(err)
@@ -183,11 +187,6 @@ title-1,subtitle-1,url-1,extradata-1-1,extradata-1-2
if string(expected) != string(got) {
t.Errorf("expected %v as value, got %v instead", string(expected), string(got))
}

// check cache for returned object
if _, err := p.CacheClient.Get(*tc.ExpectedContent.Title); err != nil {
t.Errorf("expected %v not found in cache", *tc.ExpectedContent.Title)
}
}
}
})
66 changes: 66 additions & 0 deletions provider/contentfile/jsonfile.go
@@ -0,0 +1,66 @@
package contentfile

import (
"encoding/json"
"io"
"log"
"math/rand"
"time"

"github.com/ezeoleaf/larry/domain"
)

type JsonFileReader struct {
}

func NewJsonFileReader() ContentFileReader {
return JsonFileReader{}
}

func (r JsonFileReader) getContentFromReader(handle io.Reader, skip func(string) bool) (*domain.Content, error) {
size := 1
reservoir := domain.Content{}
rand.Seed(time.Now().UnixNano())

decoder := json.NewDecoder(handle)
if _, err := decoder.Token(); err != nil {
if err.Error() == "EOF" {
return nil, nil
}
return nil, err
}

count := 0
for decoder.More() {
data := new(domain.Content)
if err := decoder.Decode(data); err != nil {
return nil, err
}

if data.Title == nil || *data.Title == "" {
log.Println("content missing title, skipping record")
continue
}

if skip(*data.Title) {
continue
}

// reservoir sampling technique
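// as in the CSV reader: with size == 1 each record ends up equally likely to be the one kept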
if count < size {
reservoir = *data
} else {
j := rand.Intn(count + 1)
if j < size {
reservoir = *data
}
}
count++
}

if count > 0 {
return &reservoir, nil
}

return nil, nil
}
@@ -1,4 +1,4 @@
package jsonfile
package contentfile

import (
"encoding/json"
@@ -13,7 +13,7 @@ import (
"github.com/go-redis/redis/v8"
)

func TestGetContentFromReader(t *testing.T) {
func TestGetJsonContentFromReader(t *testing.T) {
for _, tc := range []struct {
Name string
CachedItems []string
@@ -84,10 +84,14 @@ func TestGetContentFromReader(t *testing.T) {
cc.Set("blacklist-"+item, "1", 0)
}

cfg := config.Config{}
p := Provider{Config: cfg, CacheClient: cc}
cfg := config.Config{ContentFile: "./test.json"}
p, err := NewProvider(cfg, cc)
if err != nil {
fmt.Println(err)
t.Error(err)
}

if content, err := p.getContentFromReader(strings.NewReader(tc.ContentFile)); err != nil {
if content, err := p.FileReader.getContentFromReader(strings.NewReader(tc.ContentFile), p.skipCachedRecord); err != nil {
if tc.ExpectedError != err.Error() {
fmt.Println(err.Error())
t.Error(err)
@@ -108,11 +112,6 @@ func TestGetContentFromReader(t *testing.T) {
if string(expected) != string(got) {
t.Errorf("expected %v as value, got %v instead", string(expected), string(got))
}

// check cache for returned object
if _, err := p.CacheClient.Get(*tc.ExpectedContent.Title); err != nil {
t.Errorf("expected %v not found in cache", *tc.ExpectedContent.Title)
}
}
}
})
