diff --git a/.vscode/launch.json b/.vscode/launch.json index f731486..6dd3c37 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -11,8 +11,8 @@ "cwd": "${workspaceFolder}/", "args": ["-t", "golang", "-x", "1", "--safe-mode"] // "args": ["-t", "golang", "-x", "1", "-bl", "./bl.txt", "--safe-mode"] - // "args": ["--pr", "csvfile", "-x", "1", "-r", "9", "--cf", "./test.csv", "--safe-mode", "--sh", "--topic", "csv"] - // "args": ["--pr", "jsonfile", "-x", "1", "-r", "9", "--cf", "./test.json", "--safe-mode", "--topic", "json"] + // "args": ["--pr", "contentfile", "-x", "1", "-r", "9", "--cf", "./test.csv", "--safe-mode", "--sh", "--topic", "csv"] + // "args": ["--pr", "contentfile", "-x", "1", "-r", "9", "--cf", "./test.json", "--safe-mode", "--topic", "json"] } ] } \ No newline at end of file diff --git a/README.md b/README.md index 755e189..8a3f03b 100644 --- a/README.md +++ b/README.md @@ -131,7 +131,7 @@ GLOBAL OPTIONS: --tweet-language, --tl bool for allowing twetting the language of the repo (default: false) --safe-mode, --sf bool for safe mode. If safe mode is enabled, no repository is published (default: false) --provider value, --pr value provider where publishable content comes from (default: "github") - --publisher value, --pub value list of comma separared publishers (default: "twitter") + --publisher value, --pub value list of comma separated publishers (default: "twitter") --content-file value, --cf value file containing content to publish --skip-csv-header, --sh bool to skip CSV file header. 
If true, then first record of CSV file is skipped (default: false) --blacklist value, --bl value optional file containing blacklisted repository Ids @@ -150,22 +150,22 @@ For running the bot for Golang every 15 minutes and specifying a blacklist file   `larry --topic golang --time 15 --blacklist ./blacklist.txt` -For running the bot every 60 minutes using the "jsonfile" provider and JSON file for content +For running the bot every 60 minutes using the "contentfile" provider and JSON file for content -  `larry --time 60 --provider jsonfile --content-file ./content.json` +  `larry --time 60 --provider contentfile --content-file ./content.json` -For running the bot every 60 minutes using the "csvfile" provider to read CSV file for content and skipping the header record +For running the bot every 60 minutes using the "contentfile" provider to read CSV file for content and skipping the header record -  `larry --time 60 --provider csvfile --content-file ./content.csv --skip-csv-header` +  `larry --time 60 --provider contentfile --content-file ./content.csv --skip-csv-header` ## Content Files -Two providers, `jsonfile` and `csvfile`, publish content from files. +The `contentfile` provider serves content from CSV and JSON files. ### JSON Content File -The `jsonfile` provider publishes random content from a JSON file. This file consists of an array of objects in the following format. ExtraData is an array of strings. +When the `contentfile` provider receives a `content-file` filename with a `.json` extension, the provider serves random content from the JSON file. This file consists of an array of objects in the following format. ExtraData is an array of strings. ``` [{ @@ -178,7 +178,7 @@ The `jsonfile` provider publishes random content from a JSON file. This file con ### CSV Content File -The `csvfile` provider publishes random content from a comma separated values (CSV) file. Each field may or may not be enclosed in double quotes. 
The ExtraData strings start at field 4 of the record and a record can contain any number of elements. +When the `contentfile` provider receives a `content-file` filename with a `.csv` extension, the provider serves random content from the CSV file. Each field may or may not be enclosed in double quotes. The ExtraData strings start at field 4 of the record and a record can contain any number of elements. The following file has one record with three ExtraData strings. @@ -209,9 +209,9 @@ An example blacklist file containing GitHub repository IDs. The file can contain 456 ``` -### Jsonfile & Csvfile Providers +### Contentfile Provider -For the `jsonfile` and `csvfile` providers, the optional blacklist file consists of titles to exclude from the publishing process. +For the `contentfile` provider, the optional blacklist file consists of content titles to exclude from the publishing process. ## Have questions? Need help with the bot? diff --git a/cmd/larry/main.go b/cmd/larry/main.go index 2f89561..141c3b7 100644 --- a/cmd/larry/main.go +++ b/cmd/larry/main.go @@ -12,9 +12,8 @@ import ( "github.com/ezeoleaf/larry/config" "github.com/ezeoleaf/larry/larry" "github.com/ezeoleaf/larry/provider" - "github.com/ezeoleaf/larry/provider/csvfile" + "github.com/ezeoleaf/larry/provider/contentfile" "github.com/ezeoleaf/larry/provider/github" - "github.com/ezeoleaf/larry/provider/jsonfile" "github.com/ezeoleaf/larry/publisher" githubPub "github.com/ezeoleaf/larry/publisher/github" "github.com/ezeoleaf/larry/publisher/twitter" @@ -104,12 +103,9 @@ func getProvider(cfg config.Config) (larry.Provider, error) { if cfg.Provider == provider.Github { np := github.NewProvider(githubAccessToken, cfg, cacheClient) return np, nil - } else if cfg.Provider == provider.Jsonfile { - np := jsonfile.NewProvider(cfg, cacheClient) - return np, nil - } else if cfg.Provider == provider.Csvfile { - np := csvfile.NewProvider(cfg, cacheClient) - return np, nil + } else if cfg.Provider == 
provider.Contentfile { + np, err := contentfile.NewProvider(cfg, cacheClient) + return np, err } return nil, nil diff --git a/config/config.go b/config/config.go index ae94580..e66b115 100644 --- a/config/config.go +++ b/config/config.go @@ -18,7 +18,6 @@ type Config struct { Publishers string BlacklistFile string ContentFile string - FileFormat string SkipCsvHeader bool } diff --git a/provider/const.go b/provider/const.go index 6f26882..7c3ae5a 100644 --- a/provider/const.go +++ b/provider/const.go @@ -2,5 +2,4 @@ package provider // Github is the value of the valid provider const Github = "github" -const Jsonfile = "jsonfile" -const Csvfile = "csvfile" +const Contentfile = "contentfile" diff --git a/provider/contentfile/csvfile.go b/provider/contentfile/csvfile.go new file mode 100644 index 0000000..ce6efdd --- /dev/null +++ b/provider/contentfile/csvfile.go @@ -0,0 +1,96 @@ +package contentfile + +import ( + "encoding/csv" + "io" + "log" + "math/rand" + "time" + + "github.com/ezeoleaf/larry/domain" +) + +type CsvFileReader struct { + skipHeader bool +} + +func NewCsvFileReader(skipHeader bool) ContentFileReader { + return CsvFileReader{skipHeader: skipHeader} +} + +func (r CsvFileReader) getContentFromReader(handle io.Reader, skip func(string) bool) (*domain.Content, error) { + size := 1 + var reservoir []string + rand.Seed(time.Now().UnixNano()) + + count := 0 + skipHeader := r.skipHeader + csvReader := csv.NewReader(handle) + for { + rec, err := csvReader.Read() + if err == io.EOF { + break + } + if err != nil { + return nil, err + } + + // skip header line + if skipHeader { + skipHeader = false + continue + } + + if rec[0] == "" { + log.Println("content missing title, skipping record") + continue + } + + if skip(rec[0]) { + continue + } + + // reservoir sampling technique + if count < size { + reservoir = rec + } else { + j := rand.Intn(count + 1) + if j < size { + reservoir = rec + } + } + + count++ + } + + if count > 0 { + if content, err := 
convertCsvToContent(reservoir); err != nil { + return nil, err + } else { + return content, nil + } + } + + return nil, nil +} + +func convertCsvToContent(rec []string) (*domain.Content, error) { + content := domain.Content{ExtraData: []string{}} + if len(rec) > 0 { + content.Title = StringToPointer(rec[0]) + } + if len(rec) > 1 { + content.Subtitle = StringToPointer(rec[1]) + } + if len(rec) > 2 { + content.URL = StringToPointer(rec[2]) + } + if len(rec) > 3 { + // number of extra data fields is variable for CSV + content.ExtraData = make([]string, len(rec)-3) + for i := 3; i < len(rec); i++ { + content.ExtraData[i-3] = rec[i] + } + } + return &content, nil +} diff --git a/provider/csvfile/provider_test.go b/provider/contentfile/csvfile_test.go similarity index 93% rename from provider/csvfile/provider_test.go rename to provider/contentfile/csvfile_test.go index d5ddc31..31ffb26 100644 --- a/provider/csvfile/provider_test.go +++ b/provider/contentfile/csvfile_test.go @@ -1,4 +1,4 @@ -package csvfile +package contentfile import ( "encoding/json" @@ -13,7 +13,7 @@ import ( "github.com/go-redis/redis/v8" ) -func TestGetContentFromReader(t *testing.T) { +func TestGetCsvContentFromReader(t *testing.T) { for _, tc := range []struct { Name string CachedItems []string @@ -159,10 +159,14 @@ title-1,subtitle-1,url-1,extradata-1-1,extradata-1-2 cc.Set("blacklist-"+item, "1", 0) } - cfg := config.Config{SkipCsvHeader: tc.SkipHeader} - p := Provider{Config: cfg, CacheClient: cc} + cfg := config.Config{SkipCsvHeader: tc.SkipHeader, ContentFile: "./test.csv"} + p, err := NewProvider(cfg, cc) + if err != nil { + fmt.Println(err) + t.Error(err) + } - if content, err := p.getContentFromReader(strings.NewReader(tc.ContentFile)); err != nil { + if content, err := p.FileReader.getContentFromReader(strings.NewReader(tc.ContentFile), p.skipCachedRecord); err != nil { if tc.ExpectedError != err.Error() { fmt.Println(err) t.Error(err) @@ -183,11 +187,6 @@ 
title-1,subtitle-1,url-1,extradata-1-1,extradata-1-2 if string(expected) != string(got) { t.Errorf("expected %v as value, got %v instead", string(expected), string(got)) } - - // check cache for returned object - if _, err := p.CacheClient.Get(*tc.ExpectedContent.Title); err != nil { - t.Errorf("expected %v not found in cache", *tc.ExpectedContent.Title) - } } } }) diff --git a/provider/contentfile/jsonfile.go b/provider/contentfile/jsonfile.go new file mode 100644 index 0000000..a9d69b4 --- /dev/null +++ b/provider/contentfile/jsonfile.go @@ -0,0 +1,66 @@ +package contentfile + +import ( + "encoding/json" + "io" + "log" + "math/rand" + "time" + + "github.com/ezeoleaf/larry/domain" +) + +type JsonFileReader struct { +} + +func NewJsonFileReader() ContentFileReader { + return JsonFileReader{} +} + +func (r JsonFileReader) getContentFromReader(handle io.Reader, skip func(string) bool) (*domain.Content, error) { + size := 1 + reservoir := domain.Content{} + rand.Seed(time.Now().UnixNano()) + + decoder := json.NewDecoder(handle) + if _, err := decoder.Token(); err != nil { + if err == io.EOF { // empty input: nothing to publish, not an error + return nil, nil + } + return nil, err + } + + count := 0 + for decoder.More() { + data := new(domain.Content) + if err := decoder.Decode(data); err != nil { + return nil, err + } + + if data.Title == nil || *data.Title == "" { + log.Println("content missing title, skipping record") + continue + } + + if skip(*data.Title) { + continue + } + + // reservoir sampling technique + if count < size { + reservoir = *data + } else { + j := rand.Intn(count + 1) + if j < size { + reservoir = *data + } + } + count++ + } + + if count > 0 { + return &reservoir, nil + } + + return nil, nil +} diff --git a/provider/jsonfile/provider_test.go b/provider/contentfile/jsonfile_test.go similarity index 89% rename from provider/jsonfile/provider_test.go rename to provider/contentfile/jsonfile_test.go index 8548548..42f685e 100644 --- a/provider/jsonfile/provider_test.go +++
b/provider/contentfile/jsonfile_test.go @@ -1,4 +1,4 @@ -package jsonfile +package contentfile import ( "encoding/json" @@ -13,7 +13,7 @@ import ( "github.com/go-redis/redis/v8" ) -func TestGetContentFromReader(t *testing.T) { +func TestGetJsonContentFromReader(t *testing.T) { for _, tc := range []struct { Name string CachedItems []string @@ -84,10 +84,14 @@ func TestGetContentFromReader(t *testing.T) { cc.Set("blacklist-"+item, "1", 0) } - cfg := config.Config{} - p := Provider{Config: cfg, CacheClient: cc} + cfg := config.Config{ContentFile: "./test.json"} + p, err := NewProvider(cfg, cc) + if err != nil { + fmt.Println(err) + t.Error(err) + } - if content, err := p.getContentFromReader(strings.NewReader(tc.ContentFile)); err != nil { + if content, err := p.FileReader.getContentFromReader(strings.NewReader(tc.ContentFile), p.skipCachedRecord); err != nil { if tc.ExpectedError != err.Error() { fmt.Println(err.Error()) t.Error(err) @@ -108,11 +112,6 @@ func TestGetContentFromReader(t *testing.T) { if string(expected) != string(got) { t.Errorf("expected %v as value, got %v instead", string(expected), string(got)) } - - // check cache for returned object - if _, err := p.CacheClient.Get(*tc.ExpectedContent.Title); err != nil { - t.Errorf("expected %v not found in cache", *tc.ExpectedContent.Title) - } } } }) diff --git a/provider/contentfile/provider.go b/provider/contentfile/provider.go new file mode 100644 index 0000000..02f2255 --- /dev/null +++ b/provider/contentfile/provider.go @@ -0,0 +1,119 @@ +package contentfile + +import ( + "fmt" + "io" + "os" + "path/filepath" + "time" + + "github.com/ezeoleaf/larry/cache" + "github.com/ezeoleaf/larry/config" + "github.com/ezeoleaf/larry/domain" + "github.com/go-redis/redis/v8" +) + +type Provider struct { + CacheClient cache.Client + Config config.Config + FileReader ContentFileReader +} + +type ContentFileReader interface { + getContentFromReader(handle io.Reader, skip func(string) bool) (*domain.Content, error) +} + 
+func NewProvider(cfg config.Config, cacheClient cache.Client) (Provider, error) { + + ext := filepath.Ext(cfg.ContentFile) + if ext == "" { + return Provider{}, fmt.Errorf("no file extension provided, unable to determine file format") + } + + var fileReader ContentFileReader + switch ext { + case ".json": + fileReader = NewJsonFileReader() + case ".csv": + fileReader = NewCsvFileReader(cfg.SkipCsvHeader) + default: + return Provider{}, fmt.Errorf("unsupported content file format: %s", ext) + } + + p := Provider{ + Config: cfg, + CacheClient: cacheClient, + FileReader: fileReader, + } + return p, nil +} + +// GetContentToPublish returns content to publish to be used by the publishers +func (p Provider) GetContentToPublish() (*domain.Content, error) { + return p.getContentFromFile(p.Config.ContentFile) +} + +func (p Provider) getContentFromFile(fileName string) (*domain.Content, error) { + if fileName != "" { + f, err := os.OpenFile(fileName, os.O_RDONLY, os.ModePerm) + if err != nil { + return nil, err + } + defer f.Close() + + content, err := p.FileReader.getContentFromReader(f, p.skipCachedRecord) + if err != nil || content == nil { // readers return (nil, nil) when no record is eligible; do not dereference it + return nil, err + } + p.addToCache(*content.Title) + return content, nil + } + + return nil, fmt.Errorf("No content file specified") +} + +func StringToPointer(in string) *string { + return &in +} + +func (p Provider) skipCachedRecord(title string) bool { + if p.isCached(title) { + return true + } else if p.isBlacklisted(title) { + return true + } + return false +} + +func (p Provider) isCached(title string) bool { + key := cacheKey(p.Config.GetCacheKeyPrefix(), title) + _, err := p.CacheClient.Get(key) + if err != redis.Nil { + return true + } + return false +} + +func (p Provider) isBlacklisted(title string) bool { + if _, err := p.CacheClient.Get("blacklist-" + title); err != redis.Nil { + return true + } + return false +} + +func (p Provider) cacheExpirationMinutes() time.Duration { + expirationMinutes := p.Config.CacheSize *
p.Config.Periodicity + if expirationMinutes < 0 { + expirationMinutes = 0 + } + return time.Duration(expirationMinutes) * time.Minute +} + +func cacheKey(cacheKeyPrefix string, title string) string { + return cacheKeyPrefix + title +} + +func (p Provider) addToCache(title string) { + key := cacheKey(p.Config.GetCacheKeyPrefix(), title) + p.CacheClient.Set(key, true, p.cacheExpirationMinutes()) +} diff --git a/provider/contentfile/provider_test.go b/provider/contentfile/provider_test.go new file mode 100644 index 0000000..7c53646 --- /dev/null +++ b/provider/contentfile/provider_test.go @@ -0,0 +1,120 @@ +package contentfile + +import ( + "encoding/json" + "fmt" + "testing" + + "github.com/alicebob/miniredis/v2" + "github.com/ezeoleaf/larry/cache" + "github.com/ezeoleaf/larry/config" + "github.com/ezeoleaf/larry/domain" + "github.com/go-redis/redis/v8" +) + +func TestGetContentFromFile(t *testing.T) { + for _, tc := range []struct { + Name string + CachedItems []string + BlacklistedItems []string + ContentFile string + ExpectedContent *domain.Content + ExpectedError string + }{ + { + Name: "Success json", + CachedItems: []string{"title-0"}, + BlacklistedItems: []string{"title-1"}, + ContentFile: "test.json", + ExpectedContent: &domain.Content{ + Title: StringToPointer("title-2"), + Subtitle: StringToPointer("subtitle-2"), + URL: StringToPointer("url-2"), + ExtraData: []string{"extradata-2-1", "extradata-2-2"}, + }, + }, + { + Name: "Success csv", + CachedItems: []string{"title-0"}, + BlacklistedItems: []string{"title-1"}, + ContentFile: "test.csv", // exercise the CSV reader, not the JSON one again + ExpectedContent: &domain.Content{ + Title: StringToPointer("title-2"), + Subtitle: StringToPointer("subtitle-2"), + URL: StringToPointer("url-2"), + ExtraData: []string{"extradata-2-1", "extradata-2-2,embedded comma"}, + }, + }, + { + Name: "Error no file extension", + CachedItems: []string{"title-0"}, + BlacklistedItems: []string{"title-1"}, + ContentFile: "test", // no file extension provided + ExpectedContent: nil, +
ExpectedError: "no file extension provided, unable to determine file format", + }, + { + Name: "Error invalid file extension", + CachedItems: []string{"title-0"}, + BlacklistedItems: []string{"title-1"}, + ContentFile: "test.txt", // this file extension is not supported + ExpectedContent: nil, + ExpectedError: "unsupported content file format: .txt", + }, + } { + t.Run(tc.Name, func(t *testing.T) { + + mr, _ := miniredis.Run() + ro := &redis.Options{ + Addr: mr.Addr(), + } + cc := cache.NewClient(ro) + + for _, item := range tc.CachedItems { + cc.Set(item, "1", 0) + } + for _, item := range tc.BlacklistedItems { + cc.Set("blacklist-"+item, "1", 0) + } + + cfg := config.Config{ContentFile: fmt.Sprintf("./testdata/%s", tc.ContentFile)} + p, err := NewProvider(cfg, cc) + if err != nil { + if tc.ExpectedError != err.Error() { + fmt.Println(err.Error()) + t.Error(err) + } + return + } + + if content, err := p.GetContentToPublish(); err != nil { + if tc.ExpectedError != err.Error() { + fmt.Println(err.Error()) + t.Error(err) + } + } else { + if content == nil && tc.ExpectedContent == nil { + // success + } else if content == nil && tc.ExpectedContent != nil { + t.Errorf("expected %v as value, got nil instead", *tc.ExpectedContent.Title) + } else if content != nil && tc.ExpectedContent == nil { + t.Errorf("expected nil as value, got %v instead", *content.Title) + } else if *content.Title != *tc.ExpectedContent.Title { + t.Errorf("expected %v as value, got %v instead", *tc.ExpectedContent.Title, *content.Title) + } else { + // compare returned object + expected, _ := json.Marshal(tc.ExpectedContent) + got, _ := json.Marshal(content) + if string(expected) != string(got) { + t.Errorf("expected %v as value, got %v instead", string(expected), string(got)) + } + + // check cache for returned object + if _, err := p.CacheClient.Get(*tc.ExpectedContent.Title); err != nil { + t.Errorf("expected %v not found in cache", *tc.ExpectedContent.Title) + } + } + } + }) + } +} diff --git
a/provider/contentfile/testdata/test.csv b/provider/contentfile/testdata/test.csv new file mode 100644 index 0000000..2a2dcfb --- /dev/null +++ b/provider/contentfile/testdata/test.csv @@ -0,0 +1,3 @@ +"title-0","subtitle-0","url-0","extradata-0-1","extradata-0-2" +title-1,subtitle-1,url-1,extradata-1-1,extradata-1-2 +"title-2","subtitle-2","url-2","extradata-2-1","extradata-2-2,embedded comma" diff --git a/provider/contentfile/testdata/test.json b/provider/contentfile/testdata/test.json new file mode 100644 index 0000000..5dff7cc --- /dev/null +++ b/provider/contentfile/testdata/test.json @@ -0,0 +1,16 @@ +[{ + "Title": "title-0", + "Subtitle": "subtitle-0", + "URL": "url-0", + "ExtraData": ["extradata-0-1", "extradata-0-2"] +}, { + "Title": "title-1", + "Subtitle": "subtitle-1", + "URL": "url-1", + "ExtraData": ["extradata-1-1", "extradata-1-2"] +}, { + "Title": "title-2", + "Subtitle": "subtitle-2", + "URL": "url-2", + "ExtraData": ["extradata-2-1", "extradata-2-2"] +}] diff --git a/provider/csvfile/provider.go b/provider/csvfile/provider.go deleted file mode 100644 index e676e19..0000000 --- a/provider/csvfile/provider.go +++ /dev/null @@ -1,162 +0,0 @@ -package csvfile - -import ( - "encoding/csv" - "fmt" - "io" - "log" - "math/rand" - "os" - "time" - - "github.com/ezeoleaf/larry/cache" - "github.com/ezeoleaf/larry/config" - "github.com/ezeoleaf/larry/domain" - "github.com/go-redis/redis/v8" -) - -type Provider struct { - CacheClient cache.Client - Config config.Config -} - -func NewProvider(cfg config.Config, cacheClient cache.Client) Provider { - log.Print("New Csvfile Provider") - p := Provider{Config: cfg, CacheClient: cacheClient} - return p -} - -// GetContentToPublish returns content to publish to be used by the publishers -func (p Provider) GetContentToPublish() (*domain.Content, error) { - return p.getContentFromFile(p.Config.ContentFile) -} - -func (p Provider) getContentFromFile(csvFileName string) (*domain.Content, error) { - if csvFileName != "" { 
- f, err := os.OpenFile(csvFileName, os.O_RDONLY, os.ModePerm) - if err != nil { - return nil, err - } - defer f.Close() - - return p.getContentFromReader(f) - } - - return nil, fmt.Errorf("No csv file specified") -} - -func (p Provider) getContentFromReader(handle io.Reader) (*domain.Content, error) { - - size := 1 - var reservoir []string - rand.Seed(time.Now().UnixNano()) - - count := 0 - skipHeader := p.Config.SkipCsvHeader - csvReader := csv.NewReader(handle) - for { - rec, err := csvReader.Read() - if err == io.EOF { - break - } - if err != nil { - return nil, err - } - - // skip header line - if skipHeader { - skipHeader = false - continue - } - - if rec[0] == "" { - log.Println("content missing title, skipping record") - continue - } - - // check for content in cache/blacklist - if p.isCached(rec[0]) { - continue - } else if p.isBlacklisted(rec[0]) { - log.Printf("content blacklisted: %s\n", rec[0]) - continue - } - - // reservoir sampling technique - if count < size { - reservoir = rec - } else { - j := rand.Intn(count + 1) - if j < size { - reservoir = rec - } - } - - count++ - } - - if count > 0 { - if content, err := convertCsvToContent(reservoir); err != nil { - return nil, err - } else { - key := cacheKey(p.Config.GetCacheKeyPrefix(), *content.Title) - p.CacheClient.Set(key, true, p.cacheExpirationMinutes()) - return content, nil - } - } - - return nil, nil -} - -func convertCsvToContent(rec []string) (*domain.Content, error) { - content := domain.Content{ExtraData: []string{}} - if len(rec) > 0 { - content.Title = StringToPointer(rec[0]) - } - if len(rec) > 1 { - content.Subtitle = StringToPointer(rec[1]) - } - if len(rec) > 2 { - content.URL = StringToPointer(rec[2]) - } - if len(rec) > 3 { - // number of extra data fields is variable for CSV - content.ExtraData = make([]string, len(rec)-3) - for i := 3; i < len(rec); i++ { - content.ExtraData[i-3] = rec[i] - } - } - return &content, nil -} - -func StringToPointer(in string) *string { - return &in 
-} - -func (p Provider) isCached(title string) bool { - key := cacheKey(p.Config.GetCacheKeyPrefix(), title) - _, err := p.CacheClient.Get(key) - if err != redis.Nil { - return true - } - return false -} - -func (p Provider) cacheExpirationMinutes() time.Duration { - expirationMinutes := p.Config.CacheSize * p.Config.Periodicity - if expirationMinutes < 0 { - expirationMinutes = 0 - } - return time.Duration(expirationMinutes) * time.Minute -} - -func (p Provider) isBlacklisted(title string) bool { - if _, err := p.CacheClient.Get("blacklist-" + title); err != redis.Nil { - return true - } - return false -} - -func cacheKey(cacheKeyPrefix string, title string) string { - return cacheKeyPrefix + title -} diff --git a/provider/jsonfile/provider.go b/provider/jsonfile/provider.go deleted file mode 100644 index 5c7bf05..0000000 --- a/provider/jsonfile/provider.go +++ /dev/null @@ -1,133 +0,0 @@ -package jsonfile - -import ( - "encoding/json" - "fmt" - "io" - "log" - "math/rand" - "os" - "time" - - "github.com/ezeoleaf/larry/cache" - "github.com/ezeoleaf/larry/config" - "github.com/ezeoleaf/larry/domain" - "github.com/go-redis/redis/v8" -) - -type Provider struct { - CacheClient cache.Client - Config config.Config -} - -func NewProvider(cfg config.Config, cacheClient cache.Client) Provider { - log.Print("New Jsonfile Provider") - p := Provider{Config: cfg, CacheClient: cacheClient} - return p -} - -// GetContentToPublish returns content to publish to be used by the publishers -func (p Provider) GetContentToPublish() (*domain.Content, error) { - return p.getContentFromFile(p.Config.ContentFile) -} - -func (p Provider) getContentFromFile(jsonFileName string) (*domain.Content, error) { - if jsonFileName != "" { - f, err := os.OpenFile(jsonFileName, os.O_RDONLY, os.ModePerm) - if err != nil { - return nil, err - } - defer f.Close() - - return p.getContentFromReader(f) - } - - return nil, fmt.Errorf("No json file specified") -} - -func (p Provider) getContentFromReader(handle 
io.Reader) (*domain.Content, error) { - - size := 1 - reservoir := domain.Content{} - rand.Seed(time.Now().UnixNano()) - - decoder := json.NewDecoder(handle) - if _, err := decoder.Token(); err != nil { - if err.Error() == "EOF" { - return nil, nil - } - return nil, err - } - - count := 0 - for decoder.More() { - data := new(domain.Content) - if err := decoder.Decode(data); err != nil { - return nil, err - } - - if data.Title == nil || *data.Title == "" { - log.Println("content missing title, skipping record") - continue - } - - // check for content in cache/blacklist - if p.isCached(*data.Title) { - continue - } else if p.isBlacklisted(*data.Title) { - log.Printf("content blacklisted: %s\n", *data.Title) - continue - } - - // reservoir sampling technique - if count < size { - reservoir = *data - } else { - j := rand.Intn(count + 1) - if j < size { - reservoir = *data - } - } - count++ - } - - if count > 0 { - key := cacheKey(p.Config.GetCacheKeyPrefix(), *reservoir.Title) - p.CacheClient.Set(key, true, p.cacheExpirationMinutes()) - return &reservoir, nil - } - - return nil, nil -} - -func StringToPointer(in string) *string { - return &in -} - -func (p Provider) isCached(title string) bool { - key := cacheKey(p.Config.GetCacheKeyPrefix(), title) - _, err := p.CacheClient.Get(key) - if err != redis.Nil { - return true - } - return false -} - -func (p Provider) cacheExpirationMinutes() time.Duration { - expirationMinutes := p.Config.CacheSize * p.Config.Periodicity - if expirationMinutes < 0 { - expirationMinutes = 0 - } - return time.Duration(expirationMinutes) * time.Minute -} - -func (p Provider) isBlacklisted(title string) bool { - if _, err := p.CacheClient.Get("blacklist-" + title); err != redis.Nil { - return true - } - return false -} - -func cacheKey(cacheKeyPrefix string, title string) string { - return cacheKeyPrefix + title -}