diff --git a/main.go b/main.go index 3633fdc..b2cd770 100644 --- a/main.go +++ b/main.go @@ -17,6 +17,8 @@ import ( "github.com/hoenn/go-hn/pkg/hnapi" ) +var validCategoryParam = regexp.MustCompile(`^(\w+ *\w*)+( *, *\w* *\w*)*$`) + func startClassifierServer(ctx context.Context) { cmd := exec.CommandContext(ctx, "uvicorn", "main:app") err := cmd.Start() @@ -42,149 +44,137 @@ func OperationsMode(db *badger.DB, mode string) { if err != nil { log.Fatalln("Error accessing mobiPath, Did you set the config using -c?ing db for mobiPath, Did you set the config using -c?", err) - } else { + } + + err = item.Value(func(val []byte) error { + // This func with val would only be called if item.Value encounters no error. + + // Accessing val here is valid. + fmt.Printf("The .mobi path is: %s\n", val) + + // Copying or parsing val is valid. + mobiPath = string(append([]byte{}, val...)) - err := item.Value(func(val []byte) error { - // This func with val would only be called if item.Value encounters no error. + return nil + }) - // Accessing val here is valid. - fmt.Printf("The .mobi path is: %s\n", val) + if err != nil { + log.Fatalln("Item not found in the database", err) + } + + switch mode { + case "item": + fmt.Println("Enter the HN story or comment item id:") + var itemId string - // Copying or parsing val is valid. - mobiPath = string(append([]byte{}, val...)) + scanner := bufio.NewScanner(os.Stdin) + scanner.Scan() + if scanner.Err() != nil { + log.Fatalln("Error in getting the HN item id") + } + itemId = scanner.Text() - return nil - }) + c := hnapi.NewHNClient() + // Get the details of the current max item. 
+ item, err := c.Item(itemId) if err != nil { - log.Fatalln("Item not found in the database", err) + log.Fatalln("Item mode, Error fetching story item") } - switch mode { - case "item": - fmt.Println("Enter the HN story or comment item id:") - var itemId string + fmt.Println("Creating temporary directory for storing .pdf files") + dir, err := ioutil.TempDir("", "hn") + if err != nil { + log.Fatal(err) + } - scanner := bufio.NewScanner(os.Stdin) - scanner.Scan() - if scanner.Err() != nil { - log.Fatalln("Error in getting the HN item id") - } else { - itemId = scanner.Text() - } + fmt.Println("Temporary directory name:", dir) - c := hnapi.NewHNClient() - // Get the details of the current max item. - item, err := c.Item(itemId) + switch hnItem := item.(type) { + case *hnapi.Story: + fmt.Println("Found HN story") + operations.HTMLtoPDFGenerator(db, nil, hnItem, nil, dir+"/", string(mobiPath)) + case *hnapi.Comment: + fmt.Println("Found HN comment") + operations.HTMLtoPDFGenerator(db, nil, nil, hnItem, dir+"/", string(mobiPath)) + } + os.Remove(dir) + case "filter": - if err != nil { - log.Fatalln("Item mode, Error fetching story item") - } + var wg sync.WaitGroup + ctx, cancel := context.WithCancel(context.Background()) - fmt.Println("Creating temporary directory for storing .pdf files") - dir, err := ioutil.TempDir("", "hn") - if err != nil { - log.Fatal(err) - } + // Increment the WaitGroup synchronously in the main method, to avoid + // racing with the goroutine starting. 
+ wg.Add(1) + go func() { + startClassifierServer(ctx) + // Signal the goroutine has completed + wg.Done() + }() - fmt.Println("Temporary directory name:", dir) - - switch item.(type) { - case *hnapi.Story: - fmt.Println("Found HN story") - storyItem := item.(*hnapi.Story) - operations.HTMLtoPDFGenerator(db, nil, storyItem, nil, dir+"/", string(mobiPath)) - break - case *hnapi.Comment: - fmt.Println("Found HN comment") - commentItem := item.(*hnapi.Comment) - operations.HTMLtoPDFGenerator(db, nil, nil, commentItem, dir+"/", string(mobiPath)) - break - } - os.Remove(dir) - break - case "filter": - - var wg sync.WaitGroup - ctx, cancel := context.WithCancel(context.Background()) - - // Increment the WaitGroup synchronously in the main method, to avoid - // racing with the goroutine starting. - wg.Add(1) - go func() { - startClassifierServer(ctx) - // Signal the goroutine has completed - wg.Done() - }() - - fmt.Println("Enter the categories for filtering separted by a comma, e.g. Tech,Climate,Gaming:") - var categories []string - var categoryParam string - - scanner := bufio.NewScanner(os.Stdin) - scanner.Scan() - if scanner.Err() != nil { - log.Fatalln("Error in getting the categories") - } else { - categoryParam = scanner.Text() - } + fmt.Println("Enter the categories for filtering separted by a comma, e.g. Tech,Climate,Gaming:") + var categories []string + var categoryParam string - if len(categoryParam) == 0 { - log.Fatalln("No categories were entered, try again") - } else { - re := regexp.MustCompile("^(\\w+ *\\w*)+( *, *\\w* *\\w*)*$") - if !(re.MatchString(categoryParam)) { - log.Fatalln("Invalid category entered, Enter categories separated by a comma, e.g. 
Tech,Climate,Gaming:") - } - } + scanner := bufio.NewScanner(os.Stdin) + scanner.Scan() + if scanner.Err() != nil { + log.Fatalln("Error in getting the categories") + } + categoryParam = scanner.Text() - categoriesTemp := strings.Split(categoryParam, ",") + if len(categoryParam) == 0 { + log.Fatalln("No categories were entered, try again") + } - for _, category := range categoriesTemp { - if len(category) > 1000 { - fmt.Println("Was the category name in german?") - } - categories = append(categories, strings.TrimSpace(category)) - } + if !(validCategoryParam.MatchString(categoryParam)) { + log.Fatalln("Invalid category entered, Enter categories separated by a comma, e.g. Tech,Climate,Gaming:") + } - log.Println("Categories: ", categories) + categoriesTemp := strings.Split(categoryParam, ",") - log.Println("Creating temporary directory for storing .pdf files") - dir, err := ioutil.TempDir("", "hn") - if err != nil { - log.Fatal(err) + for _, category := range categoriesTemp { + if len(category) > 1000 { + fmt.Println("Was the category name in german?") } + categories = append(categories, strings.TrimSpace(category)) + } - log.Println("Temporary directory name:", dir) - - operations.UpdateStories(db, dir+"/", mobiPath, categories) - os.Remove(dir) + log.Println("Categories: ", categories) - log.Println("closing via ctx") - cancel() + log.Println("Creating temporary directory for storing .pdf files") + dir, err := ioutil.TempDir("", "hn") + if err != nil { + log.Fatal(err) + } - // Wait for the child goroutine to finish, which will only occur when - // the child process has stopped and the call to cmd.Wait has returned. - // This prevents main() exiting prematurely. 
- wg.Wait() + log.Println("Temporary directory name:", dir) - break + operations.UpdateStories(db, dir+"/", mobiPath, categories) + os.Remove(dir) - default: - log.Println("Creating temporary directory for storing .pdf files") - dir, err := ioutil.TempDir("", "hn") - if err != nil { - log.Fatal(err) - } + log.Println("closing via ctx") + cancel() - log.Println("Temporary directory name:", dir) + // Wait for the child goroutine to finish, which will only occur when + // the child process has stopped and the call to cmd.Wait has returned. + // This prevents main() exiting prematurely. + wg.Wait() - operations.UpdateStories(db, dir+"/", mobiPath, nil) - os.Remove(dir) - break + default: + log.Println("Creating temporary directory for storing .pdf files") + dir, err := ioutil.TempDir("", "hn") + if err != nil { + log.Fatal(err) } + log.Println("Temporary directory name:", dir) + + operations.UpdateStories(db, dir+"/", mobiPath, nil) + os.Remove(dir) } + return nil }) @@ -203,6 +193,7 @@ func main() { if err != nil { log.Fatal(err) } + defer db.Close() args := os.Args[1:] if len(args) > 0 { @@ -215,78 +206,76 @@ func main() { scanner.Scan() if scanner.Err() != nil { log.Fatalln("Error in getting the answer for storing path for the ebook") - } else { - mobiAnswer := scanner.Text() - if mobiAnswer == "Y" { - fmt.Println("Enter a path for storing .mobi files on the e-reader(After creating the folder) e.g. 
/run/media/username/Kindle/documents/Downloads/hn/ :") - scanner.Scan() - if scanner.Err() != nil { - log.Fatalln("Error in getting the path for storing the ebook") - } else { - mobiPath = scanner.Text() - } - err = db.Update(func(txn *badger.Txn) error { - err := txn.Set([]byte("mobiPath"), []byte(mobiPath)) - return err - }) - if err != nil { - log.Fatal(err) - } - log.Println("Stored mobiPath for future operations") - } else if mobiAnswer == "N" { - log.Println("Not setting up path for .mobi this time") + } + + mobiAnswer := scanner.Text() + if mobiAnswer == "Y" { + fmt.Println("Enter a path for storing .mobi files on the e-reader(After creating the folder) e.g. /run/media/username/Kindle/documents/Downloads/hn/ :") + scanner.Scan() + if scanner.Err() != nil { + log.Fatalln("Error in getting the path for storing the ebook") } else { - log.Println("Invalid answer, Enter Y (or) N") + mobiPath = scanner.Text() + } + err = db.Update(func(txn *badger.Txn) error { + err := txn.Set([]byte("mobiPath"), []byte(mobiPath)) + return err + }) + if err != nil { + log.Fatal(err) } + log.Println("Stored mobiPath for future operations") + } else if mobiAnswer == "N" { + log.Println("Not setting up path for .mobi this time") + } else { + log.Println("Invalid answer, Enter Y (or) N") } fmt.Println("Would you like to setup category filter? Y/N") scanner.Scan() if scanner.Err() != nil { log.Fatalln("Error in getting the answer for category filter") - } else { - categoryFilter := scanner.Text() - - if categoryFilter == "Y" { - log.Println("Downloading model for classification") - log.Println("This would take a while....") - - var out []byte - out, err = exec.Command("git", "clone", "https://huggingface.co/typeform/distilbert-base-uncased-mnli", "models/distilbert-base-uncased-mnli/").CombinedOutput() - - // if there is an error with our execution - // handle it here - if err != nil { - log.Println("Downloading models, Error executing command to download models. 
Check if the models folder is empty", err) - return - } - log.Println("Command Successfully Executed") - output := string(out[:]) - log.Println(output) - - log.Println("Installing necessary python packages") - - out, err = exec.Command("pip", "install", "-r", "requirements.txt").CombinedOutput() - - // if there is an error with our execution - // handle it here - if err != nil { - log.Println("Installing python packages, Error executing command to install python packages. Install the packages manually.", err) - return - } - log.Println("Command Successfully Executed") - output = string(out) - log.Println(output) - - } else if categoryFilter == "N" { - log.Println("Category filter not enabled") - } else { - log.Println("Invalid answer, Enter Y (or) N") + } + categoryFilter := scanner.Text() + + if categoryFilter == "Y" { + log.Println("Downloading model for classification") + log.Println("This would take a while....") + + var out []byte + out, err = exec.Command("git", "clone", "https://huggingface.co/typeform/distilbert-base-uncased-mnli", "models/distilbert-base-uncased-mnli/").CombinedOutput() + + // if there is an error with our execution + // handle it here + if err != nil { + log.Println("Downloading models, Error executing command to download models. Check if the models folder is empty", err) + return } + log.Println("Command Successfully Executed") + output := string(out[:]) + log.Println(output) + + log.Println("Installing necessary python packages") + + out, err = exec.Command("pip", "install", "-r", "requirements.txt").CombinedOutput() + + // if there is an error with our execution + // handle it here + if err != nil { + log.Println("Installing python packages, Error executing command to install python packages. 
Install the packages manually.", err) + return + } + log.Println("Command Successfully Executed") + output = string(out) + log.Println(output) + + } else if categoryFilter == "N" { + log.Println("Category filter not enabled") + } else { + log.Println("Invalid answer, Enter Y (or) N") } log.Println("Configuration done, You can now use ./hntoebook") - break case "-i": log.Println("Entering item mode") OperationsMode(db, "item") @@ -299,6 +288,4 @@ func main() { } else { OperationsMode(db, "default") } - - defer db.Close() } diff --git a/stories/operations/mobi.go b/stories/operations/mobi.go index e6234d4..17f5d0b 100644 --- a/stories/operations/mobi.go +++ b/stories/operations/mobi.go @@ -3,14 +3,15 @@ package operations import ( "context" "fmt" - "github.com/dgraph-io/badger/v3" - "github.com/hoenn/go-hn/pkg/hnapi" "hntoebook/stories" "log" "os/exec" "strconv" "strings" "time" + + "github.com/dgraph-io/badger/v3" + "github.com/hoenn/go-hn/pkg/hnapi" ) func PDFToMobiGenerator(db *badger.DB, story *stories.Story, storyItem *hnapi.Story, commentItem *hnapi.Comment, pdfPath string, mobiPath string) { @@ -24,31 +25,15 @@ func PDFToMobiGenerator(db *badger.DB, story *stories.Story, storyItem *hnapi.St if story != nil { out, err = exec.CommandContext(ctx, "ebook-convert", pdfPath+strconv.Itoa(story.Id)+".pdf", mobiPath+strconv.Itoa(story.Id)+".mobi", "--authors=HN to Kindle", "--remove-first-image", "--title="+strings.ReplaceAll(story.Title, `"`, `\"`)).CombinedOutput() - - // if there is an error with our execution - // handle it here - if err != nil { - log.Println("Mobi, Error executing command check the mobiPath ", err) - return - } } else if storyItem != nil { out, err = exec.CommandContext(ctx, "ebook-convert", pdfPath+strconv.Itoa(storyItem.ID)+".pdf", mobiPath+strconv.Itoa(storyItem.ID)+".mobi", "--authors=HN to Kindle", "--remove-first-image", "--title="+strings.ReplaceAll(storyItem.Title, `"`, `\"`)).CombinedOutput() - - // if there is an error with our 
execution - // handle it here - if err != nil { - log.Println("Mobi, Error executing command check the mobiPath ", err) - return - } } else if commentItem != nil { out, err = exec.CommandContext(ctx, "ebook-convert", pdfPath+strconv.Itoa(commentItem.ID)+".pdf", mobiPath+strconv.Itoa(commentItem.ID)+".mobi", "--authors=HN to Kindle", "--remove-first-image", "--title="+strings.ReplaceAll("Comment by "+commentItem.By, `"`, `\"`)).CombinedOutput() + } - // if there is an error with our execution - // handle it here - if err != nil { - log.Println("Mobi, Error executing command check the mobiPath ", err) - return - } + if err != nil { + log.Println("Mobi, Error executing command check the mobiPath", err) + return } // We want to check the context error to see if the timeout was executed. diff --git a/stories/operations/pdf.go b/stories/operations/pdf.go index d425b06..e4de6e3 100644 --- a/stories/operations/pdf.go +++ b/stories/operations/pdf.go @@ -1,12 +1,13 @@ package operations import ( - wkhtml "github.com/SebastiaanKlippert/go-wkhtmltopdf" - "github.com/dgraph-io/badger/v3" - "github.com/hoenn/go-hn/pkg/hnapi" "hntoebook/stories" "log" "strconv" + + wkhtml "github.com/SebastiaanKlippert/go-wkhtmltopdf" + "github.com/dgraph-io/badger/v3" + "github.com/hoenn/go-hn/pkg/hnapi" ) func HTMLtoPDFGenerator(db *badger.DB, story *stories.Story, storyItem *hnapi.Story, commentItem *hnapi.Comment, pdfPath string, mobiPath string) { @@ -56,22 +57,15 @@ func HTMLtoPDFGenerator(db *badger.DB, story *stories.Story, storyItem *hnapi.St // Write buffer contents to file on disk if story != nil { err = pdfg.WriteFile(pdfPath + strconv.Itoa(story.Id) + ".pdf") - if err != nil { - log.Fatal("PDF Generator, Error writing pdf", err) - return - } } else if storyItem != nil { err = pdfg.WriteFile(pdfPath + strconv.Itoa(storyItem.ID) + ".pdf") - if err != nil { - log.Fatal("PDF Generator, Error writing pdf", err) - return - } } else if commentItem != nil { err = pdfg.WriteFile(pdfPath + 
strconv.Itoa(commentItem.ID) + ".pdf") - if err != nil { - log.Fatal("PDF Generator, Error writing pdf", err) - return - } + } + + if err != nil { + log.Fatal("PDF Generator, Error writing pdf", err) + return } log.Println("PDF Generator, Creating pdf file: Success") diff --git a/stories/operations/update.go b/stories/operations/update.go index bcb0599..93ac5eb 100644 --- a/stories/operations/update.go +++ b/stories/operations/update.go @@ -3,13 +3,13 @@ package operations import ( "bytes" "encoding/json" + "errors" "fmt" "hntoebook/stories" "io/ioutil" "log" "net/http" "strconv" - "strings" "time" "github.com/dgraph-io/badger/v3" @@ -17,63 +17,60 @@ import ( ) func categoryFilter(story *stories.Story, categories []string) bool { - if len(categories) > 0 { - - var labels []string - labels = append(labels, strings.Join(categories, ",")) + if len(categories) < 1 { + log.Fatalln("Update, User has no categories") + } - type bertRequest struct { - Text string `json:"text"` - Labels []string `json:"labels"` - } + type bertRequest struct { + Text string `json:"text"` + Labels []string `json:"labels"` + } - req := &bertRequest{ - Text: story.Title, - Labels: categories, - } + req := &bertRequest{ + Text: story.Title, + Labels: categories, + } - postBody, err := json.Marshal(req) - if err != nil { - log.Fatalln("Update stories. Error creating POST body for bert") - } + postBody, err := json.Marshal(req) + if err != nil { + log.Fatalln("Update stories. 
Error creating POST body for bert") + } - responseBody := bytes.NewBuffer(postBody) - //Leverage Go's HTTP Post function to make request - resp, err := http.Post("http://127.0.0.1:8000/classification", "application/json", responseBody) - //Handle Error - if err != nil { - log.Fatalln("Bert, An Error Occurred", err) - } - defer resp.Body.Close() - //Read the response body - body, err := ioutil.ReadAll(resp.Body) - if err != nil { - log.Fatalln("Bert An Error Occurred", err) - } + responseBody := bytes.NewBuffer(postBody) + //Leverage Go's HTTP Post function to make request + resp, err := http.Post("http://127.0.0.1:8000/classification", "application/json", responseBody) + //Handle Error + if err != nil { + log.Fatalln("Bert, An Error Occurred", err) + } + defer resp.Body.Close() + //Read the response body + body, err := ioutil.ReadAll(resp.Body) + if err != nil { + log.Fatalln("Bert An Error Occurred", err) + } - type bertResponse struct { - Labels []string `json:"labels"` - Scores []float64 `json:"scores"'` - } + type bertResponse struct { + Labels []string `json:"labels"` + Scores []float64 `json:"scores"` + } - res := bertResponse{} - json.Unmarshal(body, &res) + res := bertResponse{} + err = json.Unmarshal(body, &res) + if err != nil { + log.Fatalln("Bert, Error unmarshalling json", err) + } - log.Println(fmt.Sprintf("Bert, Title: %s", story.Title)) + log.Printf("Bert, Title: %s\n", story.Title) - for i, score := range res.Scores { - fmt.Printf("Bert, Label: %s, Score: %f\n", res.Labels[i], res.Scores[i]) - if score > 0.75 { - fmt.Printf("Bert, Score matches threshold, Label: %s, Score: %f", res.Labels[i], res.Scores[i]) - return true - } + for i, score := range res.Scores { + fmt.Printf("Bert, Label: %s, Score: %f\n", res.Labels[i], res.Scores[i]) + if score > 0.75 { + fmt.Printf("Bert, Score matches threshold, Label: %s, Score: %f", res.Labels[i], res.Scores[i]) + return true } - - return false - - } else { - log.Fatalln("Update, User has no categories") } + 
return false } @@ -99,55 +96,69 @@ func UpdateStories(db *badger.DB, pdfPath string, mobiPath string, categories [] err = db.View(func(txn *badger.Txn) error { _, err := txn.Get([]byte(strconv.Itoa(topStory.ID))) + if err == nil { + log.Println("Story already found in db") + return nil + } - if err != nil && err.Error() == "Key not found" { + if errors.Is(err, badger.ErrKeyNotFound) { log.Println("Story was not previously processed") - if topStory.Descendants > 0 { - topCommentID := topStory.Kids[0] - - commentItem, err := c.Item(strconv.Itoa(topCommentID)) + if topStory.Descendants < 1 { + log.Println("Story Update, No descendants to the top story") + return nil + } + topCommentID := topStory.Kids[0] - if err != nil { - log.Fatal("Update: Error fetching comment item", err) - } + commentItem, err := c.Item(strconv.Itoa(topCommentID)) - topComment := commentItem.(*hnapi.Comment) + if err != nil { + log.Fatal("Update: Error fetching comment item", err) + } - log.Println("Update, Top story ID", topStory.ID) - log.Println("Update, Top comment ID", topComment.ID) + topComment := commentItem.(*hnapi.Comment) - if time.Now().Sub(time.Unix(topStory.Time, 0)).Hours() > 9 && time.Now().Sub(time.Unix(topStory.Time, 0)).Hours() < 24 && topStory.Descendants > 20 && time.Now().Sub(time.Unix(topComment.Time, 0)).Hours() > 2 { - log.Println("Time difference of the story: ", time.Now().Sub(time.Unix(topStory.Time, 0)).Hours()) + log.Println("Update, Top story ID", topStory.ID) + log.Println("Update, Top comment ID", topComment.ID) - story := stories.Story{ - Id: topStory.ID, - Time: time.Unix(topStory.Time, 0).UTC(), - Title: topStory.Title, - URL: "https://news.ycombinator.com/item?id=" + strconv.Itoa(topStory.ID), - } + if !includeAsTopStory(topStory, topComment) { + log.Println("Story Update, Top story comment threshold not met or Story not older than 9 hours or Story older than 24 hours") + return nil + } + log.Println("Time difference of the story: ", 
time.Since(time.Unix(topStory.Time, 0)).Hours()) - log.Println("Created story, Story ID:", story.Id) + story := stories.Story{ + Id: topStory.ID, + Time: time.Unix(topStory.Time, 0).UTC(), + Title: topStory.Title, + URL: "https://news.ycombinator.com/item?id=" + strconv.Itoa(topStory.ID), + } - if categories != nil { - if categoryFilter(&story, categories) { - HTMLtoPDFGenerator(db, &story, nil, nil, pdfPath, mobiPath) - } - } else { - HTMLtoPDFGenerator(db, &story, nil, nil, pdfPath, mobiPath) - } + log.Println("Created story, Story ID:", story.Id) - } else { - log.Println("Story Update, Top story comment threshold not met or Story not older than 9 hours or Story older than 24 hours") + if categories != nil { + if categoryFilter(&story, categories) { + HTMLtoPDFGenerator(db, &story, nil, nil, pdfPath, mobiPath) } } else { - log.Println("Story Update, No descendants to the top story") + HTMLtoPDFGenerator(db, &story, nil, nil, pdfPath, mobiPath) } - } else { - log.Println("Story already found in db") + return nil } - - return nil + return err }) - + if err != nil { + log.Println("Update: Error while updating database", err) + } } } + +// includeAsTopStory returns true if a story meets the following criteria to be categorized as "top": +// - Between 9 and 24 hours old. +// - Has more than 20 comments. +// - Top comment on the story is more than 2 hours old. +func includeAsTopStory(topStory *hnapi.Story, topComment *hnapi.Comment) bool { + return time.Since(time.Unix(topStory.Time, 0)).Hours() > 9 && + time.Since(time.Unix(topStory.Time, 0)).Hours() < 24 && + topStory.Descendants > 20 && + time.Since(time.Unix(topComment.Time, 0)).Hours() > 2 +}