-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathgo_dev_scraper.go
63 lines (47 loc) · 1.16 KB
/
go_dev_scraper.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
package main
import (
"strings"
"time"
"github.com/PuerkitoBio/goquery"
"gorm.io/gorm"
)
// GoDevScraper fetches the Go blog index at https://go.dev/blog/ and returns
// one Post for each listed article.
//
// key is stored in the Source field of every returned Post. src is accepted
// for signature compatibility and is not used by this scraper.
//
// Entries without a link, the "/blog/all" link, and entries whose previously
// stored record (looked up by URL through FindPostByUrl) carries a non-zero
// DeletedAt are skipped.
//
// A fetch or HTML parse failure returns an empty slice and the error. A date
// that fails to parse does not drop the post: it is returned with a zero
// PublishedAt, and the most recent such parse error is returned together with
// the collected posts.
func GoDevScraper(key string, src *Source) ([]Post, error) {
	ary := []Post{}
	body, err := Scrape("https://go.dev/blog/")
	if err != nil {
		return ary, err
	}
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(body))
	if err != nil {
		return ary, err
	}

	var lastErr error
	doc.Find(".blogtitle").Each(func(_ int, s *goquery.Selection) {
		link := s.Find("a")
		// A missing href yields "", which the check below filters out.
		href, _ := link.Attr("href")
		if href == "" || href == "/blog/all" {
			return
		}

		published, parseErr := time.Parse("_2 January 2006", s.Find(".date").Text())
		if parseErr != nil {
			// Record failures only, so that a later successful parse cannot
			// erase an earlier error.
			lastErr = parseErr
		}

		url := "https://go.dev" + href
		// NOTE(review): the lookup error is deliberately ignored, as before;
		// presumably a miss yields a zero-value record — confirm against
		// FindPostByUrl.
		previousPost, _ := FindPostByUrl(url)
		if !previousPost.DeletedAt.Time.IsZero() {
			return
		}

		ary = append(ary, Post{
			Model:       gorm.Model{ID: previousPost.ID},
			Title:       link.Text(),
			Author:      s.Find(".author").Text(),
			Url:         url,
			Source:      key,
			PublishedAt: published,
			// Read the summary from the element that directly follows this
			// title rather than pairing titles and summaries by position:
			// skipped entries would otherwise shift every later summary onto
			// the wrong post, or index past the end of the slice.
			Summary: strings.TrimSpace(s.NextFiltered(".blogsummary").Text()),
		})
	})

	return ary, lastErr
}