-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.go
49 lines (39 loc) · 1.29 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
package main
import (
"flag"
"strings"
"time"
"github.com/iondex/scraper-go/config"
"github.com/iondex/scraper-go/page"
"github.com/iondex/scraper-go/requester"
log "github.com/sirupsen/logrus"
)
// parseFlags registers the crawler's command-line flags and parses the
// process arguments. Each flag is bound directly to its variable in the config
// package, so the current config value serves as the default and is
// overwritten only when the flag is supplied on the command line.
func parseFlags() {
	flag.StringVar(&config.RedisAddr, "redisAddr", config.RedisAddr, "Connection address of redis server.")
	flag.StringVar(&config.RedisPassword, "redisPw", config.RedisPassword, "Redis server AUTH password.")
	flag.IntVar(&config.MaxConcurrent, "maxConn", config.MaxConcurrent, "Max concurrent goroutines for request.")
	// The flag is named "sleep" but it feeds config.RequesterInterval.
	flag.IntVar(&config.RequesterInterval, "sleep", config.RequesterInterval, "Interval between requests in milliseconds.")

	flag.Parse()
}
// main parses the command-line flags, connects a requester and a page parser
// to each other, installs a link filter on the parser, submits the seed URL,
// and then sleeps for one hour before logging a shutdown message and returning.
func main() {
	parseFlags()

	mainLog := log.WithField("module", "main")

	// Cross-connect the two components: the parser's link output feeds the
	// requester, and the requester's page output feeds the parser.
	fetcher := requester.NewRequester(config.MaxConcurrent)
	parser := page.NewParser()
	fetcher.LinksIn(parser.LinksOut())
	parser.PagesIn(fetcher.PagesOut())

	formatter := &log.TextFormatter{ForceColors: true}
	log.SetFormatter(formatter)

	// Accept only URLs that start with "http" and contain both
	// "book.douban.com" and "subject".
	parser.LinkFilter = func(url string) bool {
		if !strings.HasPrefix(url, "http") {
			return false
		}
		if !strings.Contains(url, "book.douban.com") {
			return false
		}
		return strings.Contains(url, "subject")
	}

	mainLog.Info("Crawler started.")
	fetcher.AddTask("http://book.douban.com")

	// Nothing here waits on the requester or parser; the process simply stays
	// alive for a fixed hour. NOTE(review): presumably the crawl runs in
	// goroutines started by those components — confirm.
	time.Sleep(time.Hour)
	mainLog.Info("(Should) Shutdown.")
}