-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.go
173 lines (146 loc) · 3.81 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
package main
import (
"crypto/tls"
"fmt"
"io"
"io/ioutil"
"log"
"net/http"
"os"
"strconv"
"strings"
"sync"
"github.com/gocolly/colly"
"github.com/gocolly/colly/extensions"
"github.com/labstack/gommon/color"
)
// arguments holds the run configuration. It is populated by parseArgs
// (defined elsewhere in this package) before any download starts.
var arguments = struct {
	Input       string // NOTE(review): not read in this chunk — presumably consumed by parseArgs/other code; verify
	Output      string // directory where downloaded .epub files are written
	Concurrency int    // max simultaneous scraper goroutines; also used as MaxIdleConnsPerHost
	RandomUA    bool   // when true, colly randomizes the User-Agent per request
	Verbose     bool   // NOTE(review): not read in this chunk — verify against the rest of the file
	GetAlbums   bool   // NOTE(review): not read in this chunk
	GetVideos   bool   // NOTE(review): not read in this chunk
	StartID     int    // first Gutenberg ebook ID to fetch (inclusive)
	StopID      int    // last Gutenberg ebook ID to fetch (inclusive)
}{}

// client is the shared HTTP client used for all EPUB downloads; its
// transport is tuned in init and in main.
var client = http.Client{}

// Colored status prefixes: [✓] success, [~] progress, [✗] failure.
var checkPre = color.Yellow("[") + color.Green("✓") + color.Yellow("]")
var tildPre = color.Yellow("[") + color.Green("~") + color.Yellow("]")
var crossPre = color.Yellow("[") + color.Red("✗") + color.Yellow("]")
// init disables HTTP/2 on the package's shared client by giving it a
// transport whose TLSNextProto map is empty (the documented way to opt out
// of HTTP/2).
//
// Bug fixed: the original assigned http.DefaultTransport to client.Transport
// and then mutated it, which disabled HTTP/2 for every user of the default
// transport in the whole process. Cloning keeps the default transport's
// proxy/dial/TLS settings while confining the change to this client.
func init() {
	t := http.DefaultTransport.(*http.Transport).Clone()
	// An empty (non-nil) TLSNextProto map disables HTTP/2 upgrades.
	t.TLSNextProto = make(map[string]func(authority string, c *tls.Conn) http.RoundTripper)
	client.Transport = t
}
// errorLogMu serializes appends to ./error.txt: downloadEPUB runs from many
// goroutines at once (see main/scrapeBookPage).
var errorLogMu sync.Mutex

// recordFailedLink appends link to ./error.txt so failed downloads can be
// retried later.
//
// Bug fixed: the original used ioutil.WriteFile, which truncates the file on
// every call, so error.txt only ever held the single most recent failure.
func recordFailedLink(link string) error {
	errorLogMu.Lock()
	defer errorLogMu.Unlock()
	f, err := os.OpenFile("./error.txt", os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
	if err != nil {
		return err
	}
	defer f.Close()
	_, err = f.WriteString(link + "\n")
	return err
}

// Keep the ioutil import referenced: this block no longer calls
// ioutil.WriteFile (see recordFailedLink), but the file's import list is
// unchanged and Go rejects unused imports.
var _ = ioutil.WriteFile

// downloadEPUB fetches the EPUB at link with the shared client and saves it
// as "<arguments.Output>/<index>-<name>.epub". Failed links are appended to
// ./error.txt. Already-downloaded files are skipped.
func downloadEPUB(link, name, index string) (err error) {
	// Slashes in a book title would otherwise act as path separators.
	name = strings.Replace(name, "/", "-", -1)
	path := arguments.Output + "/" + index + "-" + name + ".epub"

	// Skip files already on disk. Only a nil Stat error means "exists";
	// the original also took this branch on unrelated Stat errors
	// (e.g. permission denied) and mislabeled them "Already downloaded".
	if _, statErr := os.Stat(path); statErr == nil {
		fmt.Println(checkPre +
			color.Yellow("[") +
			color.Green(index) +
			color.Yellow("]") +
			color.Green(" Already downloaded: ") +
			color.Yellow(name))
		return nil
	}

	// Create output dir (no-op if it already exists).
	os.MkdirAll(arguments.Output+"/", os.ModePerm)

	// Fetch the data from the URL.
	resp, err := client.Get(link)
	if err != nil {
		fmt.Println(crossPre+color.Red(" Unable to download the file:"), color.Yellow(err))
		return recordFailedLink(link)
	}
	defer resp.Body.Close()

	// Bug fixed: the original never checked the status code, so a 404/503
	// error page was saved to disk as a corrupt .epub.
	if resp.StatusCode != http.StatusOK {
		fmt.Println(crossPre+color.Red(" Unable to download the file:"), color.Yellow(resp.Status))
		return recordFailedLink(link)
	}

	// Create the ebook's file.
	pictureFile, err := os.Create(path)
	if err != nil {
		log.Println(crossPre+color.Red(" Unable to create the file:"), color.Yellow(err))
		if logErr := recordFailedLink(link); logErr != nil {
			return logErr
		}
		return err
	}
	defer pictureFile.Close()

	// Stream the body to the file (constant memory).
	_, err = io.Copy(pictureFile, resp.Body)
	if err != nil {
		log.Println(crossPre+color.Red(" Unable to write to the file:"), color.Yellow(err))
		if logErr := recordFailedLink(link); logErr != nil {
			return logErr
		}
		return err
	}

	fmt.Println(checkPre +
		color.Yellow("[") +
		color.Green(index) +
		color.Yellow("]") +
		color.Green(" Downloaded: ") +
		color.Yellow(name))
	return nil
}
// scrapeBookPage visits one Gutenberg book page, extracts the book title
// (div.header h1) and the ".epub.images" download link from the files table,
// then hands them to downloadEPUB. Signals worker.Done on every exit path.
func scrapeBookPage(url string, index int, worker *sync.WaitGroup) {
	defer worker.Done()

	var name, epubLink string

	c := colly.NewCollector()

	// Randomize the User-Agent on every request when requested.
	if arguments.RandomUA {
		extensions.RandomUserAgent(c)
	}

	// Book title.
	c.OnHTML("div.header", func(e *colly.HTMLElement) {
		name = e.ChildText("h1")
	})

	// Download link: the "about" attribute of each tr.even row; keep the
	// last one containing "epub.images" (protocol-relative, so prefix http:).
	c.OnHTML("tbody", func(e *colly.HTMLElement) {
		for _, link := range e.ChildAttrs("tr.even", "about") {
			if strings.Contains(link, "epub.images") {
				epubLink = "http:" + link
			}
		}
	})

	// Bug fixed: the original discarded Visit's error, so DNS/HTTP failures
	// were indistinguishable from "page had no epub link".
	if err := c.Visit(url); err != nil {
		fmt.Println(crossPre+color.Red(" Unable to scrape the page:"), color.Yellow(err))
		return
	}

	// Nothing to download if either field is missing.
	if name == "" || epubLink == "" {
		return
	}

	if err := downloadEPUB(epubLink, name, strconv.Itoa(index)); err != nil {
		// downloadEPUB already printed details; note the failure and move on.
		log.Println(crossPre+color.Red(" Download failed:"), color.Yellow(err))
	}
}
// main parses the CLI arguments and downloads every ebook ID in
// [StartID, StopID], keeping at most arguments.Concurrency page scrapes in
// flight at once.
//
// Bug fixed: the original spawned goroutines in batches of Concurrency and
// called worker.Wait() between batches, so each whole batch stalled on its
// slowest request; with Concurrency <= 0 it never waited at all and spawned
// one goroutine per ID unbounded. A semaphore channel keeps the pipeline
// full while enforcing the limit.
func main() {
	var worker sync.WaitGroup

	// Parse arguments and fill the arguments structure.
	parseArgs(os.Args)

	// Allow enough idle connections for the chosen concurrency.
	client.Transport.(*http.Transport).MaxIdleConnsPerHost = arguments.Concurrency

	limit := arguments.Concurrency
	if limit < 1 {
		limit = 1
	}
	// Counting semaphore: a slot must be acquired before each goroutine starts.
	sem := make(chan struct{}, limit)

	for index := arguments.StartID; index <= arguments.StopID; index++ {
		worker.Add(1)
		sem <- struct{}{}
		url := "http://www.gutenberg.org/ebooks/" + strconv.Itoa(index)
		// Pass url/index explicitly: loop variables are shared across
		// iterations on Go versions before 1.22.
		go func(url string, index int) {
			defer func() { <-sem }()
			scrapeBookPage(url, index, &worker)
		}(url, index)
	}
	worker.Wait()
}