Skip to content

Commit de6c5ea

Browse files
committed
Async: Use Mutex to prevent data race amongst various goroutines
1 parent 9e7d1b6 commit de6c5ea

File tree

4 files changed

+57
-24
lines changed

4 files changed

+57
-24
lines changed

engine/besthdmovies.go

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -88,11 +88,12 @@ func (engine *BestHDEngine) parseSingleMovie(el *colly.HTMLElement) (Movie, erro
8888
return movie, nil
8989
}
9090

91-
func (engine *BestHDEngine) updateDownloadProps(downloadCollector *colly.Collector, movies map[string]*Movie) {
91+
func (engine *BestHDEngine) updateDownloadProps(downloadCollector *colly.Collector, scrapedMovies *scraped) {
9292
submissionDetails := make(map[string]string)
9393
// Update movie download link if div.post-single-content on page
9494
downloadCollector.OnHTML("div.post-single-content", func(e *colly.HTMLElement) {
95-
movie := getMovieFromMovies(e.Request.URL.String(), movies)
95+
movie := getMovieFromMovies(e.Request.URL.String(), scrapedMovies.movies)
96+
scrapedMovies.Lock()
9697
ptags := e.ChildTexts("p")
9798
if ptags[len(ptags)-3] >= ptags[len(ptags)-2] {
9899
movie.Description = strings.TrimSpace(ptags[len(ptags)-3])
@@ -116,10 +117,11 @@ func (engine *BestHDEngine) updateDownloadProps(downloadCollector *colly.Collect
116117
}
117118
}
118119
}
120+
scrapedMovies.Unlock()
119121
})
120122

121123
downloadCollector.OnHTML("div.content-area", func(e *colly.HTMLElement) {
122-
movie := getMovieFromMovies(e.Request.URL.String(), movies)
124+
movie := getMovieFromMovies(e.Request.URL.String(), scrapedMovies.movies)
123125
links := e.ChildAttrs("a", "href")
124126
for _, link := range links {
125127
if strings.HasPrefix(link, "https://zeefiles") || strings.HasPrefix(link, "http://zeefiles") {
@@ -129,7 +131,9 @@ func (engine *BestHDEngine) updateDownloadProps(downloadCollector *colly.Collect
129131
}
130132
downloadlink, err := url.Parse(link)
131133
if err == nil {
134+
scrapedMovies.Lock()
132135
movie.DownloadLink = downloadlink
136+
scrapedMovies.Unlock()
133137
downloadCollector.Visit(downloadlink.String())
134138
} else {
135139
log.Fatal(err)
@@ -139,12 +143,14 @@ func (engine *BestHDEngine) updateDownloadProps(downloadCollector *colly.Collect
139143
})
140144

141145
downloadCollector.OnHTML("div.freeDownload", func(e *colly.HTMLElement) {
142-
movie := getMovieFromMovies(e.Request.URL.String(), movies)
146+
movie := getMovieFromMovies(e.Request.URL.String(), scrapedMovies.movies)
143147
zeesubmission := make(map[string]string)
144148
if e.ChildAttr("a.link_button", "href") != "" {
145149
downloadlink, err := url.Parse(e.ChildAttr("a.link_button", "href"))
146150
if err == nil {
151+
scrapedMovies.Lock()
147152
movie.DownloadLink = downloadlink
153+
scrapedMovies.Unlock()
148154
}
149155
} else {
150156

@@ -164,7 +170,7 @@ func (engine *BestHDEngine) updateDownloadProps(downloadCollector *colly.Collect
164170

165171
downloadCollector.OnHTML("form[method=post]", func(e *colly.HTMLElement) {
166172
var err error
167-
movie := getMovieFromMovies(e.Request.URL.String(), movies)
173+
movie := getMovieFromMovies(e.Request.URL.String(), scrapedMovies.movies)
168174
downloadlink := movie.DownloadLink
169175
inputNames := e.ChildAttrs("input", "name")
170176
inputValues := e.ChildAttrs("input", "value")
@@ -188,8 +194,10 @@ func (engine *BestHDEngine) updateDownloadProps(downloadCollector *colly.Collect
188194

189195
downloadCollector.OnHTML("video", func(e *colly.HTMLElement) {
190196
downloadlink := e.ChildAttr("source", "src")
191-
movie := getMovieFromMovies(e.Request.URL.String(), movies)
197+
movie := getMovieFromMovies(e.Request.URL.String(), scrapedMovies.movies)
198+
scrapedMovies.Lock()
192199
movie.DownloadLink, _ = url.Parse(downloadlink)
200+
scrapedMovies.Unlock()
193201
})
194202
}
195203

engine/engines.go

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"net/url"
88
"strconv"
99
"strings"
10+
"sync"
1011

1112
"github.com/gocolly/colly/v2"
1213
// "github.com/gocolly/colly/v2/debug"
@@ -43,7 +44,14 @@ type Engine interface {
4344
getParseAttrs() (string, string, error)
4445

4546
// updateDownloadProps: registers callbacks on downloadCollector that update the download details of the scraped movies
46-
updateDownloadProps(downloadCollector *colly.Collector, movies map[string]*Movie)
47+
updateDownloadProps(downloadCollector *colly.Collector, scrapedMovies *scraped)
48+
}
49+
50+
// scraped stores all scraped movies, keyed by their download link.
51+
// The movies are accessed from different goroutines, so the embedded Mutex is used to prevent data races.
52+
type scraped struct {
53+
movies map[string]*Movie
54+
sync.Mutex
4755
}
4856

4957
// Scrape : Parse queries a url and return results
@@ -58,10 +66,10 @@ func Scrape(engine Engine) ([]Movie, error) {
5866
// Another collector for download Links
5967
downloadLinkCollector := c.Clone()
6068

61-
var movies = make(map[string]*Movie)
69+
scrapedMovies := scraped{movies: make(map[string]*Movie)}
6270

6371
// Any extra setup for downloads can be specified in the engine's updateDownloadProps function
64-
engine.updateDownloadProps(downloadLinkCollector, movies)
72+
engine.updateDownloadProps(downloadLinkCollector, &scrapedMovies)
6573

6674
main, article, err := engine.getParseAttrs()
6775
if err != nil {
@@ -74,7 +82,9 @@ func Scrape(engine Engine) ([]Movie, error) {
7482
log.Errorf("%v could not be parsed", movie)
7583
} else {
7684
// Using DownloadLink as key to movie makes it unique
77-
movies[movie.DownloadLink.String()] = &movie
85+
scrapedMovies.Lock()
86+
scrapedMovies.movies[movie.DownloadLink.String()] = &movie
87+
scrapedMovies.Unlock()
7888
downloadLinkCollector.Visit(movie.DownloadLink.String())
7989
}
8090
})
@@ -94,7 +104,7 @@ func Scrape(engine Engine) ([]Movie, error) {
94104
// movie details when we need it
95105
downloadLinkCollector.OnRequest(func(r *colly.Request) {
96106
r.Headers.Set("Accept", "text/html,application/xhtml+xml,application/xml")
97-
if movie, ok := movies[r.URL.String()]; ok {
107+
if movie, ok := scrapedMovies.movies[r.URL.String()]; ok {
98108
log.Debugf("Retrieving Download Link %v\n", movie.DownloadLink)
99109
}
100110
})
@@ -109,7 +119,7 @@ func Scrape(engine Engine) ([]Movie, error) {
109119
})
110120

111121
downloadLinkCollector.OnResponse(func(r *colly.Response) {
112-
// movie := movies[r.Request.URL.String()]
122+
// movie := scrapedMovies.movies[r.Request.URL.String()]
113123
// log.Infof("%s %v %s", r.Request.URL.String(), movie.DownloadLink, movie.Title)
114124
// log.Debugf("Retrieved Download Link %v\n", movie.DownloadLink)
115125
})
@@ -119,12 +129,12 @@ func Scrape(engine Engine) ([]Movie, error) {
119129
downloadLinkCollector.Wait()
120130

121131
// Create a List of Movies
122-
v := make([]Movie, 0, len(movies))
132+
v := make([]Movie, 0, len(scrapedMovies.movies))
123133

124-
for _, value := range movies {
134+
for _, value := range scrapedMovies.movies {
125135
v = append(v, *value)
126136
}
127-
prettyPrint(v)
137+
// prettyPrint(v)
128138

129139
return v, nil
130140
}

engine/fzmovies.go

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -89,21 +89,24 @@ func (engine *FzEngine) parseSingleMovie(el *colly.HTMLElement) (Movie, error) {
8989
return movie, nil
9090
}
9191

92-
func (engine *FzEngine) updateDownloadProps(downloadCollector *colly.Collector, movies map[string]*Movie) {
92+
func (engine *FzEngine) updateDownloadProps(downloadCollector *colly.Collector, scrapedMovies *scraped) {
9393
// Update movie download link and size if ul.moviesfiles is on the page
9494
downloadCollector.OnHTML("ul.moviesfiles", func(e *colly.HTMLElement) {
95-
movie := getMovieFromMovies(e.Request.URL.String(), movies)
95+
movie := getMovieFromMovies(e.Request.URL.String(), scrapedMovies.movies)
9696
link := strings.Replace(e.ChildAttr("a", "href"), "download1.php", "download.php", 1)
9797
downloadLink, err := url.Parse(e.Request.AbsoluteURL(link + "&pt=jRGarGzOo2"))
9898
// downloadLink, err := url.Parse(e.ChildAttr("a", "href") + "&pt=jRGarGzOo2")
9999
if err != nil {
100100
log.Fatal(err)
101101
}
102+
103+
scrapedMovies.Lock()
102104
movie.DownloadLink = downloadLink
103105
re := regexp.MustCompile(`(.* MB)`)
104106
dl := strings.TrimPrefix(re.FindStringSubmatch(e.ChildText("dcounter"))[0], "(")
105107
movie.Size = dl
106108
downloadCollector.Visit(downloadLink.String())
109+
scrapedMovies.Unlock()
107110
})
108111

109112
// Update Download Link if "Download" HTML on page
@@ -113,8 +116,10 @@ func (engine *FzEngine) updateDownloadProps(downloadCollector *colly.Collector,
113116
if err != nil {
114117
log.Fatal(err)
115118
}
116-
movie := getMovieFromMovies(e.Request.URL.String(), movies)
119+
movie := getMovieFromMovies(e.Request.URL.String(), scrapedMovies.movies)
120+
scrapedMovies.Lock()
117121
movie.DownloadLink = downloadLink
122+
scrapedMovies.Unlock()
118123
}
119124
})
120125
}

engine/netnaija.go

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -114,32 +114,38 @@ func (engine *NetNaijaEngine) parseSingleMovie(el *colly.HTMLElement) (Movie, er
114114
return movie, nil
115115
}
116116

117-
func (engine *NetNaijaEngine) updateDownloadProps(downloadCollector *colly.Collector, movies map[string]*Movie) {
117+
func (engine *NetNaijaEngine) updateDownloadProps(downloadCollector *colly.Collector, scrapedMovies *scraped) {
118118
// Update movie size
119119
downloadCollector.OnHTML("button[id=download-button]", func(e *colly.HTMLElement) {
120-
movie := getMovieFromMovies(e.Request.URL.String(), movies)
120+
movie := getMovieFromMovies(e.Request.URL.String(), scrapedMovies.movies)
121+
scrapedMovies.Lock()
121122
movie.Size = strings.TrimSpace(e.ChildText("span.size"))
123+
scrapedMovies.Unlock()
122124
})
123125

124126
downloadCollector.OnHTML("h3.file-name", func(e *colly.HTMLElement) {
125127
downloadLink, err := url.Parse(path.Join(strings.TrimSpace(e.ChildAttr("a", "href")), "download"))
126128
if err != nil {
127129
log.Fatal(err)
128130
}
129-
movie := getMovieFromMovies(e.Request.URL.String(), movies)
131+
movie := getMovieFromMovies(e.Request.URL.String(), scrapedMovies.movies)
132+
scrapedMovies.Lock()
130133
movie.DownloadLink = downloadLink
134+
scrapedMovies.Unlock()
131135
downloadCollector.Visit(downloadLink.String())
132136
})
133137

134138
// Update movie download link if a[id=download] on page
135139
downloadCollector.OnHTML("a[id=download]", func(e *colly.HTMLElement) {
136-
movie := getMovieFromMovies(e.Request.URL.String(), movies)
140+
movie := getMovieFromMovies(e.Request.URL.String(), scrapedMovies.movies)
141+
scrapedMovies.Lock()
137142
movie.Size = strings.TrimSpace(e.ChildText("span[id=download-size]"))
138143
downloadLink, err := url.Parse(e.Attr("href"))
139144
if err != nil {
140145
log.Fatal(err)
141146
}
142147
movie.DownloadLink = downloadLink
148+
scrapedMovies.Unlock()
143149
})
144150

145151
// Update Download Link if "Direct Download" HTML on page
@@ -149,14 +155,17 @@ func (engine *NetNaijaEngine) updateDownloadProps(downloadCollector *colly.Colle
149155
if err != nil {
150156
log.Fatal(err)
151157
}
152-
movie := getMovieFromMovies(e.Request.URL.String(), movies)
158+
movie := getMovieFromMovies(e.Request.URL.String(), scrapedMovies.movies)
159+
scrapedMovies.Lock()
153160
movie.DownloadLink = downloadLink
161+
scrapedMovies.Unlock()
154162
}
155163
})
156164

157165
//for series or parts
158166
downloadCollector.OnHTML("div.video-series-latest-episodes", func(inn *colly.HTMLElement) {
159-
movie := getMovieFromMovies(inn.Request.URL.String(), movies)
167+
movie := getMovieFromMovies(inn.Request.URL.String(), scrapedMovies.movies)
168+
scrapedMovies.Lock()
160169
movie.IsSeries = true
161170
inn.ForEach("a", func(_ int, e *colly.HTMLElement) {
162171
downloadLink, err := url.Parse(e.Attr("href"))
@@ -166,6 +175,7 @@ func (engine *NetNaijaEngine) updateDownloadProps(downloadCollector *colly.Colle
166175
downloadLink.Path = path.Join(downloadLink.Path, "download")
167176
movie.SDownloadLink = append(movie.SDownloadLink, downloadLink)
168177
})
178+
scrapedMovies.Unlock()
169179
})
170180
}
171181

0 commit comments

Comments
 (0)