diff --git a/README.md b/README.md
index 25f8fc4..c9339b7 100644
--- a/README.md
+++ b/README.md
@@ -171,6 +171,7 @@ The URLs will be downloaded one by one.
 | [danbooru.donmai.us](https://danbooru.donmai.us) | :heavy_check_mark: | ? |
 | [doujin.sexy](https://doujin.sexy) | :heavy_check_mark: | ? |
 | [e-hentai.org](http://e-hentai.org/) | :heavy_check_mark: | ? |
+| [ecchi.iwara.tv](https://ecchi.iwara.tv/) | :heavy_check_mark: |:heavy_check_mark:|
 | [exhentai.org*](http://exhentai.org/) | :heavy_check_mark: | ? |
 | [hanime.tv(1080p, 720p, 480p, 360p)](https://hanime.tv) | :heavy_check_mark: |:heavy_check_mark:|
 | [hentai2read.com](https://hentai2read.com) | :heavy_check_mark: | ? |
diff --git a/extractors/extractors.go b/extractors/extractors.go
index 7112ffa..5469c1b 100644
--- a/extractors/extractors.go
+++ b/extractors/extractors.go
@@ -23,6 +23,7 @@ import (
     "github.com/gan-of-culture/go-hentai-scraper/extractors/hitomi"
     "github.com/gan-of-culture/go-hentai-scraper/extractors/htdoujin"
     "github.com/gan-of-culture/go-hentai-scraper/extractors/htstreaming"
+    "github.com/gan-of-culture/go-hentai-scraper/extractors/iwara"
     "github.com/gan-of-culture/go-hentai-scraper/extractors/miohentai"
     "github.com/gan-of-culture/go-hentai-scraper/extractors/muchohentai"
     "github.com/gan-of-culture/go-hentai-scraper/extractors/nhentai"
@@ -47,16 +48,16 @@ func init() {
     extractorsMap = map[string]static.Extractor{
         "": universal.New(),
-        "9hentai.to":         ninehentaiExtractor,
-        "www1.9hentai.ru":    ninehentaiExtractor,
-        "booru.io":           booru.New(),
-        "comicporn.xxx":      htdoujinExtractor,
-        "www.damn.stream":    damnExtractor,
-        "damn.stream":        damnExtractor,
-        "danbooru.donmai.us": danbooru.New(),
-        "doujin.sexy":        simplyhentaiExtractor,
-        "e-hentai.org":       ehentai.New(),
-        //ecchi.iwara.tv/
+        "9hentai.to":          ninehentaiExtractor,
+        "www1.9hentai.ru":     ninehentaiExtractor,
+        "booru.io":            booru.New(),
+        "comicporn.xxx":       htdoujinExtractor,
+        "www.damn.stream":     damnExtractor,
+        "damn.stream":         damnExtractor,
+        "danbooru.donmai.us":  danbooru.New(),
+        "doujin.sexy":         simplyhentaiExtractor,
+        "e-hentai.org":        ehentai.New(),
+        "ecchi.iwara.tv":      iwara.New(),
         "exhentai.org":        exhentai.New(),
         "hanime.tv":           hanime.New(),
         "hentai2read.com":     hentai2read.New(),
diff --git a/extractors/hanime/hanime.go b/extractors/hanime/hanime.go
index fae7a68..7ef2f70 100644
--- a/extractors/hanime/hanime.go
+++ b/extractors/hanime/hanime.go
@@ -154,7 +154,7 @@ func extractData(URL string) (static.Data, error) {
         streams[fmt.Sprintf("%d", len(streams))] = &static.Stream{
             URLs:    URLs,
-            Quality: fmt.Sprintf("%v x %s", stream.Width, stream.Height),
+            Quality: fmt.Sprintf("%sp; %v x %s", stream.Height, stream.Width, stream.Height),
             Size:    utils.CalcSizeInByte(stream.FileSizeInMBs, "MB"),
             Info:    stream.Filename,
             Ext:     "mp4",
         }
diff --git a/extractors/iwara/iwara.go b/extractors/iwara/iwara.go
new file mode 100644
index 0000000..74b2f2d
--- /dev/null
+++ b/extractors/iwara/iwara.go
@@ -0,0 +1,184 @@
+package iwara
+
+import (
+    "encoding/json"
+    "fmt"
+    "regexp"
+    "strings"
+
+    "github.com/gan-of-culture/go-hentai-scraper/config"
+    "github.com/gan-of-culture/go-hentai-scraper/request"
+    "github.com/gan-of-culture/go-hentai-scraper/static"
+    "github.com/gan-of-culture/go-hentai-scraper/utils"
+)
+
+type stream struct {
+    Resolution string
+    URI        string
+    Mime       string
+}
+
+const site = "https://ecchi.iwara.tv/"
+const videoAPI = "https://ecchi.iwara.tv/api/video/"
+
+var reImgSource *regexp.Regexp = regexp.MustCompile(`([^"]+large/public/photos/[^"]+)"(?: width="([^"]*)[^=]+="([^"]*))`) // image source plus optional width/height attributes
+var reExt *regexp.Regexp = regexp.MustCompile(`(\w+)\?itok=[a-zA-Z\d]+$`) // file extension in front of the ?itok= cache token
+var reTitle *regexp.Regexp = regexp.MustCompile(`([^|]+)`)
+var reVideoID *regexp.Regexp = regexp.MustCompile(`https://ecchi.iwara.tv/videos/(.+)`)
+
+type extractor struct{}
+
+// New returns an iwara.tv extractor.
+func New() static.Extractor {
+    return &extractor{}
+}
+
+func (e *extractor) Extract(URL string) ([]*static.Data, error) {
+    postIDs := parseURL(URL)
+    if len(postIDs) == 0 {
+        return nil, static.ErrURLParseFailed
+    }
+
+    data := []*static.Data{}
+    for _, pID := range postIDs {
+        d, err := extractData(pID)
+        if err != nil {
+            return nil, utils.Wrap(err, pID)
+        }
+        data = append(data, d...)
+    }
+
+    return data, nil
+}
+
+func parseURL(URL string) []string {
+    if ok, _ := regexp.MatchString(site+`(?:videos|images)/`, URL); ok {
+        return []string{URL}
+    }
+
+    tmpURL := regexp.MustCompile(`page=\d+`).ReplaceAllString(URL, "page=%d")
+    if !strings.Contains(tmpURL, "page=%d") {
+        tmpURL = URL + "&page=%d"
+    }
+
+    out := []string{}
+    count := 0
+    for i := 0; ; {
+        htmlString, err := request.Get(fmt.Sprintf(tmpURL, i))
+        if err != nil {
+            return nil
+        }
+        if config.Amount > 0 {
+            fmt.Println(count) // progress output while paging through overview pages
+        }
+
+        re := regexp.MustCompile(`/(?:videos|images)/[a-zA-Z0-9%=?-]+"`)
+        matchedURLs := re.FindAllString(htmlString, -1)
+
+        URLs := []string{}
+        for _, matchedURL := range utils.RemoveAdjDuplicates(matchedURLs) {
+            URLs = append(URLs, site+strings.Trim(matchedURL, `/"`))
+        }
+        count += len(URLs)
+        i++
+        out = append(out, URLs...)
+        if config.Amount == 0 || count >= config.Amount || len(URLs) == 0 {
+            break
+        }
+    }
+
+    if config.Amount > 0 && len(out) > config.Amount {
+        out = out[:config.Amount]
+    }
+
+    return out
+}
+
+func extractData(URL string) ([]*static.Data, error) {
+    resString, err := request.Get(URL)
+    if err != nil {
+        return nil, err
+    }
+
+    title := utils.GetLastItemString(reTitle.FindStringSubmatch(resString))
+    title = title[:len(title)-1]
+
+    matchedImages := reImgSource.FindAllStringSubmatch(resString, -1)
+    if len(matchedImages) > 0 {
+        data := []*static.Data{}
+        for i, img := range matchedImages {
+            img[1] = "https:" + img[1]
+
+            quality := ""
+            if len(img) > 2 {
+                quality = fmt.Sprintf("%s x %s", img[2], img[3])
+            }
+
+            size, _ := request.Size(img[1], site)
+
+            data = append(data, &static.Data{
+                Site:    site,
+                Type:    "image",
+                Title:   fmt.Sprintf("%s_%d", title, i+1),
+                Streams: map[string]*static.Stream{
+                    "0": {
+                        URLs: []*static.URL{
+                            {
+                                URL: img[1],
+                                Ext: reExt.FindStringSubmatch(img[1])[1],
+                            },
+                        },
+                        Quality: quality,
+                        Size:    size,
+                    },
+                },
+                Url: URL,
+            })
+        }
+        return data, nil
+    }
+
+    videoID := utils.GetLastItemString(reVideoID.FindStringSubmatch(URL))
+    if videoID == "" {
+        return nil, static.ErrURLParseFailed
+    }
+
+    jsonString, err := request.Get(videoAPI + videoID)
+    if err != nil {
+        return nil, err
+    }
+
+    vStreams := []stream{}
+    err = json.Unmarshal([]byte(jsonString), &vStreams)
+    if err != nil {
+        return nil, err
+    }
+
+    streams := map[string]*static.Stream{}
+    for i, stream := range vStreams {
+        stream.URI = "https:" + stream.URI
+
+        size, _ := request.Size(stream.URI, site)
+
+        streams[fmt.Sprint(i)] = &static.Stream{
+            URLs: []*static.URL{
+                {
+                    URL: stream.URI,
+                    Ext: utils.GetLastItemString(strings.Split(stream.Mime, "/")),
+                },
+            },
+            Quality: stream.Resolution,
+            Size:    size,
+        }
+    }
+
+    return []*static.Data{
+        {
+            Site:    site,
+            Title:   title,
+            Type:    static.DataTypeVideo,
+            Streams: streams,
+            Url:     URL,
+        },
+    }, nil
+}
diff --git a/extractors/iwara/iwara_test.go b/extractors/iwara/iwara_test.go
new file mode 100644
index 0000000..4c23c60
--- /dev/null
+++ b/extractors/iwara/iwara_test.go
@@ -0,0 +1,70 @@
+package iwara
+
+import (
+    "testing"
+
+    "github.com/gan-of-culture/go-hentai-scraper/config"
+)
+
+func TestParseURL(t *testing.T) {
+    tests := []struct {
+        name string
+        url  string
+        want int
+    }{
+        {
+            name: "Single video",
+            url:  "https://ecchi.iwara.tv/videos/kmnzvsa75uzbaw36?language=en",
+            want: 1,
+        }, {
+            name: "Single images",
+            url:  "https://ecchi.iwara.tv/images/%E6%B9%AF%E4%B8%8A%E3%81%8C%E3%82%8A%E3%82%86%E3%81%84%E3%81%A1%E3%82%83%E3%82%93?language=en",
+            want: 2,
+        }, {
+            name: "Mass",
+            url:  "https://ecchi.iwara.tv/images?language=en&f%5B0%5D=field_image_categories%3A5&page=1",
+            want: 40,
+        },
+    }
+    for _, tt := range tests {
+        t.Run(tt.name, func(t *testing.T) {
+            if tt.name == "Mass" {
+                config.Amount = 40
+            }
+
+            urls := parseURL(tt.url)
+            if len(urls) > tt.want || len(urls) == 0 {
+                t.Errorf("Got: %v - want: %v", len(urls), tt.want)
+            }
+        })
+    }
+}
+
+func TestExtract(t *testing.T) {
+    tests := []struct {
+        name string
+        url  string
+        want int
+    }{
+        {
+            name: "Single video",
+            url:  "https://ecchi.iwara.tv/videos/kmnzvsa75uzbaw36?language=en",
+            want: 1,
+        }, {
+            name: "Single images",
+            url:  "https://ecchi.iwara.tv/images/%E6%B9%AF%E4%B8%8A%E3%81%8C%E3%82%8A%E3%82%86%E3%81%84%E3%81%A1%E3%82%83%E3%82%93?language=en",
+            want: 2,
+        },
+    }
+    for _, tt := range tests {
+        t.Run(tt.name, func(t *testing.T) {
+            data, err := New().Extract(tt.url)
+            if err != nil {
+                t.Error(err)
+            }
+            if len(data) > tt.want || len(data) == 0 {
+                t.Errorf("Got: %v - want: %v", len(data), tt.want)
+            }
+        })
+    }
+}
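For reviewers who want to exercise the new package directly, a minimal hypothetical driver; it is not part of this change, uses only the Extract signature and the static.Data fields visible in the diff, reuses the video URL from the tests, and assumes Stream.Size is an integer byte count:

package main

import (
    "fmt"
    "log"

    "github.com/gan-of-culture/go-hentai-scraper/extractors/iwara"
)

func main() {
    // Extract accepts a single video/image post URL or a paginated overview URL.
    data, err := iwara.New().Extract("https://ecchi.iwara.tv/videos/kmnzvsa75uzbaw36?language=en")
    if err != nil {
        log.Fatal(err)
    }
    for _, d := range data {
        fmt.Println(d.Title, d.Type)
        for key, s := range d.Streams {
            // Quality is the API-reported resolution (videos) or "w x h" (images);
            // Size was probed up front via request.Size. %d assumes an integer type.
            fmt.Printf("  stream %s: %s, %d bytes\n", key, s.Quality, s.Size)
        }
    }
}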