added ecchi.iwara.tv

gan-of-culture · Jun 25, 2021 · 886112c · 886112c
1 parent 58335e9
commit 886112c
Show file tree

Hide file tree

Showing 5 changed files with 267 additions and 11 deletions.
diff --git a/README.md b/README.md
@@ -171,6 +171,7 @@ The URLs will be downloaded one by one.
 | [danbooru.donmai.us](https://danbooru.donmai.us)                         | :heavy_check_mark: |        ?         |
 | [doujin.sexy](https://doujin.sexy)                                       | :heavy_check_mark: |        ?         |
 | [e-hentai.org](http://e-hentai.org/)                                     | :heavy_check_mark: |        ?         |
+| [ecchi.iwara.tv](https://ecchi.iwara.tv/)                                | :heavy_check_mark: |:heavy_check_mark:|
 | [exhentai.org*](http://exhentai.org/)                                    | :heavy_check_mark: |        ?         |
 | [hanime.tv(1080p, 720p, 480p, 360p)](https://hanime.tv)                  | :heavy_check_mark: |:heavy_check_mark:|
 | [hentai2read.com](https://hentai2read.com)                               | :heavy_check_mark: |        ?         |

diff --git a/extractors/extractors.go b/extractors/extractors.go
@@ -23,6 +23,7 @@ import (
 	"github.com/gan-of-culture/go-hentai-scraper/extractors/hitomi"
 	"github.com/gan-of-culture/go-hentai-scraper/extractors/htdoujin"
 	"github.com/gan-of-culture/go-hentai-scraper/extractors/htstreaming"
+	"github.com/gan-of-culture/go-hentai-scraper/extractors/iwara"
 	"github.com/gan-of-culture/go-hentai-scraper/extractors/miohentai"
 	"github.com/gan-of-culture/go-hentai-scraper/extractors/muchohentai"
 	"github.com/gan-of-culture/go-hentai-scraper/extractors/nhentai"
@@ -47,16 +48,16 @@ func init() {
 	extractorsMap = map[string]static.Extractor{
 		"": universal.New(),
 
-		"9hentai.to":         ninehentaiExtractor,
-		"www1.9hentai.ru":    ninehentaiExtractor,
-		"booru.io":           booru.New(),
-		"comicporn.xxx":      htdoujinExtractor,
-		"www.damn.stream":    damnExtractor,
-		"damn.stream":        damnExtractor,
-		"danbooru.donmai.us": danbooru.New(),
-		"doujin.sexy":        simplyhentaiExtractor,
-		"e-hentai.org":       ehentai.New(),
-		//ecchi.iwara.tv/
+		"9hentai.to":            ninehentaiExtractor,
+		"www1.9hentai.ru":       ninehentaiExtractor,
+		"booru.io":              booru.New(),
+		"comicporn.xxx":         htdoujinExtractor,
+		"www.damn.stream":       damnExtractor,
+		"damn.stream":           damnExtractor,
+		"danbooru.donmai.us":    danbooru.New(),
+		"doujin.sexy":           simplyhentaiExtractor,
+		"e-hentai.org":          ehentai.New(),
+		"ecchi.iwara.tv":        iwara.New(),
 		"exhentai.org":          exhentai.New(),
 		"hanime.tv":             hanime.New(),
 		"hentai2read.com":       hentai2read.New(),

diff --git a/extractors/hanime/hanime.go b/extractors/hanime/hanime.go
@@ -154,7 +154,7 @@ func extractData(URL string) (static.Data, error) {
 
 		streams[fmt.Sprintf("%d", len(streams))] = &static.Stream{
 			URLs:    URLs,
-			Quality: fmt.Sprintf("%v x %s", stream.Width, stream.Height),
+			Quality: fmt.Sprintf("%sp; %v x %s", stream.Height, stream.Width, stream.Height),
 			Size:    utils.CalcSizeInByte(stream.FileSizeInMBs, "MB"),
 			Info:    stream.Filename,
 			Ext:     "mp4",

diff --git a/extractors/iwara/iwara.go b/extractors/iwara/iwara.go
@@ -0,0 +1,184 @@
+package iwara
+
+import (
+	"encoding/json"
+	"fmt"
+	"regexp"
+	"strings"
+
+	"github.com/gan-of-culture/go-hentai-scraper/config"
+	"github.com/gan-of-culture/go-hentai-scraper/request"
+	"github.com/gan-of-culture/go-hentai-scraper/static"
+	"github.com/gan-of-culture/go-hentai-scraper/utils"
+)
+
+type stream struct { // one element of the JSON array returned by the video API
+	Resolution string // copied into the Quality of the resulting static.Stream
+	URI        string // "https:" is prepended before the address is used
+	Mime       string // split on "/" to derive the file extension
+}
+
+const site = "https://ecchi.iwara.tv/" // base URL; post paths are appended to it and it is passed along to request.Size
+const videoAPI = "https://ecchi.iwara.tv/api/video/" // the video ID is appended to this address to list the streams
+
+var reImgSource *regexp.Regexp = regexp.MustCompile(`([^"]+large/public/photos/[^"]+)"(?: width="([^"]*)[^=]+="([^"]*))`) // photo URL, width value and the value of the attribute after it (presumably height)
+var reExt *regexp.Regexp = regexp.MustCompile(`(\w+)\?itok=[a-zA-Z\d]+$`) // file extension in front of a trailing "?itok=" token
+var reTitle *regexp.Regexp = regexp.MustCompile(`<title>([^|]+)`) // <title> text up to the first "|"
+var reVideoID *regexp.Regexp = regexp.MustCompile(`https://ecchi.iwara.tv/videos/(.+)`) // everything after "/videos/" in a video page URL
+
+// extractor is the ecchi.iwara.tv implementation returned by New.
+type extractor struct{}
+
+// New returns an iwara extractor.
+func New() static.Extractor {
+	return &extractor{}
+}
+
+// Extract resolves URL into one or more post URLs via parseURL and gathers the
+// data of every post. It returns static.ErrURLParseFailed when no post URL is
+// found. The first post that fails aborts the run; its error is passed to
+// utils.Wrap together with the post URL.
+func (e *extractor) Extract(URL string) ([]*static.Data, error) {
+	posts := parseURL(URL)
+	if len(posts) == 0 {
+		return nil, static.ErrURLParseFailed
+	}
+
+	collected := make([]*static.Data, 0)
+	for idx := range posts {
+		items, err := extractData(posts[idx])
+		if err != nil {
+			return nil, utils.Wrap(err, posts[idx])
+		}
+		collected = append(collected, items...)
+	}
+
+	return collected, nil
+}
+
+// parseURL turns URL into the list of post URLs that should be extracted.
+//
+// A URL that already points at a single post (".../videos/<id>" or
+// ".../images/<id>") is returned as the only element. Any other URL is treated
+// as a listing: its pages are requested one after another, starting at page 0,
+// and every post link found is collected. With config.Amount == 0 only the
+// first page is read; otherwise paging continues until config.Amount links
+// were found or a page yields none, and the result is cut to config.Amount.
+// It returns nil if a page request fails.
+func parseURL(URL string) []string {
+	// QuoteMeta keeps the dots of the host name from matching arbitrary bytes.
+	rePost := regexp.MustCompile(regexp.QuoteMeta(site) + `(?:videos|images)/`)
+	if rePost.MatchString(URL) {
+		return []string{URL}
+	}
+
+	// The page number is spliced in with plain string operations. Using the URL
+	// itself as a fmt format string corrupts URL-encoded listings, because
+	// escapes such as "%5B" are parsed as formatting verbs.
+	rePage := regexp.MustCompile(`page=\d+`)
+	hasPage := rePage.MatchString(URL)
+	sep := "&"
+	if !strings.Contains(URL, "?") {
+		// No query string yet, so the page parameter has to start one.
+		sep = "?"
+	}
+	pageURL := func(page int) string {
+		param := fmt.Sprintf("page=%d", page)
+		if hasPage {
+			return rePage.ReplaceAllLiteralString(URL, param)
+		}
+		return URL + sep + param
+	}
+
+	// Compiled once instead of on every page.
+	reLink := regexp.MustCompile(`/(?:videos|images)/[a-zA-Z0-9%=?-]+"`)
+
+	out := []string{}
+	for page := 0; ; page++ {
+		htmlString, err := request.Get(pageURL(page))
+		if err != nil {
+			return nil
+		}
+
+		links := utils.RemoveAdjDuplicates(reLink.FindAllString(htmlString, -1))
+		for _, link := range links {
+			// Drop the leading slash and the closing quote kept by the pattern.
+			out = append(out, site+strings.Trim(link, `/"`))
+		}
+
+		if config.Amount == 0 || len(out) >= config.Amount || len(links) == 0 {
+			break
+		}
+	}
+
+	if config.Amount > 0 && len(out) > config.Amount {
+		out = out[:config.Amount]
+	}
+
+	return out
+}
+
+func extractData(URL string) ([]*static.Data, error) {
+	resString, err := request.Get(URL)
+	if err != nil {
+		return nil, err
+	}
+
+	title := utils.GetLastItemString(reTitle.FindStringSubmatch(resString))
+	title = title[:len(title)-1]
+
+	matchedImages := reImgSource.FindAllStringSubmatch(resString, -1)
+	if len(matchedImages) > 0 {
+		data := []*static.Data{}
+		for i, img := range matchedImages {
+			img[1] = "https:" + img[1]
+
+			quality := ""
+			if len(img) > 2 {
+				quality = fmt.Sprintf("%s x %s", img[2], img[3])
+			}
+
+			size, _ := request.Size(img[1], site)
+
+			data = append(data, &static.Data{
+				Site:  site,
+				Type:  "image",
+				Title: fmt.Sprintf("%s_%d", title, i+1),
+				Streams: map[string]*static.Stream{
+					"0": {
+						URLs: []*static.URL{
+							{
+								URL: img[1],
+								Ext: reExt.FindStringSubmatch(img[1])[1],
+							},
+						},
+						Quality: quality,
+						Size:    size,
+					},
+				},
+				Url: URL,
+			})
+		}
+		return data, nil
+	}
+
+	videoID := utils.GetLastItemString(reVideoID.FindStringSubmatch(URL))
+	if videoID == "" {
+		return nil, static.ErrURLParseFailed
+	}
+
+	jsonString, err := request.Get(videoAPI + videoID)
+	if err != nil {
+		return nil, err
+	}
+
+	vStreams := []stream{}
+	err = json.Unmarshal([]byte(jsonString), &vStreams)
+	if err != nil {
+		return nil, err
+	}
+
+	streams := map[string]*static.Stream{}
+	for i, stream := range vStreams {
+		stream.URI = "https:" + stream.URI
+
+		size, _ := request.Size(stream.URI, site)
+
+		streams[fmt.Sprint(i)] = &static.Stream{
+			URLs: []*static.URL{
+				{
+					URL: stream.URI,
+					Ext: utils.GetLastItemString(strings.Split(stream.Mime, "/")),
+				},
+			},
+			Quality: stream.Resolution,
+			Size:    size,
+		}
+	}
+
+	return []*static.Data{
+		{
+			Site:    site,
+			Title:   title,
+			Type:    static.DataTypeVideo,
+			Streams: streams,
+			Url:     URL,
+		},
+	}, nil
+}
diff --git a/extractors/iwara/iwara_test.go b/extractors/iwara/iwara_test.go
@@ -0,0 +1,70 @@
+package iwara
+
+import (
+	"testing"
+
+	"github.com/gan-of-culture/go-hentai-scraper/config"
+)
+
+// TestParseURL checks that parseURL yields at least one and at most the
+// expected number of URLs for single posts and for a listing page.
+// It requests pages from the live site.
+func TestParseURL(t *testing.T) {
+	tests := []struct {
+		name   string
+		url    string
+		amount int // value of config.Amount while the case runs
+		want   int
+	}{
+		{
+			name: "Single video",
+			url:  "https://ecchi.iwara.tv/videos/kmnzvsa75uzbaw36?language=en",
+			want: 1,
+		}, {
+			name: "Single images",
+			url:  "https://ecchi.iwara.tv/images/%E6%B9%AF%E4%B8%8A%E3%81%8C%E3%82%8A%E3%82%86%E3%81%84%E3%81%A1%E3%82%83%E3%82%93?language=en",
+			want: 2,
+		}, {
+			name:   "Mass",
+			url:    "https://ecchi.iwara.tv/images?language=en&f%5B0%5D=field_image_categories%3A5&page=1",
+			amount: 40,
+			want:   40,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			// config.Amount is package-level state; restore it afterwards so
+			// the limit of one case does not leak into later tests.
+			if tt.amount > 0 {
+				prev := config.Amount
+				config.Amount = tt.amount
+				defer func() { config.Amount = prev }()
+			}
+
+			urls := parseURL(tt.url)
+			if len(urls) > tt.want || len(urls) == 0 {
+				t.Errorf("Got: %v - want: %v", len(urls), tt.want)
+			}
+		})
+	}
+}
+
+// TestExtract runs the extractor end to end against a video post and an image
+// post and checks that between one and the expected number of data entries
+// come back. It requests pages from the live site.
+func TestExtract(t *testing.T) {
+	tests := []struct {
+		name string
+		url  string
+		want int
+	}{
+		{
+			name: "Single video",
+			url:  "https://ecchi.iwara.tv/videos/kmnzvsa75uzbaw36?language=en",
+			want: 1,
+		}, {
+			name: "Single images",
+			url:  "https://ecchi.iwara.tv/images/%E6%B9%AF%E4%B8%8A%E3%81%8C%E3%82%8A%E3%82%86%E3%81%84%E3%81%A1%E3%82%83%E3%82%93?language=en",
+			want: 2,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			data, err := New().Extract(tt.url)
+			if err != nil {
+				// Stop here: the result is nil on error, so the length check
+				// below would only add a second, misleading failure.
+				t.Fatal(err)
+			}
+			if len(data) > tt.want || len(data) == 0 {
+				t.Errorf("Got: %v - want: %v", len(data), tt.want)
+			}
+		})
+	}
+}