From de695dce6c0cc5b030bbde2fe6febb08051d8042 Mon Sep 17 00:00:00 2001 From: gan-of-culture Date: Thu, 24 Aug 2023 14:41:28 +0200 Subject: [PATCH] [extractor:hstream] updated extractor for new website --- extractors/hstream/hstream.go | 108 +++++++++++++++++++++++++++-- extractors/hstream/hstream_test.go | 13 +++- request/request.go | 8 +-- request/request_test.go | 2 +- 4 files changed, 117 insertions(+), 14 deletions(-) diff --git a/extractors/hstream/hstream.go b/extractors/hstream/hstream.go index b0f9310..4d05693 100644 --- a/extractors/hstream/hstream.go +++ b/extractors/hstream/hstream.go @@ -1,7 +1,13 @@ package hstream import ( + "bytes" + "encoding/json" + "errors" "fmt" + "io" + "net/http" + "net/url" "regexp" "strings" "time" @@ -10,11 +16,26 @@ import ( "github.com/gan-of-culture/get-sauce/request" "github.com/gan-of-culture/get-sauce/static" "github.com/gan-of-culture/get-sauce/utils" + "golang.org/x/exp/slices" ) const site = "https://hstream.moe/" +const api = "https://hstream.moe/player/api" +const fileProvider = "https://str.h-dl.xyz" + +type APIResponse struct { + Title string `json:"title"` + Poster string `json:"poster"` + Legacy int `json:"legacy"` + Resolution string `json:"resolution"` + StreamURL string `json:"stream_url"` +} + +type APIPayload struct { + EpisodeID string `json:"episode_id"` +} -var reVideoSources = regexp.MustCompile(`https://.+/\d+/[\w.]+/[\w./]+\.(?:mpd|mp4|webm)`) +var reEpisodeID = regexp.MustCompile(`e_id" type="hidden" value="([^"]*)`) var reCaptionSource = regexp.MustCompile(`https://.+/\d+/[\w.]+/[\w./]+\.ass`) type extractor struct{} @@ -44,8 +65,8 @@ func parseURL(URL string) []string { return []string{URL} } - if ok, _ := regexp.MatchString(site+`hentai/[\w\-]+/?`, URL); !ok { - return []string{} + if ok, _ := regexp.MatchString(site+`hentai/[\w\-]+/?`, URL); ok { + return []string{URL} } htmlString, err := request.Get(URL) @@ -64,17 +85,90 @@ func parseURL(URL string) []string { } func extractData(URL string) 
(*static.Data, error) { - htmlString, err := request.Get(URL) + resp, err := request.Request(http.MethodGet, URL, nil, nil) if err != nil { return nil, err } + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + if err != nil { + if err != io.ErrUnexpectedEOF { + return nil, err + } + } + + htmlString := string(body) + cookies := resp.Cookies() + xsrf := cookies[slices.IndexFunc(cookies, func(cookie *http.Cookie) bool { + return cookie.Name == "XSRF-TOKEN" + })] + hstreamSession := cookies[slices.IndexFunc(cookies, func(cookie *http.Cookie) bool { + return cookie.Name == "hstream_session" + })] if strings.Contains(htmlString, "DDOS-GUARD") { time.Sleep(300 * time.Millisecond) htmlString, _ = request.Get(URL) } - videoSources := reverse(reVideoSources.FindAllString(htmlString, -1)) + matchedEpisodeID := reEpisodeID.FindStringSubmatch(htmlString) + if len(matchedEpisodeID) < 2 { + return nil, errors.New("cannot find e_id for " + URL) + } + + payload := APIPayload{EpisodeID: strings.TrimSpace(matchedEpisodeID[1])} + payloadBytes, err := json.Marshal(payload) + if err != nil { + return nil, err + } + + xsrfValueUnescaped, err := url.PathUnescape(xsrf.Value) + if err != nil { + return nil, err + } + + jsonString, err := request.PostAsBytesWithHeaders(api, map[string]string{ + "Content-Length": fmt.Sprint(len(payloadBytes)), + "Content-Type": "application/json", + "Cookie": fmt.Sprintf("%s=%s; %s=%s", xsrf.Name, xsrf.Value, hstreamSession.Name, hstreamSession.Value), + "Referer": URL, + "X-Requested-With": "XMLHttpRequest", + "X-Xsrf-Token": xsrfValueUnescaped, + }, bytes.NewReader(payloadBytes)) + if err != nil { + return nil, err + } + + res := APIResponse{} + err = json.Unmarshal(jsonString, &res) + if err != nil { + return nil, err + } + + videoSourceBaseURL := fmt.Sprintf("%s/%s", fileProvider, res.StreamURL) + videoSources := []string{ + videoSourceBaseURL + "/x264.720p.mp4", + } + + if res.Resolution == "1080p" { + videoSources = append(videoSources, 
videoSourceBaseURL+"/av1.1080p.webm") + } + + if res.Resolution == "4k" { + videoSources = append(videoSources, videoSourceBaseURL+"/av1.1080p.webm") + videoSources = append(videoSources, videoSourceBaseURL+"/av1.2160p.webm") + } + + if res.Legacy == 0 { + videoSources = append(videoSources, []string{ + videoSourceBaseURL + "/720/manifest.mpd", + videoSourceBaseURL + "/1080/manifest.mpd", + videoSourceBaseURL + "/2160/manifest.mpd", + }...) + } + + videoSources = reverse(videoSources) streams := make(map[string]*static.Stream) counter := 0 @@ -127,11 +221,11 @@ func extractData(URL string) (*static.Data, error) { } } - captionURL := reCaptionSource.FindString(htmlString) + captionURL := videoSourceBaseURL + "/eng.ass" return &static.Data{ Site: site, - Title: utils.GetSectionHeadingElement(&htmlString, 6, 0), + Title: strings.TrimSpace(utils.GetSectionHeadingElement(&htmlString, 1, 0)), Type: static.DataTypeVideo, Streams: streams, Captions: []*static.Caption{ diff --git a/extractors/hstream/hstream_test.go b/extractors/hstream/hstream_test.go index bf5125c..7132f77 100644 --- a/extractors/hstream/hstream_test.go +++ b/extractors/hstream/hstream_test.go @@ -47,9 +47,18 @@ func TestExtract(t *testing.T) { }{ { Name: "Single Episode 4k", + Args: test.Args{ + URL: "https://hstream.moe/hentai/natural-vacation-the-animation-1", + Title: "Natural Vacation The Animation - 1", + Quality: "2880x1920", + Size: 804660690, + }, + }, + { + Name: "Single Episode 4k legacy", Args: test.Args{ URL: "https://hstream.moe/hentai/wizard-girl-ambitious/1", - Title: "Wizard Girl Ambitious – 1", + Title: "Wizard Girl Ambitious - 1", Quality: "av1.2160p.webm", Size: 804660690, }, @@ -58,7 +67,7 @@ func TestExtract(t *testing.T) { Name: "Single Episode", Args: test.Args{ URL: "https://hstream.moe/hentai/maki-chan-to-now/1", - Title: "Maki-chan to Now. – 1", + Title: "Maki-chan to Now. 
- 1", Quality: "av1.1080p.webm", Size: 180211896, }, diff --git a/request/request.go b/request/request.go index 89629ec..473ee65 100755 --- a/request/request.go +++ b/request/request.go @@ -173,21 +173,21 @@ func GetAsBytesWithHeaders(URL string, headers map[string]string) ([]byte, error } // PostAsBytesWithHeaders content as bytes -func PostAsBytesWithHeaders(URL string, headers map[string]string) ([]byte, error) { - resp, err := Request(http.MethodPost, URL, headers, nil) +func PostAsBytesWithHeaders(URL string, headers map[string]string, body io.Reader) ([]byte, error) { + resp, err := Request(http.MethodPost, URL, headers, body) if err != nil { return nil, err } defer resp.Body.Close() - body, err := io.ReadAll(resp.Body) + resBody, err := io.ReadAll(resp.Body) if err != nil { if err != io.ErrUnexpectedEOF { return nil, err } } - return body, nil + return resBody, nil } // GetWithCookies content as string diff --git a/request/request_test.go b/request/request_test.go index a914ea7..c77958e 100755 --- a/request/request_test.go +++ b/request/request_test.go @@ -32,7 +32,7 @@ func TestGet(t *testing.T) { func TestPost(t *testing.T) { t.Run("Default test", func(t *testing.T) { - data, err := PostAsBytesWithHeaders("https://www.google.com/", map[string]string{"Referer": "https://google.com"}) + data, err := PostAsBytesWithHeaders("https://www.google.com/", map[string]string{"Referer": "https://google.com"}, nil) test.CheckError(t, err) if len(data) < 1 {