Skip to content

Commit

Permalink
[extractor:hstream] updated extractor for new website
Browse files Browse the repository at this point in the history
  • Loading branch information
gan-of-culture committed Aug 24, 2023
1 parent 6ff9b19 commit de695dc
Show file tree
Hide file tree
Showing 4 changed files with 117 additions and 14 deletions.
108 changes: 101 additions & 7 deletions extractors/hstream/hstream.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
package hstream

import (
"bytes"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"net/url"
"regexp"
"strings"
"time"
Expand All @@ -10,11 +16,26 @@ import (
"github.com/gan-of-culture/get-sauce/request"
"github.com/gan-of-culture/get-sauce/static"
"github.com/gan-of-culture/get-sauce/utils"
"golang.org/x/exp/slices"
)

// site is the base URL of the hstream website; also used as the Site field
// of the extracted data and to validate input URLs.
const site = "https://hstream.moe/"

// api is the player API endpoint that resolves an episode ID to stream info.
const api = "https://hstream.moe/player/api"

// fileProvider is the CDN host that serves the actual video, manifest, and
// subtitle files; StreamURL from the API is appended to it.
const fileProvider = "https://str.h-dl.xyz"

// APIResponse models the JSON document returned by the hstream player API
// for a single episode.
type APIResponse struct {
	// Title is the episode title. Poster presumably points at the cover
	// image — not used in the visible extractor code, confirm against callers.
	Title  string `json:"title"`
	Poster string `json:"poster"`
	// Legacy selects the delivery format: when 0, DASH manifests
	// (…/manifest.mpd) are offered in addition to the progressive files.
	Legacy int `json:"legacy"`
	// Resolution is the maximum available quality ("1080p", "4k", …) and
	// controls which extra sources the extractor appends.
	Resolution string `json:"resolution"`
	// StreamURL is the per-episode path segment appended to fileProvider to
	// build the base URL of all media files.
	StreamURL string `json:"stream_url"`
}

// APIPayload is the JSON request body sent to the player API; EpisodeID is
// the hidden "e_id" form value scraped from the episode page.
type APIPayload struct {
	EpisodeID string `json:"episode_id"`
}

// reVideoSources matches direct media URLs (.mpd/.mp4/.webm) in the page
// HTML. NOTE(review): not referenced in the visible updated extractData,
// which now builds sources from the API response — possibly stale, confirm
// against the rest of the file before removing.
var reVideoSources = regexp.MustCompile(`https://.+/\d+/[\w.]+/[\w./]+\.(?:mpd|mp4|webm)`)

// reEpisodeID captures the value of the hidden "e_id" input on the episode
// page; it is the episode identifier sent to the player API.
var reEpisodeID = regexp.MustCompile(`e_id" type="hidden" value="([^"]*)`)

// reCaptionSource matches .ass subtitle URLs in the page HTML.
// NOTE(review): the updated code derives the caption URL from the API base
// URL instead — possibly stale, confirm before removing.
var reCaptionSource = regexp.MustCompile(`https://.+/\d+/[\w.]+/[\w./]+\.ass`)

type extractor struct{}
Expand Down Expand Up @@ -44,8 +65,8 @@ func parseURL(URL string) []string {
return []string{URL}
}

if ok, _ := regexp.MatchString(site+`hentai/[\w\-]+/?`, URL); !ok {
return []string{}
if ok, _ := regexp.MatchString(site+`hentai/[\w\-]+/?`, URL); ok {
return []string{URL}
}

htmlString, err := request.Get(URL)
Expand All @@ -64,17 +85,90 @@ func parseURL(URL string) []string {
}

func extractData(URL string) (*static.Data, error) {
htmlString, err := request.Get(URL)
resp, err := request.Request(http.MethodGet, URL, nil, nil)
if err != nil {
return nil, err
}
defer resp.Body.Close()

body, err := io.ReadAll(resp.Body)
if err != nil {
if err != io.ErrUnexpectedEOF {
return nil, err
}
}

htmlString := string(body)
cookies := resp.Cookies()
xsrf := cookies[slices.IndexFunc(cookies, func(cookie *http.Cookie) bool {
return cookie.Name == "XSRF-TOKEN"
})]
hstreamSession := cookies[slices.IndexFunc(cookies, func(cookie *http.Cookie) bool {
return cookie.Name == "hstream_session"
})]

if strings.Contains(htmlString, "<title>DDOS-GUARD</title>") {
time.Sleep(300 * time.Millisecond)
htmlString, _ = request.Get(URL)
}

videoSources := reverse(reVideoSources.FindAllString(htmlString, -1))
matchedEpisodeID := reEpisodeID.FindStringSubmatch(htmlString)
if len(matchedEpisodeID) < 2 {
return nil, errors.New("cannot find e_id for")
}

payload := APIPayload{EpisodeID: strings.TrimSpace(matchedEpisodeID[1])}
payloadBytes, err := json.Marshal(payload)
if err != nil {
return nil, err
}

xsrfValueEscaped, err := url.PathUnescape(xsrf.Value)
if err != nil {
return nil, err
}

jsonString, err := request.PostAsBytesWithHeaders(api, map[string]string{
"Content-Length": fmt.Sprint(len(payloadBytes)),
"Content-Type": "application/json",
"Cookie": fmt.Sprintf("%s=%s; %s=%s", xsrf.Name, xsrf.Value, hstreamSession.Name, hstreamSession.Value),
"Referer": URL,
"X-Requested-With": "XMLHttpRequest",
"X-Xsrf-Token": xsrfValueEscaped,
}, bytes.NewReader(payloadBytes))
if err != nil {
return nil, err
}

res := APIResponse{}
err = json.Unmarshal(jsonString, &res)
if err != nil {
return nil, err
}

videoSourceBaseURL := fmt.Sprintf("%s/%s", fileProvider, res.StreamURL)
videoSources := []string{
videoSourceBaseURL + "/x264.720p.mp4",
}

if res.Resolution == "1080p" {
videoSources = append(videoSources, videoSourceBaseURL+"/av1.1080p.webm")
}

if res.Resolution == "4k" {
videoSources = append(videoSources, videoSourceBaseURL+"/av1.1080p.webm")
videoSources = append(videoSources, videoSourceBaseURL+"/av1.2160p.webm")
}

if res.Legacy == 0 {
videoSources = append(videoSources, []string{
videoSourceBaseURL + "/720/manifest.mpd",
videoSourceBaseURL + "/1080/manifest.mpd",
videoSourceBaseURL + "/2160/manifest.mpd",
}...)
}

videoSources = reverse(videoSources)

streams := make(map[string]*static.Stream)
counter := 0
Expand Down Expand Up @@ -127,11 +221,11 @@ func extractData(URL string) (*static.Data, error) {
}
}

captionURL := reCaptionSource.FindString(htmlString)
captionURL := videoSourceBaseURL + "/eng.ass"

return &static.Data{
Site: site,
Title: utils.GetSectionHeadingElement(&htmlString, 6, 0),
Title: strings.TrimSpace(utils.GetSectionHeadingElement(&htmlString, 1, 0)),
Type: static.DataTypeVideo,
Streams: streams,
Captions: []*static.Caption{
Expand Down
13 changes: 11 additions & 2 deletions extractors/hstream/hstream_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,18 @@ func TestExtract(t *testing.T) {
}{
{
Name: "Single Episode 4k",
Args: test.Args{
URL: "https://hstream.moe/hentai/natural-vacation-the-animation-1",
Title: "Natural Vacation The Animation - 1",
Quality: "2880x1920",
Size: 804660690,
},
},
{
Name: "Single Episode 4k legacy",
Args: test.Args{
URL: "https://hstream.moe/hentai/wizard-girl-ambitious/1",
Title: "Wizard Girl Ambitious 1",
Title: "Wizard Girl Ambitious - 1",
Quality: "av1.2160p.webm",
Size: 804660690,
},
Expand All @@ -58,7 +67,7 @@ func TestExtract(t *testing.T) {
Name: "Single Episode",
Args: test.Args{
URL: "https://hstream.moe/hentai/maki-chan-to-now/1",
Title: "Maki-chan to Now. 1",
Title: "Maki-chan to Now. - 1",
Quality: "av1.1080p.webm",
Size: 180211896,
},
Expand Down
8 changes: 4 additions & 4 deletions request/request.go
Original file line number Diff line number Diff line change
Expand Up @@ -173,21 +173,21 @@ func GetAsBytesWithHeaders(URL string, headers map[string]string) ([]byte, error
}

// PostAsBytesWithHeaders content as bytes
func PostAsBytesWithHeaders(URL string, headers map[string]string) ([]byte, error) {
resp, err := Request(http.MethodPost, URL, headers, nil)
func PostAsBytesWithHeaders(URL string, headers map[string]string, body io.Reader) ([]byte, error) {
resp, err := Request(http.MethodPost, URL, headers, body)
if err != nil {
return nil, err
}
defer resp.Body.Close()

body, err := io.ReadAll(resp.Body)
resBody, err := io.ReadAll(resp.Body)
if err != nil {
if err != io.ErrUnexpectedEOF {
return nil, err
}
}

return body, nil
return resBody, nil
}

// GetWithCookies content as string
Expand Down
2 changes: 1 addition & 1 deletion request/request_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ func TestGet(t *testing.T) {

func TestPost(t *testing.T) {
t.Run("Default test", func(t *testing.T) {
data, err := PostAsBytesWithHeaders("https://www.google.com/", map[string]string{"Referer": "https://google.com"})
data, err := PostAsBytesWithHeaders("https://www.google.com/", map[string]string{"Referer": "https://google.com"}, nil)
test.CheckError(t, err)

if len(data) < 1 {
Expand Down

0 comments on commit de695dc

Please sign in to comment.