22package fetcher
33
44import (
5+ "context"
56 "time"
67
78 "github.com/codingpot/paperswithcode-go/v2"
89 "github.com/codingpot/pr12er/metadata-manager/internal/transform"
910 "github.com/codingpot/pr12er/server/pkg/pr12er"
10- "github.com/sirupsen/logrus"
11+ log "github.com/sirupsen/logrus"
1112 "google.golang.org/api/youtube/v3"
1213 "google.golang.org/protobuf/types/known/timestamppb"
1314)
@@ -25,7 +26,7 @@ func (f *Fetcher) fetchArxivPapersInfo(paperArxivIDs []string) ([]*pr12er.Paper,
2526 var pr12erPapers []* pr12er.Paper
2627
2728 for _ , arxivID := range paperArxivIDs {
28- logrus .WithField ("arxivID" , arxivID ).Info ("processing a paper" )
29+ log .WithField ("arxivID" , arxivID ).Info ("processing a paper" )
2930 params := paperswithcode_go .PaperListParamsDefault ()
3031 params .ArxivID = arxivID
3132 papers , err := f .client .PaperList (params )
@@ -68,42 +69,88 @@ func (f *Fetcher) fetchArxivPapersInfo(paperArxivIDs []string) ([]*pr12er.Paper,
6869 return pr12erPapers , nil
6970}
7071
71- func (f * Fetcher ) fetchYouTubeVideoInfo (videoID string ) (* pr12er.YouTubeVideo , error ) {
72- logrus .WithField ("videoID" , videoID ).Info ("fetching YouTube video info" )
73-
74- part := []string {"contentDetails" , "snippet" , "statistics" }
75- call := f .youtubeService .Videos .List (part ).Id (videoID )
76- resp , err := call .Do ()
77- if err != nil {
78- return nil , err
72+ // FetchYouTubeVideos fetches YouTubeVideo and returns a map[PR-ID]Video.
73+ // Because we can't send 200+ IDs requests at once, we use a wrapper function to split by batchSize.
74+ // We need to return the map so that it can plug back to the correct PR video.
75+ func (f * Fetcher ) FetchYouTubeVideos (videoIDToPr map [string ]int32 , batchSize int ) (map [int32 ]* pr12er.YouTubeVideo , error ) {
76+ videoIDs := make ([]string , len (videoIDToPr ))
77+ i := 0
78+ for videoID := range videoIDToPr {
79+ videoIDs [i ] = videoID
80+ i ++
7981 }
8082
81- // make video information
82- youTubeVideo := pr12er.YouTubeVideo {}
83- youTubeVideo .VideoId = videoID
84- if len (resp .Items ) > 0 {
85- youTubeVideo .VideoTitle = resp .Items [0 ].Snippet .Title
83+ ret := make (map [int32 ]* pr12er.YouTubeVideo )
84+
85+ for i := 0 ; i < len (videoIDs ); i += batchSize {
86+ end := i + batchSize
87+ if len (videoIDs ) < end {
88+ end = len (videoIDs )
89+ }
8690
87- ts , err := time .Parse (time .RFC3339 , resp .Items [0 ].Snippet .PublishedAt )
91+ // Get the batch response.
92+ videos , err := f .FetchMultiYouTubeVideoByIDs (videoIDs [i :end ])
8893 if err != nil {
8994 return nil , err
9095 }
91- youTubeVideo .PublishedDate = timestamppb .New (ts )
92- youTubeVideo .NumberOfLikes = int64 (resp .Items [0 ].Statistics .LikeCount )
93- youTubeVideo .NumberOfViews = int64 (resp .Items [0 ].Statistics .ViewCount )
94- youTubeVideo .Uploader = resp .Items [0 ].Snippet .ChannelTitle
96+
97+ for _ , video := range videos {
98+ prID := videoIDToPr [video .GetVideoId ()]
99+ ret [prID ] = video
100+ }
95101 }
96102
97- return & youTubeVideo , nil
103+ return ret , nil
98104}
99105
100- // FetchPrVideo fetches YouTubeVideo and Papers information.
101- func (f * Fetcher ) FetchPrVideo (prRow * pr12er.MappingTableRow ) (* pr12er.PrVideo , error ) {
102- video , err := f .fetchYouTubeVideoInfo (prRow .YoutubeVideoId )
106+ // FetchMultiYouTubeVideoByIDs is a low level function that returns videos by its IDs.
107+ // If there is a next page token, it will iterate each page.
108+ func (f * Fetcher ) FetchMultiYouTubeVideoByIDs (videoIDs []string ) ([]* pr12er.YouTubeVideo , error ) {
109+ log .WithField ("videoIDs" , videoIDs ).Info ("fetching YouTube" )
110+
111+ part := []string {"contentDetails" , "snippet" , "statistics" }
112+
113+ var ret []* pr12er.YouTubeVideo
114+ err := f .youtubeService .Videos .List (part ).Id (videoIDs ... ).
115+ Pages (context .Background (), func (response * youtube.VideoListResponse ) error {
116+ videos , err := handleResponse (response )
117+ if err != nil {
118+ return err
119+ }
120+ ret = append (ret , videos ... )
121+ return nil
122+ })
103123 if err != nil {
104124 return nil , err
105125 }
106126
127+ return ret , nil
128+ }
129+
130+ func handleResponse (resp * youtube.VideoListResponse ) ([]* pr12er.YouTubeVideo , error ) {
131+ ret := make ([]* pr12er.YouTubeVideo , len (resp .Items ))
132+
133+ for i , item := range resp .Items {
134+ ts , err := time .Parse (time .RFC3339 , item .Snippet .PublishedAt )
135+ if err != nil {
136+ return nil , err
137+ }
138+
139+ ret [i ] = & pr12er.YouTubeVideo {
140+ VideoId : item .Id ,
141+ VideoTitle : item .Snippet .Title ,
142+ NumberOfLikes : int64 (item .Statistics .LikeCount ),
143+ NumberOfViews : int64 (item .Statistics .ViewCount ),
144+ PublishedDate : timestamppb .New (ts ),
145+ Uploader : item .Snippet .ChannelTitle ,
146+ }
147+ }
148+
149+ return ret , nil
150+ }
151+
152+ // FetchOnlyPapers fetches papers without video information.
153+ func (f * Fetcher ) FetchOnlyPapers (prRow * pr12er.MappingTableRow ) (* pr12er.PrVideo , error ) {
107154 papers , err := f .fetchArxivPapersInfo (prRow .PaperArxivId )
108155 if err != nil {
109156 return nil , err
@@ -112,6 +159,5 @@ func (f *Fetcher) FetchPrVideo(prRow *pr12er.MappingTableRow) (*pr12er.PrVideo,
112159 return & pr12er.PrVideo {
113160 PrId : prRow .GetPrId (),
114161 Papers : papers ,
115- Video : video ,
116162 }, nil
117163}
0 commit comments