Skip to content

Commit 224c168

Browse files
committed
Filter-out non-public videos and delete only unsuccessful videos
1 parent f9d4b1e commit 224c168

File tree

2 files changed

+11
-5
lines changed

2 files changed

+11
-5
lines changed

scraper/src/youtube2zim/scraper.py

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -82,6 +82,7 @@
8282
get_videos_json,
8383
save_channel_branding,
8484
skip_deleted_videos,
85+
skip_non_public_videos,
8586
skip_outofrange_videos,
8687
)
8788

@@ -611,6 +612,7 @@ def extract_videos_list(self):
611612
)
612613
filter_videos = filter(skip_outofrange, videos_json)
613614
filter_videos = filter(skip_deleted_videos, filter_videos)
615+
filter_videos = filter(skip_non_public_videos, filter_videos)
614616
all_videos.update(
615617
{v["contentDetails"]["videoId"]: v for v in filter_videos}
616618
)
@@ -1034,10 +1036,9 @@ def update_metadata(self):
10341036
def make_json_files(self, actual_videos_ids):
10351037
"""Generate JSON files to be consumed by the frontend"""
10361038

1037-
def remove_unused_videos(videos):
1038-
video_ids = [video["contentDetails"]["videoId"] for video in videos]
1039+
def remove_unused_videos():
10391040
for path in self.videos_dir.iterdir():
1040-
if path.is_dir() and path.name not in video_ids:
1041+
if path.is_dir() and path.name not in actual_videos_ids:
10411042
logger.debug(f"Removing unused video {path.name}")
10421043
shutil.rmtree(path, ignore_errors=True)
10431044

@@ -1278,7 +1279,7 @@ def get_playlist_slug(playlist) -> str:
12781279
)
12791280

12801281
# clean videos left out in videos directory
1281-
remove_unused_videos(videos)
1282+
remove_unused_videos()
12821283

12831284
def add_file_to_zim(
12841285
self,

scraper/src/youtube2zim/youtube.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -190,7 +190,7 @@ def get_videos_json(playlist_id):
190190
PLAYLIST_ITEMS_API,
191191
params={
192192
"playlistId": playlist_id,
193-
"part": "snippet,contentDetails",
193+
"part": "snippet,contentDetails,status",
194194
"key": YOUTUBE.api_key,
195195
"maxResults": RESULTS_PER_PAGE,
196196
"pageToken": page_token,
@@ -309,6 +309,11 @@ def skip_deleted_videos(item):
309309
)
310310

311311

312+
def skip_non_public_videos(item):
313+
"""filter func to filter-out non-public videos"""
314+
return item["status"]["privacyStatus"] == "public"
315+
316+
312317
def skip_outofrange_videos(date_range, item):
313318
"""filter func to filter-out videos that are not within specified date range"""
314319
return dt_parser.parse(item["snippet"]["publishedAt"]).date() in date_range

0 commit comments

Comments
 (0)