-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
126 lines (94 loc) · 4.23 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
from pytube import Playlist, YouTube, Stream, StreamQuery
import re
from tools.extract_links_from_pdf import extractLinksFromPDF
#! DISCLAIMER
# All YouTube playlist and video URLs must point to public or unlisted content.
# Private playlists and videos will not work as expected.
# Characters that are replaced with "-" in generated file names.
# Raw string: the backslash is a literal character, not an escape.
_INVALID_FILE_NAME_CHARS = r'<>:;_,"/\|?* '
_INVALID_CHAR_TABLE = str.maketrans(
    _INVALID_FILE_NAME_CHARS, "-" * len(_INVALID_FILE_NAME_CHARS)
)
# Matches a run of one or more dashes so it can be collapsed to a single one.
_DASH_RUN = re.compile(r"-+")


def _sanitize_file_name(name):
    """Replace every invalid character in *name* with "-" and collapse dash runs."""
    return _DASH_RUN.sub("-", name.translate(_INVALID_CHAR_TABLE))


def main():
    """Download every configured YouTube video under a prefixed, numbered name.

    The video source, the download folder and the file name prefix are
    configured by editing the assignments marked with TODO below. For each
    video link the stream returned by ``get_highest_resolution()`` is
    downloaded, and ``on_progress`` is registered to report progress.
    """
    # The progress counter is shared with the on_progress callback.
    global previousProgress

    # DOWNLOAD VIDEOS FROM YOUTUBE PLAYLIST
    # use the following line to extract youtube videos from a youtube playlist
    # TODO edit the following url for your playlist
    videos = Playlist("https://www.youtube.com/playlist?list=[playlist-ID]")
    # example
    # videos = Playlist(
    #     "https://www.youtube.com/playlist?list=PLEboAksnoN7UtpR7gJzCsZFCPbOo3ur0s"
    # )

    # folder the videos are downloaded to
    # TODO edit the following string to choose where videos are downloaded to
    # use the following line for windows
    downloadPath = "C:\\Path\\To\\Your\\Download\\Folder\\"
    # example
    # downloadPath = "C:\\Users\\Berkin\\Desktop\\YoutubeDownloads\\ee302-vids\\"
    # use the following line for macOS or linux
    # downloadPath = "/path/to/your/download/folder/"

    # file name prefix added to all downloaded videos
    # TODO edit the following string for the file name prefix
    fileNamePrefix = "file-name-prefix"
    # example
    # fileNamePrefix = "ee302-2020s"

    # EXTRACT YOUTUBE LINKS FROM PDF
    # you can extract youtube links from a pdf and use the returned list to
    # download youtube videos
    #! DISCLAIMER
    # the script does not search for youtube links written as plain text
    # it only looks for hyperlink anchors pointing to youtube
    # if you have a pdf that contains clickable valid youtube urls you can use this
    # a validation regex is used for validating youtube video links
    # it is not guaranteed to work flawlessly
    # TODO uncomment the following line and edit the pdf path to extract videos from a pdf
    # Windows
    # videos = extractLinksFromPDF("C:\\Path\\To\\example-pdf-containing-youtube-video-url-anchors.pdf")
    # videos = extractLinksFromPDF("C:\\Users\\Berkin\\Desktop\\link-pdf.pdf")
    # MacOS or Linux
    # videos = extractLinksFromPDF("/path/to/example-pdf-containing-youtube-video-url-anchors.pdf")

    # CREATE YOUR OWN LIST OF VIDEOS
    # you can also download from a hand-written list of youtube video links
    # TODO uncomment the following lines and create your own list of youtube video links
    # videos = [
    #     "https://www.youtube.com/watch?v=[video-id1]",
    #     "https://www.youtube.com/watch?v=[video-id2]",
    # ]

    # number given to the first downloaded video
    # change this if you want the numbering to start from another value
    firstIndex = 1

    for count, vidLink in enumerate(videos, start=firstIndex):
        ytVid = YouTube(vidLink)
        # restart the percentage tracking for this video before registering
        # the progress callback
        previousProgress = 0
        print(ytVid.title)
        ytVid.register_on_progress_callback(on_progress)

        result = ytVid.streams.get_highest_resolution()
        if result is None:
            # No suitable stream was found; skip instead of crashing on
            # result.title below.
            print("no downloadable stream found, skipping")
            continue

        title = (
            result.title.lower()
            .replace("metu", "")  # remove unwanted words
            .replace("2020", "")  # or numbers
            .replace("odtu", "metu")  # or replace words
            # examples
            # .replace("ee302", "")
            # .replace("feedback systems", "")
            # .replace("- (Lectures on Feedback Control Systems)".lower(), "")
            # .replace("spring 2021", "")
        )
        # The index is zero-padded to two digits (01, 02, ..., 10, 11, ...).
        fileName = _sanitize_file_name(f"{fileNamePrefix}-{count:02d}-{title}.mp4")
        print(fileName)
        result.download(downloadPath, fileName)
# Last whole-percent value printed for the download in progress; main() resets
# it to 0 before each video.
previousProgress = 0


def on_progress(stream, chunk, bytesRemaining):
    """Print the download percentage each time it rises by a whole percent.

    Args:
        stream: Object exposing the total size in bytes as ``filesize``.
        chunk: Most recently received data; unused.
        bytesRemaining: Number of bytes still to be downloaded.
    """
    global previousProgress
    totalSize = stream.filesize
    if not totalSize:
        # A percentage cannot be computed without a non-zero total size.
        return
    # Integer arithmetic keeps the percentage exact; the float form
    # int(29 / 100 * 100) evaluates to 28.
    liveProgress = (totalSize - bytesRemaining) * 100 // totalSize
    if liveProgress > previousProgress:
        previousProgress = liveProgress
        print(liveProgress)
# Start downloading only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()