forked from kemuvx/hianime-dl
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmain.py
339 lines (281 loc) · 15.4 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
import shutil
import requests
from bs4 import BeautifulSoup
from colorama import Fore
from seleniumbase import Driver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import os
import json
import yt_dlp
import time
# Browser-like HTTP request headers passed to the `requests.get` calls.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (X11; Linux x86_64) "
        "AppleWebKit/537.11 (KHTML, like Gecko) "
        "Chrome/23.0.1271.64 Safari/537.11"
    ),
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Charset": "ISO-8859-1,utf-8;q=0.7,*;q=0.3",
    # Ask the server not to compress the response body.
    "Accept-Encoding": "none",
    "Accept-Language": "en-US,en;q=0.8",
    "Connection": "keep-alive",
}
# FUNCTION TO VALIDATE USER INPUT FOR EPISODE NUMBER
def get_episode_input(prompt, min_value, max_value):
    """Keep asking the user for an episode number until a valid one is typed.

    Args:
        prompt: Text shown to the user by ``input``.
        min_value: Smallest accepted number (inclusive).
        max_value: Largest accepted number (inclusive).

    Returns:
        The first integer entered that lies in ``[min_value, max_value]``.
    """
    while True:
        try:
            candidate = int(input(prompt))
        except ValueError:
            # Not an integer at all: explain and ask again.
            print("Invalid input. Please enter a valid number.")
        else:
            if candidate < min_value or candidate > max_value:
                print(f"Please enter a number between {min_value} and {max_value}.")
                continue
            return candidate
def choose_resolution():
    """Show the resolution menu and return the chosen video height.

    Returns:
        1080, 720 or 360, depending on whether the user typed 1, 2 or 3.
        Any other answer prints a hint and asks again.
    """
    highlight = Fore.LIGHTYELLOW_EX
    normal = Fore.LIGHTWHITE_EX
    print(f"\nChoose resolution:\n\n"
          f"1.{highlight} 1080p{normal} (FHD)\n"
          f"2.{highlight} 720p {normal}(HD)\n"
          f"3. {highlight}360p {normal} (SD)\n")

    heights_by_choice = {1: 1080, 2: 720, 3: 360}
    while True:
        try:
            selection = int(input("Enter Choice: "))
        except ValueError:
            print("Invalid input. Please enter a number (1, 2, or 3).")
            continue

        height = heights_by_choice.get(selection)
        if height is not None:
            return height
        print("Invalid choice. Please enter 1, 2, or 3.")
def get_rid_of_bad_chars(word):
    """Return ``word`` with every occurrence of the unwanted characters removed.

    The characters dropped are: - . / \\ ? % * < > | " [ ] :
    """
    # A deletion-only translation table strips all of them in one pass.
    removal_table = str.maketrans('', '', '-./\\?%*<>|"[]:')
    return word.translate(removal_table)
def get_urls_to_animes_from_html(html_of_page, start_episode, end_episode):
    """Collect the episode links found in an anime page for an episode range.

    Args:
        html_of_page: HTML source of the page that lists the episodes.
        start_episode: First episode number to keep (inclusive).
        end_episode: Last episode number to keep (inclusive).

    Returns:
        A list of dicts, one per matching ``<a data-number=...>`` link, with
        the keys ``'url'``, ``'number'``, ``'title'`` and ``'M3U8'``.
        ``'M3U8'`` is always ``None`` here.
    """
    parsed_page = BeautifulSoup(html_of_page, 'html.parser')
    collected = []

    # Episode links are the anchors that carry a data-number attribute.
    for anchor in parsed_page.find_all('a', attrs={'data-number': True}):
        number = int(anchor.get('data-number'))
        if number < start_episode or number > end_episode:
            continue
        collected.append({
            'url': "https://hianime.to" + anchor['href'],
            'number': number,
            'title': anchor.get('title'),
            'M3U8': None,  # Initialize M3U8 field as None
        })

    return collected
class Main:
    """Interactive HiAnime downloader session.

    Constructing an instance runs the whole command-line workflow:

    1. Ask for an anime name and list the search results from hianime.to.
    2. Let the user pick a title, sub or dub, and an episode range.
    3. Open a browser through seleniumbase and inspect the captured network
       requests of every episode page to find its ``master.m3u8`` stream and
       its subtitle ``.vtt`` file.
    4. Write the collected episode data to ``json/<anime name>.json``.
    5. Optionally download every episode with yt-dlp into
       ``./mp4_out/<anime name>``.

    The instance stores no attributes; all work happens inside ``__init__``.
    """

    def __init__(self):
        """Run the interactive session from search to download.

        Raises:
            SystemExit: When the user answers ``0`` at the download prompt.
        """
        print(Fore.LIGHTGREEN_EX + "HiAnime " + Fore.LIGHTWHITE_EX + "Downloader")
        name_of_anime = input("Enter Name of Anime: ")

        dict_with_anime_elements = self._search_anime(name_of_anime)
        if not dict_with_anime_elements:
            print("No anime found")
            return  # Exit if no anime is found

        self._print_search_results(dict_with_anime_elements)
        chosen_anime_dict = self._select_anime(dict_with_anime_elements)

        # Display chosen anime details
        print("\nYou have chosen " + Fore.LIGHTCYAN_EX + chosen_anime_dict['name'] + Fore.LIGHTWHITE_EX)
        print(f"URL: {chosen_anime_dict['url']}")
        print("Sub Episodes: " + Fore.LIGHTYELLOW_EX + str(chosen_anime_dict['sub_episodes']) + Fore.LIGHTWHITE_EX)
        print("Dub Episodes: " + Fore.LIGHTYELLOW_EX + str(chosen_anime_dict['dub_episodes']) + Fore.LIGHTWHITE_EX)

        download_type = self._select_download_type(chosen_anime_dict)
        episode_count = chosen_anime_dict[f"{download_type}_episodes"]
        if episode_count < 1:
            # Without this guard the episode prompt would ask for a number
            # "between 1 and 0" forever.
            print("No episodes are available for this anime.")
            return
        start_episode, end_episode = self._select_episode_range(episode_count)

        episode_info_list = self._scrape_episode_sources(
            chosen_anime_dict, download_type, start_episode, end_episode)
        self._export_episode_info(chosen_anime_dict['name'], episode_info_list)

        # DOWNLOAD MP4 FROM M3U8
        download_or_no = input("Do you want to download the episodes?\nType 0 to exit: ")
        if download_or_no == "0":
            # Same effect as quit(0), but does not depend on the `site` module.
            raise SystemExit(0)
        self._download_episodes(chosen_anime_dict['name'], episode_info_list)

    def _search_anime(self, name_of_anime):
        """Search hianime.to and return the results keyed by 1-based index.

        Each value is a dict with the keys ``'name'`` (sanitised title),
        ``'url'``, ``'sub_episodes'`` and ``'dub_episodes'`` (ints). An empty
        dict is returned when the page has no result items.
        """
        # Passing the keyword through `params` lets requests URL-encode it, so
        # names containing '&', '#', '+' and similar are searched correctly.
        search_page_response = requests.get(
            "https://hianime.to/search",
            params={'keyword': name_of_anime},
            headers=headers,
            timeout=30,
        )
        search_page_soup = BeautifulSoup(search_page_response.content, 'html.parser')
        main_content = search_page_soup.find('div', id='main-content')
        if main_content is None:
            # Unexpected page layout (e.g. an error page): treat as no results.
            return {}

        dict_with_anime_elements = {}
        for i, element in enumerate(main_content.find_all('div', class_='flw-item'), 1):
            dict_with_anime_elements[i] = {
                'name': get_rid_of_bad_chars(element.find('h3', class_='film-name').text),
                'url': "https://hianime.to" + element.find('a', class_='film-poster-ahref item-qtip')['href'],
                'sub_episodes': self._episode_count(element, "tick-item tick-sub"),
                'dub_episodes': self._episode_count(element, "tick-item tick-dub"),
            }
        return dict_with_anime_elements

    def _episode_count(self, element, css_class):
        """Return the number shown in the ``css_class`` badge, or 0 if absent."""
        badge = element.find('div', class_=css_class)
        # Some anime have no sub (or no dub) badge at all.
        return int(badge.text) if badge is not None else 0

    def _print_search_results(self, dict_with_anime_elements):
        """Print one coloured line per search result."""
        for i, el in dict_with_anime_elements.items():
            print(
                Fore.LIGHTRED_EX + str(i) + ": " + Fore.LIGHTCYAN_EX + el['name'] + Fore.WHITE + " | " + "Episodes: " +
                Fore.LIGHTYELLOW_EX + str(el['sub_episodes']) + Fore.LIGHTWHITE_EX + " sub" +
                Fore.LIGHTGREEN_EX + " / " +
                Fore.LIGHTYELLOW_EX + str(el['dub_episodes']) + Fore.LIGHTWHITE_EX + " dub")

    def _select_anime(self, dict_with_anime_elements):
        """Ask for a result number until a listed one is entered; return its dict."""
        while True:
            try:
                number_of_anime = int(input("\nSelect an anime you want to download: "))
            except ValueError:
                print("Invalid input. Please enter a valid number.")
                continue
            if number_of_anime in dict_with_anime_elements:
                return dict_with_anime_elements[number_of_anime]
            print("Invalid anime number. Please select a valid anime.")

    def _select_download_type(self, chosen_anime_dict):
        """Return ``'sub'`` or ``'dub'``, asking only when both are available."""
        has_sub = chosen_anime_dict['sub_episodes'] != 0
        has_dub = chosen_anime_dict['dub_episodes'] != 0

        if has_sub and has_dub:
            question = ("\nBoth sub and dub episodes are available. "
                        "Do you want to download sub or dub? (Enter 'sub' or 'dub'): ")
            download_type = input(question).strip().lower()
            while download_type not in ['sub', 'dub']:
                print("Invalid choice. Please enter 'sub' or 'dub'.")
                download_type = input(question).strip().lower()
            return download_type

        if not has_dub:
            print("Dub episodes are not available. Defaulting to sub.")
            return 'sub'

        print("Sub episodes are not available. Defaulting to dub.")
        return 'dub'

    def _select_episode_range(self, episode_count):
        """Return ``(start_episode, end_episode)`` chosen by the user.

        When only one episode exists there is nothing to choose, so ``(1, 1)``
        is returned without prompting.
        """
        # The counts are ints, so compare with the int 1 (not the string "1").
        if episode_count == 1:
            return 1, 1
        start_episode = get_episode_input("Enter the starting episode number: ", 1, episode_count)
        end_episode = get_episode_input("Enter the ending episode number: ", start_episode, episode_count)
        return start_episode, end_episode

    def _episode_file_stem(self, anime_name, number, title):
        """Build the extension-less file name shared by the .vtt and .mp4 files.

        The title is sanitised the same way as the anime name because episode
        titles often contain characters such as '?', '/' or ':' that are not
        valid in file names, and '%' which yt-dlp would read as a template
        field.
        """
        return f"{anime_name} - Episode {number} - {get_rid_of_bad_chars(str(title))}"

    def _select_server(self, driver, download_type):
        """Click the ``HD-1`` server button in the sub or dub server list.

        If the first click fails, a full-screen overlay (if any) is hidden and
        the click is retried once. If the button was never located, the
        original wait error is re-raised.
        """
        if download_type == 'sub':
            server_button_xpath = "//div[@class='ps_-block ps_-block-sub servers-sub']//a[contains(text(), 'HD-1')]"
        else:
            server_button_xpath = "//div[@class='ps_-block ps_-block-sub servers-dub']//a[contains(text(), 'HD-1')]"

        server_button = None
        try:
            # Wait until the button is present in the DOM and clickable
            server_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.XPATH, server_button_xpath))
            )
            # Scroll the element into view
            driver.execute_script("arguments[0].scrollIntoView(true);", server_button)
            # Attempt to click the button
            server_button.click()
        except Exception as e:
            # Handle click interception
            print(f"Error selecting server: {e}")
            if server_button is None:
                # The button was never found, so there is nothing to retry.
                raise
            # Attempt to find and remove overlay
            try:
                overlay_element = driver.find_element(By.XPATH, "//div[contains(@style, 'z-index: 2147483647')]")
                driver.execute_script("arguments[0].style.display='none'", overlay_element)
            except Exception as overlay_error:
                print(f"No overlay found to remove: {overlay_error}")
            # Try clicking the button again
            server_button.click()

    def _scrape_episode_sources(self, chosen_anime_dict, download_type, start_episode, end_episode):
        """Visit every selected episode and record its stream and subtitles.

        For each episode the captured browser requests are polled every three
        seconds until a new ``master.m3u8`` URI is seen. Subtitles are
        optional: the wait for a ``.vtt`` file ends after ``max_retries``
        polls or ``timeout_duration`` seconds, whichever comes first. Found
        subtitles are saved under ``vtt_files/<anime name>/``.

        Returns:
            The episode dicts from ``get_urls_to_animes_from_html`` with their
            ``'M3U8'`` field filled in.
        """
        anime_name = chosen_anime_dict['name']

        # CHROME DRIVER
        print("Opening chrome driver...")
        # Created outside the try block: if the browser fails to start there
        # is no driver to quit, and the real error must not be masked.
        # NOTE(review): wire=True is presumably what provides driver.requests.
        driver = Driver(mobile=True, wire=True, headed=True,
                        extension_zip='extensions/CJPALHDLNBPAFIAMEJDNHCPHJBKEIAGM_1_58_0_0.zip')
        try:
            # CONNECT TO THE HIANIME
            print("Connecting to the website...\n")
            driver.get(chosen_anime_dict['url'])

            # Select sub or dub server based on user choice
            self._select_server(driver, download_type)

            # GET URLS OF EPISODES
            episode_info_list = get_urls_to_animes_from_html(driver.page_source, start_episode, end_episode)

            # START SCRAPING URI'S TO .M3U8 AND .VTT
            # Subtitle URIs containing any of these language codes are skipped.
            lang_list = (
                'ita', 'jpn', 'pol', 'por', 'ara', 'chi', 'cze', 'dan', 'dut', 'fin', 'fre', 'ger', 'gre', 'heb', 'hun',
                'ind',
                'kor', 'nob', 'pol', 'rum', 'rus', 'tha', 'vie', 'swe', 'spa', 'tur')
            # The request log is checked against URIs already assigned to an
            # earlier episode so that the same URI is never reused.
            used_vtt_uris = set()
            used_m3u8_uris = set()
            counter_for_reload_page = 0  # deliberately shared across episodes, as before

            os.makedirs(os.path.join("vtt_files", anime_name), exist_ok=True)

            for episode in episode_info_list:
                url = episode['url']
                number = episode['number']
                title = episode['title']

                print(
                    Fore.LIGHTGREEN_EX + "Get" + Fore.LIGHTWHITE_EX + f" Episode {number}..." + Fore.LIGHTWHITE_EX)
                driver.get(url)

                uri_to_m3u8_is_scraped = False
                uri_to_vtt_is_scraped = False
                max_retries = 5
                retry_count = 0
                timeout_duration = 10  # Timeout duration in seconds
                # Measured from the start of the episode; resetting it on every
                # poll would make the timeout below unreachable.
                start_time = time.time()

                while not (uri_to_m3u8_is_scraped and (uri_to_vtt_is_scraped or retry_count >= max_retries)):
                    driver.sleep(3)

                    # FIND M3U8 IN NETWORK REQUESTS
                    if not uri_to_m3u8_is_scraped:
                        for request in driver.requests:
                            uri = request.url
                            if (uri.endswith('master.m3u8') and uri not in used_m3u8_uris
                                    and "biananset" in uri):
                                print(Fore.LIGHTGREEN_EX + 'Found' + Fore.LIGHTWHITE_EX + ' master.m3u8')
                                uri_to_m3u8_is_scraped = True
                                used_m3u8_uris.add(uri)
                                episode['M3U8'] = uri
                                break

                    # FIND VTT IN NETWORK REQUESTS
                    if not uri_to_vtt_is_scraped:
                        for request in driver.requests:
                            uri = request.url
                            if (uri.endswith(".vtt") and "thumbnails" not in uri
                                    and not any(ele in uri for ele in lang_list)
                                    and uri not in used_vtt_uris):
                                print(Fore.LIGHTGREEN_EX + "Found " + Fore.LIGHTWHITE_EX + "subtitles vtt")
                                used_vtt_uris.add(uri)
                                uri_to_vtt_is_scraped = True
                                # Fetch first so a failed request leaves no empty file behind.
                                content_of_uri = requests.get(uri, headers=headers, timeout=30).content
                                subtitle_path = os.path.join(
                                    "vtt_files", anime_name,
                                    self._episode_file_stem(anime_name, number, title) + ".vtt")
                                with open(subtitle_path, 'wb') as subs_file:
                                    subs_file.write(content_of_uri)
                                break

                    # Check for timeout
                    if not uri_to_vtt_is_scraped and time.time() - start_time > timeout_duration:
                        print(f"Timeout reached for Episode {number}, proceeding without VTT.")
                        uri_to_vtt_is_scraped = True

                    retry_count += 1
                    counter_for_reload_page += 1
                    # Reload the episode page on every fifth poll.
                    if counter_for_reload_page == 5:
                        counter_for_reload_page = 0
                        driver.get(url)
        finally:
            driver.quit()
            print("Driver closed")

        return episode_info_list

    def _export_episode_info(self, anime_name, episode_info_list):
        """Write the episode dicts to ``json/<anime_name>.json``."""
        os.makedirs("json", exist_ok=True)
        json_filename = os.path.join("json", f"{anime_name}.json")
        with open(json_filename, 'w') as f:
            json.dump(episode_info_list, f, indent=4)
        print("\nEpisode information exported to " + Fore.LIGHTMAGENTA_EX + json_filename + Fore.LIGHTWHITE_EX)

    def _download_episodes(self, folder_name, episode_info_list):
        """Download every episode's M3U8 stream to ``./mp4_out/<folder_name>``.

        The user is asked for a maximum resolution first. Any exception raised
        while downloading is printed and ends the remaining downloads.
        """
        output_folder = os.path.join('./mp4_out', folder_name)
        os.makedirs(output_folder, exist_ok=True)

        resolution_height = choose_resolution()
        print(f"You chose: " + Fore.LIGHTYELLOW_EX + f"{resolution_height}p" + Fore.LIGHTWHITE_EX + " resolution\n")
        print(f"\nOutput Folder:" + Fore.LIGHTMAGENTA_EX + f" {output_folder}\n\n" + Fore.LIGHTWHITE_EX)

        try:
            for episode in episode_info_list:
                url = episode['M3U8']
                number = episode['number']
                title = episode['title']

                ydl_opts = {
                    'no_warnings': True,
                    'quiet': True,
                    'outtmpl': os.path.join(
                        output_folder, self._episode_file_stem(folder_name, number, title) + ".mp4"),
                    'format': f'bestvideo[height<={resolution_height}]+bestaudio/best[height<={resolution_height}]',
                }

                print(Fore.LIGHTGREEN_EX + "Downloading " + Fore.LIGHTCYAN_EX + f"{folder_name}" + Fore.LIGHTWHITE_EX + " - Episode" + Fore.LIGHTYELLOW_EX + f" {number}" + Fore.LIGHTWHITE_EX + " - " + f" {title}.mp4.")
                with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                    ydl.download([url])
                print(Fore.LIGHTGREEN_EX + "Downloaded " + Fore.LIGHTCYAN_EX + f"{folder_name}" + Fore.LIGHTWHITE_EX + " - Episode" + Fore.LIGHTYELLOW_EX + f" {number}" + Fore.LIGHTWHITE_EX + " - " + f" {title}.mp4 " + "successfully.")
        except Exception as e:
            print(f"An error occurred: {str(e)}")
# Start the interactive session only when this file is executed as a script;
# constructing Main runs the whole workflow.
if __name__ == "__main__":
    Main()