From 00a38bf998663da0c664c1c5e529d4b47d1971d5 Mon Sep 17 00:00:00 2001 From: Alessandro Clerici Date: Sat, 15 May 2021 00:54:49 +0200 Subject: [PATCH] more solid regex system, fixed Silab issue no3 --- unimi_dl/__init__.py | 2 +- unimi_dl/platform/ariel.py | 16 ++++++---------- unimi_dl/platform/panopto.py | 17 ++++++++++++----- 3 files changed, 19 insertions(+), 16 deletions(-) diff --git a/unimi_dl/__init__.py b/unimi_dl/__init__.py index 47eefde..4fecbc2 100644 --- a/unimi_dl/__init__.py +++ b/unimi_dl/__init__.py @@ -16,7 +16,7 @@ # along with unimi-dl. If not, see . -__version__ = "0.2.1" +__version__ = "0.2.3" __license__ = "GPL v.3" import unimi_dl.platform diff --git a/unimi_dl/platform/ariel.py b/unimi_dl/platform/ariel.py index b244e32..28553b4 100644 --- a/unimi_dl/platform/ariel.py +++ b/unimi_dl/platform/ariel.py @@ -47,15 +47,11 @@ def get_manifests(self, url: str) -> list[tuple[str, str]]: self.logger.info("Getting video page") video_page = self.session.get(url).text - self.logger.info("Collecting manifests") - manifest_re = re.compile(r"https://.*/manifest\.m3u8") - match = manifest_re.findall(video_page) - + self.logger.info("Collecting manifests and video names") res = [] - filename_re = re.compile( - r"https://videolectures.unimi.it/vod/mp4:(.*?)\..*?/manifest.m3u8") - self.logger.info("Fetching video names") - for manifest in match: - filename = urllib.parse.unquote(filename_re.search(manifest)[1]) - res.append((filename, manifest)) + manifest_re = re.compile( + r"https://.*?/mp4:.*?([^/]*?)\.mp4/manifest.m3u8") + for i, manifest in enumerate(manifest_re.finditer(video_page)): + res.append((urllib.parse.unquote( + manifest[1]) if manifest[1] else urllib.parse.urlparse(url)[1]+str(i), manifest[0])) return res diff --git a/unimi_dl/platform/panopto.py b/unimi_dl/platform/panopto.py index 90ee7ea..99074bb 100644 --- a/unimi_dl/platform/panopto.py +++ b/unimi_dl/platform/panopto.py @@ -22,6 +22,7 @@ import requests from urllib3 import disable_warnings +import urllib.parse from urllib3.exceptions import InsecureRequestWarning from .ariel import get_ariel_session @@ -53,11 +54,17 @@ def get_manifests(self, url: str) -> list[tuple[str, str]]: manifest_page = self.session.get(iframe_url).text self.logger.info("Collecting manifests") - manifest_re = re.compile(r"\"VideoUrl\":\"(https:.*?\.m3u8)\"") - manifest = manifest_re.search(manifest_page)[1].replace("\\", "") + manifest = re.compile( + r"\"VideoUrl\":\"(https:.*?\.m3u8)\"").search(manifest_page) + if not manifest: + self.logger.info("No manifest found") + return [] self.logger.info("Fetching video names") - title_re = re.compile(r"(.*?)") - filename = title_re.search(manifest_page)[1] + filename_match = re.compile( + r"(.*?)").search(manifest_page) - return [(filename, manifest)] + filename = filename_match[1] if filename_match and filename_match[1] else urllib.parse.urlparse(url)[ + 1] + + return [(filename, manifest[1].replace("\\", ""))]