diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..27ca760 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) [year] [fullname] + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/README.rst b/README.rst new file mode 100644 index 0000000..c568d5a --- /dev/null +++ b/README.rst @@ -0,0 +1,45 @@ +.. image:: https://img.shields.io/pypi/v/0.1.0 :alt: PyPI + + +m3u8-To-MP4 +============ + +Python downloader for saving m3u8 video to local MP4 file. + +QuickStart +============= + + +Install m3u8_To_MP4 by pip +--------------------------------------- + +Configure ffmpeg_. + +.. code-block:: python + + pip install m3u8_To_MP4 + + +Download an MP4 video from an m3u8 URI +--------------------------------------- + +To download an m3u8 video into an MP4 file, use the `download` function: + +.. 
# ===========================================================================
# README.rst (continued from the directive opened on the previous line)
# ===========================================================================
# code-block:: python
#
#     import m3u8_To_MP4
#
#     m3u8_To_MP4.download('http://videoserver.com/playlist.m3u8')
#
#
# To resume a transfer from the point of interruption, pass the `tmpdir`
# argument:
#
# .. code-block:: python
#
#     import m3u8_To_MP4
#
#     m3u8_To_MP4.download('http://videoserver.com/playlist.m3u8', tmpdir='/tmp/m3u8_xx')
#
#
# .. _ffmpeg: http://www.ffmpeg.org/download.html
# \ No newline at end of file

# ===========================================================================
# diff --git a/m3u8_To_MP4/__init__.py b/m3u8_To_MP4/__init__.py
# (new file mode 100644, index 0000000..b2c269c)
# ===========================================================================
# -*- coding: utf-8 -*-

"""
m3u8ToMP4
~~~~~~~~~~~~

Basic usage:

import m3u8_To_MP4
m3u8_To_MP4.download("https://xxx.com/xxx/index.m3u8")

"""
import subprocess

test_has_ffmpeg_cmd = "ffmpeg -version"


def _ffmpeg_is_available():
    """Return True when an ``ffmpeg`` executable answers ``ffmpeg -version``.

    The command is executed without a shell; a missing executable surfaces as
    ``OSError`` and is reported as "not available" instead of leaking a raw
    traceback.
    """
    try:
        proc = subprocess.run(test_has_ffmpeg_cmd.split(),
                              stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except OSError:
        return False

    # errors='replace': the banner may contain bytes in the local code page.
    return 'version' in proc.stdout.decode('utf8', errors='replace')


# The package is unusable without ffmpeg, so fail at import time.
if not _ffmpeg_is_available():
    raise Exception('NOT FOUND FFMPEG!')

import logging

logging.basicConfig(format='%(asctime)s | %(levelname)s | %(message)s',
                    level=logging.INFO)

import m3u8_To_MP4.processor
from m3u8_To_MP4.processor import Crawler

__all__ = (
    "Crawler",
    "download"
)


def download(m3u8_uri, max_retry_times=3, max_num_workers=100,
             mp4_file_dir='./', mp4_file_name='m3u8_To_Mp4.mp4', tmpdir=None):
    '''
    Download mp4 video from given m3u8 uri.

    :param m3u8_uri: m3u8 uri
    :param max_retry_times: max retry times
    :param max_num_workers: number of download threads
    :param mp4_file_dir: folder path where mp4 file is stored
    :param mp4_file_name: a mp4 file name with suffix ".mp4"
    :param tmpdir: folder where downloaded segments are stored; pass the
        folder of an interrupted run to resume it. A fresh temporary folder
        is created when omitted.
    :return: None
    '''
    with m3u8_To_MP4.processor.Crawler(m3u8_uri, max_retry_times,
                                       max_num_workers, mp4_file_dir,
                                       mp4_file_name, tmpdir) as crawler:
        crawler.fetch_mp4_by_m3u8_uri()


# ===========================================================================
# diff --git a/m3u8_To_MP4/processor.py b/m3u8_To_MP4/processor.py
# (new file mode 100644, index 0000000..25db8d1)
# ===========================================================================
# -*- coding: utf-8 -*-
import collections
import concurrent.futures
import logging
import os
import shutil
import subprocess
import sys
import tempfile
import time

import m3u8
from Crypto.Cipher import AES
from Crypto.Util.Padding import unpad

from m3u8_To_MP4 import utils
from m3u8_To_MP4.weber import request_for


def download_segment(segment_url, max_try_times=1):
    """Download one media segment.

    :param segment_url: absolute uri of the segment
    :param max_try_times: how many attempts ``request_for`` may make
    :return: ``(is_successful, response_content)`` as returned by
        ``request_for``
    """
    is_successful, response_content = request_for(segment_url,
                                                  max_try_times=max_try_times)

    return is_successful, response_content


# method: value of the EXT-X-KEY METHOD attribute; value: downloaded key
# bytes; iv: IV attribute text as exposed by the m3u8 key object, or None.
EncryptedKey = collections.namedtuple(typename='EncryptedKey',
                                      field_names=['method', 'value', 'iv'])


class Crawler(object):
    """Download every segment of an m3u8 playlist and merge them with ffmpeg.

    Use it as a context manager: entering prepares the segment folder and the
    output path, leaving removes the segment folder after a successful run.
    When the run fails the folder is kept so that it can be passed again as
    ``tmpdir`` to resume.
    """

    def __init__(self, m3u8_uri, max_retry_times=3, max_num_workers=100,
                 mp4_file_dir='./', mp4_file_name='m3u8_To_Mp4.mp4',
                 tmpdir=None):
        self.m3u8_uri = m3u8_uri

        self.max_retry_times = max_retry_times

        self.max_num_workers = max_num_workers

        self.tmpdir = tmpdir
        self.fetched_file_names = list()

        self.mp4_file_dir = mp4_file_dir
        self.mp4_file_name = mp4_file_name
        self.mp4_file_path = None

        # absolute segment uri -> media sequence number; needed to derive the
        # AES IV when the playlist does not declare one.
        self._media_sequence_by_uri = dict()

    def __enter__(self):
        if self.tmpdir is None:
            self._apply_for_tmpdir()
        else:
            # A user supplied folder may not exist yet (first run).
            os.makedirs(self.tmpdir, exist_ok=True)

        self.fetched_file_names = os.listdir(self.tmpdir)

        self._legalize_valid_mp4_file_path()

        print('\nsummary:')
        print(
            'm3u8_uri: {};\nmax_retry_times: {};\nmax_num_workers: {};\ntmp_dir: {};\nmp4_file_path: {}\n'.format(
                self.m3u8_uri, self.max_retry_times, self.max_num_workers,
                self.tmpdir, self.mp4_file_path))

        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        if exc_type is None:
            self._freeup_tmpdir()
        else:
            # Keep what was downloaded, otherwise resuming is impossible.
            logging.info(
                'task is interrupted, downloaded segments are kept in {}; '
                'pass it as tmpdir to resume'.format(self.tmpdir))

    def _apply_for_tmpdir(self):
        """Create a fresh temporary folder for the segments."""
        self.tmpdir = tempfile.mkdtemp(prefix='m3u8_')

    def _freeup_tmpdir(self):
        """Remove the segment folder, if there is one."""
        if self.tmpdir and os.path.exists(self.tmpdir):
            shutil.rmtree(self.tmpdir)

    def _legalize_valid_mp4_file_path(self):
        """Compute ``self.mp4_file_path`` without overwriting an existing file."""
        is_valid, mp4_file_name = utils.calibrate_mp4_file_name(
            self.mp4_file_name)
        if not is_valid:
            mp4_file_name = utils.create_mp4_file_name()

        if self.mp4_file_dir:
            # ffmpeg does not create missing output folders.
            os.makedirs(self.mp4_file_dir, exist_ok=True)

        mp4_file_path = os.path.join(self.mp4_file_dir, mp4_file_name)
        if os.path.exists(mp4_file_path):
            mp4_file_name = utils.create_mp4_file_name()
            mp4_file_path = os.path.join(self.mp4_file_dir, mp4_file_name)

        self.mp4_file_path = mp4_file_path

    def _get_m3u8_obj_by_uri(self, m3u8_uri):
        """Load and parse the playlist found at ``m3u8_uri``."""
        try:
            m3u8_obj = m3u8.load(uri=m3u8_uri)
        except Exception as exc:
            logging.exception(
                'failed to load m3u8 file,reason is {}'.format(exc))
            raise Exception('FAILED TO LOAD M3U8 FILE!') from exc

        return m3u8_obj

    def _get_m3u8_obj_with_best_bandwitdth(self, m3u8_uri):
        """Load the playlist; for a variant playlist follow the stream with
        the highest declared bandwidth."""
        m3u8_obj = self._get_m3u8_obj_by_uri(m3u8_uri)

        if m3u8_obj.is_variant:
            best_bandwidth = -1
            best_bandwidth_m3u8_uri = None
            for playlist in m3u8_obj.playlists:
                # A missing BANDWIDTH attribute must not break the comparison.
                bandwidth = playlist.stream_info.bandwidth or 0
                if bandwidth > best_bandwidth:
                    best_bandwidth = bandwidth
                    best_bandwidth_m3u8_uri = playlist.absolute_uri

            logging.info(
                "choose the best bandwith, which is {}".format(best_bandwidth))
            logging.info("m3u8 uri is {}".format(best_bandwidth_m3u8_uri))

            m3u8_obj = self._get_m3u8_obj_by_uri(best_bandwidth_m3u8_uri)

        return m3u8_obj

    def _is_fetched(self, segment_uri):
        """Tell whether the segment file already sits in the segment folder."""
        file_name = utils.resolve_file_name_by_uri(segment_uri)

        return file_name in self.fetched_file_names

    def _pending_segment_uris(self, segments):
        """Absolute uris of the given segments that still have to be fetched."""
        return [segment.absolute_uri for segment in segments
                if not self._is_fetched(segment.absolute_uri)]

    def _construct_key_segment_pairs_by_m3u8(self, m3u8_obj):
        """Group the pending segments by the key that encrypts them.

        :return: list of ``(EncryptedKey or None, [segment uri, ...])``;
            ``None`` marks segments that are not encrypted.
        :raises Exception: when a key cannot be downloaded
        """
        first_sequence = m3u8_obj.media_sequence or 0
        self._media_sequence_by_uri = {
            segment.absolute_uri: first_sequence + index
            for index, segment in enumerate(m3u8_obj.segments)}

        key_segments_pairs = list()
        for key in m3u8_obj.keys:
            if key is None or key.method == 'NONE':
                # Clear segments; they used to be dropped as soon as the
                # playlist contained any real key.
                _encrypted_key = None
            else:
                is_successful, encryped_value = request_for(
                    key.absolute_uri,
                    max_try_times=max(1, self.max_retry_times))
                if not is_successful:
                    raise Exception('DOWNLOAD KEY FAILED, URI IS {}'.format(
                        key.absolute_uri))

                _encrypted_key = EncryptedKey(method=key.method,
                                              value=encryped_value, iv=key.iv)

            segments_by_key = self._pending_segment_uris(
                m3u8_obj.segments.by_key(key))
            key_segments_pairs.append((_encrypted_key, segments_by_key))

        if len(key_segments_pairs) == 0:
            # Playlist without any key information at all.
            segments_by_key = self._pending_segment_uris(m3u8_obj.segments)
            key_segments_pairs.append((None, segments_by_key))

        return key_segments_pairs

    def _resolve_iv(self, encrypted_key, segment_uri):
        """Return the 16 byte AES IV for one segment.

        The explicit IV attribute wins; without it the IV is the media
        sequence number of the segment as a big-endian 128 bit integer.
        """
        if encrypted_key.iv:
            # int(..., 16) accepts the leading "0x"/"0X" of the attribute.
            return int(encrypted_key.iv, 16).to_bytes(AES.block_size, 'big')

        sequence_number = self._media_sequence_by_uri.get(segment_uri, 0)
        return sequence_number.to_bytes(AES.block_size, 'big')

    def _decrypt_segment(self, encrypted_key, segment_uri, content):
        """Return the clear bytes of a downloaded segment.

        :raises Exception: for encryption methods other than AES-128
        """
        if encrypted_key is None:
            return content

        if encrypted_key.method != 'AES-128':
            raise Exception('UNSUPPORTED ENCRYPTION METHOD: {}'.format(
                encrypted_key.method))

        # Without an explicit iv PyCryptodome picks a random one, which
        # corrupts the first block of every segment.
        cryptor = AES.new(encrypted_key.value, AES.MODE_CBC,
                          iv=self._resolve_iv(encrypted_key, segment_uri))
        decrypted_content = cryptor.decrypt(content)

        try:
            return unpad(decrypted_content, AES.block_size)
        except ValueError:
            # Not PKCS7 padded; keep the raw bytes instead of failing.
            return decrypted_content

    def _dump_segment(self, encrypted_key, segment_uri, content):
        """Decrypt one segment and store it in the segment folder."""
        file_name = utils.resolve_file_name_by_uri(segment_uri)
        file_path = os.path.join(self.tmpdir, file_name)

        # Write under a scratch name first so that an interrupted write never
        # leaves a truncated file that a resumed run would trust.
        partial_file_path = file_path + '.part'
        with open(partial_file_path, 'wb') as fout:
            fout.write(
                self._decrypt_segment(encrypted_key, segment_uri, content))
        os.replace(partial_file_path, file_path)

    def _fetch_segments_to_local_tmpdir(self, num_segments,
                                        key_segments_pairs):
        """Download, decrypt and store every pending segment.

        Each segment is written as soon as it arrives, so an interrupted run
        can be resumed and memory usage stays bounded.

        :param num_segments: total number of segments in the playlist
        :param key_segments_pairs: result of
            ``_construct_key_segment_pairs_by_m3u8``
        :raises Exception: when segments are still missing after
            ``max_retry_times`` attempts each
        """
        num_pending = sum(len(segments_by_key)
                          for _, segments_by_key in key_segments_pairs)
        if num_pending == 0:
            return

        progress_bar = utils.ProcessBar(max(num_segments - num_pending, 0),
                                        num_segments, 'segment set',
                                        'downloading...',
                                        'downloaded segments successfully!')

        max_try_times = max(1, self.max_retry_times)
        failed_segment_uris = list()

        with concurrent.futures.ThreadPoolExecutor(
                max_workers=self.max_num_workers) as executor:
            for encrypted_key, segments_by_key in key_segments_pairs:
                future_2_segment_uri = {
                    executor.submit(download_segment, segment_url,
                                    max_try_times): segment_url
                    for segment_url in segments_by_key}

                for future in concurrent.futures.as_completed(
                        future_2_segment_uri):
                    # pop: drop our reference so the payload can be freed.
                    segment_uri = future_2_segment_uri.pop(future)
                    try:
                        request_is_successful, response_content = future.result()
                    except Exception as exc:
                        logging.exception(
                            '{} generated an exception: {}'.format(
                                segment_uri, exc))
                        request_is_successful = False

                    if not request_is_successful:
                        failed_segment_uris.append(segment_uri)
                        continue

                    self._dump_segment(encrypted_key, segment_uri,
                                       response_content)
                    progress_bar.update()

        if len(failed_segment_uris) > 0:
            sys.stdout.write('\n')
            raise Exception(
                '{} SEGMENTS ARE FAILED TO DOWNLOAD!'.format(
                    len(failed_segment_uris)))

    def _merge_tmpdir_segments_to_mp4_by_ffmpeg(self, m3u8_obj):
        """Concatenate the stored segments into ``self.mp4_file_path``.

        :raises Exception: when ffmpeg exits with a non-zero status
        """
        order_segment_list_file_path = os.path.join(self.tmpdir, "ts_ls.txt")
        with open(order_segment_list_file_path, 'w', encoding='utf8') as fout:
            for segment in m3u8_obj.segments:
                # absolute_uri: the same uri the file was stored under.
                file_name = utils.resolve_file_name_by_uri(
                    segment.absolute_uri)
                # Absolute path: the concat demuxer resolves relative entries
                # against the folder of the list file itself.
                segment_file_path = os.path.abspath(
                    os.path.join(self.tmpdir, file_name))

                fout.write("file '{}'\n".format(
                    segment_file_path.replace("'", "'\\''")))

        # Argument list, no shell: the output name comes from the caller.
        merge_cmd = ['ffmpeg', '-y', '-f', 'concat', '-safe', '0',
                     '-i', order_segment_list_file_path,
                     '-c', 'copy', self.mp4_file_path]

        logging.info("merging segments...")
        proc = subprocess.run(merge_cmd, stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE)
        if proc.returncode != 0:
            logging.error(proc.stderr.decode('utf8', errors='replace'))
            raise Exception('FAILED TO MERGE SEGMENTS BY FFMPEG!')

    def fetch_mp4_by_m3u8_uri(self):
        """Run the whole task: load playlist, fetch segments, merge to mp4."""
        m3u8_obj = self._get_m3u8_obj_with_best_bandwitdth(self.m3u8_uri)

        if len(m3u8_obj.segments) == 0:
            raise Exception('NO SEGMENTS FOUND IN M3U8 FILE!')

        key_segments_pairs = self._construct_key_segment_pairs_by_m3u8(
            m3u8_obj)
        has_pending_segments = any(
            segments_by_key for _, segments_by_key in key_segments_pairs)

        start_time = time.time()
        self._fetch_segments_to_local_tmpdir(len(m3u8_obj.segments),
                                             key_segments_pairs)
        fetch_end_time = time.time()

        self._merge_tmpdir_segments_to_mp4_by_ffmpeg(m3u8_obj)
        task_end_time = time.time()

        # A speed is only meaningful when something was downloaded just now.
        if has_pending_segments:
            utils.display_speed(start_time, fetch_end_time, task_end_time,
                                self.mp4_file_path)


# diff --git a/m3u8_To_MP4/utils.py b/m3u8_To_MP4/utils.py new file mode 100644 index 0000000..33dcd24 ---
# /dev/null +++ b/m3u8_To_MP4/utils.py @@ -0,0 +1,83 @@
# ===========================================================================
# m3u8_To_MP4/utils.py
# ===========================================================================
# -*- coding: utf-8 -*-
import datetime
import logging
import os
import re
import sys

# Characters that must not appear in a file name: \ / : * ? " < > |
_BANNED_FILE_NAME_CHARS = str.maketrans('', '', '\\/:*?"<>|')


class ProcessBar:
    """Single line text progress bar written to stdout."""

    def __init__(self, progress, max_iter, prefix='Progress',
                 suffix='complete',
                 completed_suffix='completed', bar_length=50):
        """
        :param progress: number of steps already done
        :param max_iter: total number of steps
        :param prefix: text printed in front of the bar
        :param suffix: text printed behind the bar while running
        :param completed_suffix: text printed behind the bar when finished
        :param bar_length: width of the bar in characters
        """
        self.progress = progress
        self.max_iter = max_iter

        self.bar_length = bar_length

        self.prefix = prefix
        self.suffix = suffix

        self.completed_suffix = completed_suffix

    def display(self):
        """Redraw the bar; the line is terminated once the work is complete."""
        if self.max_iter > 0:
            progress_rate = min(self.progress / self.max_iter, 1.0)
        else:
            # Nothing to do counts as finished and avoids dividing by zero.
            progress_rate = 1.0

        is_completed = self.progress >= self.max_iter
        percent = 100 * progress_rate

        filled_length = round(self.bar_length * progress_rate)
        bar = '#' * filled_length + '-' * (self.bar_length - filled_length)

        suffix = self.completed_suffix if is_completed else self.suffix
        sys.stdout.write(
            '\r{}: |{}| {:.1f}% {}'.format(self.prefix, bar, percent, suffix))

        if is_completed:
            sys.stdout.write('\n')

        sys.stdout.flush()

    def update(self):
        """Advance by one step and redraw."""
        self.progress += 1

        self.display()


def resolve_file_name_by_uri(uri):
    """Return the text behind the last ``/`` of ``uri``.

    A uri without any ``/`` is returned unchanged (it used to raise
    ``IndexError``).
    """
    return uri.rsplit('/', 1)[-1]


def display_speed(start_time, fetch_end_time, task_end_time,
                  target_mp4_file_path):
    """Log the total duration and the average download speed of a task.

    :param start_time: timestamp (seconds) when downloading started
    :param fetch_end_time: timestamp when downloading finished
    :param task_end_time: timestamp when merging finished
    :param target_mp4_file_path: the produced mp4 file, used for its size
    """
    download_time = fetch_end_time - start_time
    total_time = task_end_time - start_time

    try:
        file_size = os.path.getsize(target_mp4_file_path)
    except OSError:
        # Reporting must not crash the task, but it must not claim success.
        logging.warning(
            'mp4 file {} is not found, skip the speed report'.format(
                target_mp4_file_path))
        return

    # A fully cached run can finish within the clock resolution.
    download_speed = file_size / download_time / 1024 if download_time > 0 else 0.0

    logging.info(
        "download successfully! take {:.2f}s, average download speed is {:.2f}KB/s".format(
            total_time, download_speed))


def calibrate_mp4_file_name(mp4_file_name):
    """Strip characters that are illegal in file names.

    :return: ``(True, cleaned_name)``, or ``(False, None)`` when nothing
        usable is left (``None``, blank, or only banned characters)
    """
    if mp4_file_name is None:
        return False, None

    mp4_file_name = mp4_file_name.translate(_BANNED_FILE_NAME_CHARS)

    if mp4_file_name.strip() == '':
        return False, None

    return True, mp4_file_name


def create_mp4_file_name():
    """Create a timestamped default mp4 file name.

    The timestamp avoids ``:`` because it is among the characters
    ``calibrate_mp4_file_name`` strips as illegal in file names.
    """
    mp4_file_name = 'm3u8_To_Mp4_{}.mp4'.format(
        datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
    return mp4_file_name


# ===========================================================================
# diff --git a/m3u8_To_MP4/weber.py b/m3u8_To_MP4/weber.py
# (new file mode 100644, index 0000000..f315ed4)
# ===========================================================================
# -*- coding: utf-8 -*-
import logging
import urllib.request
import urllib.response


def get_headers():
    """Return the default request headers (a desktop browser User-Agent)."""
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36'
    }
    return headers


def request_for(url, max_try_times=1, headers=None, data=None, timeout=30,
                proxy_ip=None, verify=False):
    """Fetch ``url`` and return its body.

    :param url: uri to request
    :param max_try_times: number of attempts; 0 or less performs none
    :param headers: request headers, ``get_headers()`` when omitted
    :param data: request body; its presence turns the request into a POST
    :param timeout: timeout in seconds, raised by 2 after every failure
    :param proxy_ip: accepted for interface compatibility, currently unused
    :param verify: accepted for interface compatibility, currently unused
    :return: ``(is_successful, response_content)``; the content is ``None``
        when every attempt failed
    """
    is_successful = False
    response_content = None

    if headers is None:
        headers = get_headers()

    # HTTP methods are case sensitive, so send them upper-case.
    method = 'GET' if data is None else 'POST'

    for num_retry in range(max_try_times):
        try:
            request = urllib.request.Request(url=url, data=data,
                                             headers=headers, method=method)

            with urllib.request.urlopen(url=request,
                                        timeout=timeout) as response:
                response_content = response.read()

            is_successful = True
            break

        except Exception as exc:
            # Best effort by design: the caller only gets the success flag.
            logging.debug('attempt {} for {} failed: {}'.format(
                num_retry + 1, url, exc))
            timeout += 2

    return is_successful, response_content


# ===========================================================================
# diff --git a/requirements.txt b/requirements.txt
# (new file mode 100644, index 0000000..a6ca9ac) @@ -0,0 +1,3 @@
# ===========================================================================
# iso8601>=0.1.14
# m3u8>=0.9.0
# pycryptodome>=3.10.1

# diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..2e133e7 --- /dev/null +++
# b/setup.py @@ -0,0 +1,30 @@
# -*- coding: utf-8 -*-
"""Packaging script of m3u8-To-MP4."""
from os.path import dirname, abspath, join, exists

from setuptools import setup, find_packages

# Resolve data files next to this script, not in the current directory, so
# the build also works when it is started from elsewhere.
_HERE = dirname(abspath(__file__))


def _read_long_description():
    """Return the text of README.rst, or None when the file is absent."""
    readme_path = join(_HERE, "README.rst")
    if not exists(readme_path):
        return None

    with open(readme_path, encoding="utf-8") as readme_file:
        return readme_file.read()


def _read_requirements():
    """Return the requirement specifiers listed in requirements.txt.

    Blank lines and ``#`` comment lines are skipped and surrounding
    whitespace (including the trailing newline) is removed.
    """
    with open(join(_HERE, 'requirements.txt'),
              encoding="utf-8") as requirements_file:
        stripped_lines = (line.strip() for line in requirements_file)
        return [line for line in stripped_lines
                if line and not line.startswith('#')]


long_description = _read_long_description()

install_reqs = _read_requirements()

setup(
    name='m3u8-To-MP4',
    version="0.1.0",
    description="Python downloader for saving m3u8 video to local MP4 file.",
    long_description_content_type="text/x-rst",
    long_description=long_description,
    author='songs18',
    author_email='songhaohao2018@cqu.edu.cn',
    license='MIT',
    packages=find_packages(),
    platforms=['all'],
    url="https://github.com/songs18/m3u8_To_MP4",
    zip_safe=False,
    include_package_data=True,
    install_requires=install_reqs,
    python_requires='>=3.6'
)