Skip to content

Commit

Permalink
Merge pull request #5 from blackjack4494/master
Browse files Browse the repository at this point in the history
[viki][soundcloud][hr-fernsehen] fixes and new extractor
  • Loading branch information
blackjack4494 authored Aug 31, 2020
2 parents d59e0e4 + 4f93dea commit f661aa8
Show file tree
Hide file tree
Showing 8 changed files with 268 additions and 99 deletions.
33 changes: 33 additions & 0 deletions .github/workflows/python-publish.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# This workflows will upload a Python Package using Twine when a release is created
# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries

name: Upload Python Package

on:
push:
branches:
- release

jobs:
deploy:

runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v2
- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: '3.x'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install setuptools wheel twine
- name: Build and publish
env:
TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
run: |
rm -rf dist/*
python setup.py sdist bdist_wheel
twine upload dist/*
1 change: 1 addition & 0 deletions make_win.bat
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
pyinstaller.exe youtube_dl\__main__.py --onefile --name youtube-dlc
127 changes: 41 additions & 86 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,62 +1,21 @@
#!/usr/bin/env python
# coding: utf-8

from __future__ import print_function

from setuptools import setup, Command
import os.path
import warnings
import sys

try:
from setuptools import setup, Command
setuptools_available = True
except ImportError:
from distutils.core import setup, Command
setuptools_available = False
from distutils.spawn import spawn

try:
# This will create an exe that needs Microsoft Visual C++ 2008
# Redistributable Package
import py2exe
except ImportError:
if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe':
print('Cannot import py2exe', file=sys.stderr)
exit(1)

py2exe_options = {
'bundle_files': 1,
'compressed': 1,
'optimize': 2,
'dist_dir': '.',
'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'],
}

# Get the version from youtube_dl/version.py without importing the package
exec(compile(open('youtube_dl/version.py').read(),
'youtube_dl/version.py', 'exec'))

DESCRIPTION = 'YouTube video downloader'
LONG_DESCRIPTION = 'Command-line program to download videos from YouTube.com and other video sites'

py2exe_console = [{
'script': './youtube_dl/__main__.py',
'dest_base': 'youtube-dl',
'version': __version__,
'description': DESCRIPTION,
'comments': LONG_DESCRIPTION,
'product_name': 'youtube-dl',
'product_version': __version__,
}]

py2exe_params = {
'console': py2exe_console,
'options': {'py2exe': py2exe_options},
'zipfile': None
}
DESCRIPTION = 'Media downloader supporting various sites such as youtube'
LONG_DESCRIPTION = 'Command-line program to download videos from YouTube.com and other video sites. Based on a more active community fork.'

if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe':
params = py2exe_params
print("inv")
else:
files_spec = [
('etc/bash_completion.d', ['youtube-dl.bash-completion']),
Expand All @@ -78,10 +37,10 @@
params = {
'data_files': data_files,
}
if setuptools_available:
params['entry_points'] = {'console_scripts': ['youtube-dl = youtube_dl:main']}
else:
params['scripts'] = ['bin/youtube-dl']
#if setuptools_available:
params['entry_points'] = {'console_scripts': ['youtube-dlc = youtube_dl:main']}
#else:
# params['scripts'] = ['bin/youtube-dlc']

class build_lazy_extractors(Command):
description = 'Build the extractor lazy loading module'
Expand All @@ -100,49 +59,45 @@ def run(self):
)

setup(
name='youtube_dl',
name="youtube_dlc",
version=__version__,
maintainer="Tom-Oliver Heidel",
maintainer_email="theidel@uni-bremen.de",
description=DESCRIPTION,
long_description=LONG_DESCRIPTION,
url='https://github.com/ytdl-org/youtube-dl',
author='Ricardo Garcia',
author_email='ytdl@yt-dl.org',
maintainer='Sergey M.',
maintainer_email='dstftw@gmail.com',
license='Unlicense',
packages=[
# long_description_content_type="text/markdown",
url="https://github.com/blackjack4494/youtube-dlc",
# packages=setuptools.find_packages(),
packages=[
'youtube_dl',
'youtube_dl.extractor', 'youtube_dl.downloader',
'youtube_dl.postprocessor'],

# Provokes warning on most systems (why?!)
# test_suite = 'nose.collector',
# test_requires = ['nosetest'],

classifiers=[
'Topic :: Multimedia :: Video',
'Development Status :: 5 - Production/Stable',
'Environment :: Console',
'License :: Public Domain',
'Programming Language :: Python',
'Programming Language :: Python :: 2',
'Programming Language :: Python :: 2.6',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.2',
'Programming Language :: Python :: 3.3',
'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: Implementation',
'Programming Language :: Python :: Implementation :: CPython',
'Programming Language :: Python :: Implementation :: IronPython',
'Programming Language :: Python :: Implementation :: Jython',
'Programming Language :: Python :: Implementation :: PyPy',
"Topic :: Multimedia :: Video",
"Development Status :: 5 - Production/Stable",
"Environment :: Console",
"Programming Language :: Python",
"Programming Language :: Python :: 2",
"Programming Language :: Python :: 2.6",
"Programming Language :: Python :: 2.7",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.2",
"Programming Language :: Python :: 3.3",
"Programming Language :: Python :: 3.4",
"Programming Language :: Python :: 3.5",
"Programming Language :: Python :: 3.6",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: Implementation",
"Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: Implementation :: IronPython",
"Programming Language :: Python :: Implementation :: Jython",
"Programming Language :: Python :: Implementation :: PyPy",
"License :: Public Domain",
"Operating System :: OS Independent",
],

cmdclass={'build_lazy_extractors': build_lazy_extractors},
python_requires='>=2.6',

cmdclass={'build_lazy_extractors': build_lazy_extractors},
**params
)
)
1 change: 1 addition & 0 deletions youtube_dl/extractor/extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -440,6 +440,7 @@
)
from .howcast import HowcastIE
from .howstuffworks import HowStuffWorksIE
from .hrfensehen import HRFernsehenIE
from .hrti import (
HRTiIE,
HRTiPlaylistIE,
Expand Down
102 changes: 102 additions & 0 deletions youtube_dl/extractor/hrfensehen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
# coding: utf-8
from __future__ import unicode_literals

import json
import re

from youtube_dl.utils import int_or_none, unified_timestamp, unescapeHTML
from .common import InfoExtractor


class HRFernsehenIE(InfoExtractor):
IE_NAME = 'hrfernsehen'
_VALID_URL = r'^https?://www\.(?:hr-fernsehen|hessenschau)\.de/.*,video-(?P<id>[0-9]{6})\.html'

_TESTS = [{
'url': 'https://www.hessenschau.de/tv-sendung/hessenschau-vom-26082020,video-130546.html',
'md5': '5c4e0ba94677c516a2f65a84110fc536',
'info_dict': {
'id': '130546',
'ext': 'mp4',
'description': 'Sturmtief Kirsten fegt über Hessen / Die Corona-Pandemie – eine Chronologie / '
'Sterbehilfe: Die Lage in Hessen / Miss Hessen leitet zwei eigene Unternehmen / '
'Pop-Up Museum zeigt Schwarze Unterhaltung und Black Music',
'subtitles': {'de': [{
'url': 'https://hr-a.akamaihd.net/video/as/hessenschau/2020_08/hrLogo_200826200407_L385592_512x288-25p-500kbit.vtt'
}]},
'timestamp': 1598470200,
'upload_date': '20200826',
'thumbnails': [{
'url': 'https://www.hessenschau.de/tv-sendung/hs_ganz-1554~_t-1598465545029_v-16to9.jpg',
'id': '0'
}, {
'url': 'https://www.hessenschau.de/tv-sendung/hs_ganz-1554~_t-1598465545029_v-16to9__medium.jpg',
'id': '1'
}],
'title': 'hessenschau vom 26.08.2020'
}
}, {
'url': 'https://www.hr-fernsehen.de/sendungen-a-z/mex/sendungen/fair-und-gut---was-hinter-aldis-eigenem-guetesiegel-steckt,video-130544.html',
'only_matching': True
}]

_GEO_COUNTRIES = ['DE']

def extract_airdate(self, loader_data):
airdate_str = loader_data.get('mediaMetadata', {}).get('agf', {}).get('airdate')

if airdate_str is None:
return None

return unified_timestamp(airdate_str)

def extract_formats(self, loader_data):
stream_formats = []
for stream_obj in loader_data["videoResolutionLevels"]:
stream_format = {
'format_id': str(stream_obj['verticalResolution']) + "p",
'height': stream_obj['verticalResolution'],
'url': stream_obj['url'],
}

quality_information = re.search(r'([0-9]{3,4})x([0-9]{3,4})-([0-9]{2})p-([0-9]{3,4})kbit',
stream_obj['url'])
if quality_information:
stream_format['width'] = int_or_none(quality_information.group(1))
stream_format['height'] = int_or_none(quality_information.group(2))
stream_format['fps'] = int_or_none(quality_information.group(3))
stream_format['tbr'] = int_or_none(quality_information.group(4))

stream_formats.append(stream_format)

self._sort_formats(stream_formats)
return stream_formats

def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)

title = self._html_search_meta(
['og:title', 'twitter:title', 'name'], webpage)
description = self._html_search_meta(
['description'], webpage)

loader_str = unescapeHTML(self._search_regex(r"data-hr-mediaplayer-loader='([^']*)'", webpage, "ardloader"))
loader_data = json.loads(loader_str)

info = {
'id': video_id,
'title': title,
'description': description,
'formats': self.extract_formats(loader_data),
'timestamp': self.extract_airdate(loader_data)
}

if "subtitle" in loader_data:
info["subtitles"] = {"de": [{"url": loader_data["subtitle"]}]}

thumbnails = list(set([t for t in loader_data.get("previewImageUrl", {}).values()]))
if len(thumbnails) > 0:
info["thumbnails"] = [{"url": t} for t in thumbnails]

return info
Loading

0 comments on commit f661aa8

Please sign in to comment.