Skip to content

Commit

Permalink
Merge branch 'yt-dlp:master' into my
Browse files Browse the repository at this point in the history
  • Loading branch information
lockmatrix authored Aug 26, 2022
2 parents e765ece + a1af516 commit d0b5727
Show file tree
Hide file tree
Showing 24 changed files with 300 additions and 147 deletions.
13 changes: 10 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,13 @@ You can also fork the project on github and run your fork's [build workflow](.gi
--list-extractors List all supported extractors and exit
--extractor-descriptions Output descriptions of all supported
extractors and exit
--force-generic-extractor Force extraction to use the generic extractor
--use-extractors NAMES Extractor names to use separated by commas.
You can also use regexes, "all", "default"
and "end" (end URL matching); e.g. --ies
"holodex.*,end,youtube". Prefix the name
with a "-" to exclude it, e.g. --ies
default,-generic. Use --list-extractors for
a list of extractor names. (Alias: --ies)
--default-search PREFIX Use this prefix for unqualified URLs. E.g.
"gvsearch2:python" downloads two videos from
google videos for the search term "python".
Expand Down Expand Up @@ -524,8 +530,8 @@ You can also fork the project on github and run your fork's [build workflow](.gi
a file that is in the archive
--break-on-reject Stop the download process when encountering
a file that has been filtered out
--break-per-input Make --break-on-existing, --break-on-reject
and --max-downloads act only on the current
--break-per-input Make --break-on-existing, --break-on-reject,
--max-downloads and autonumber reset per
input URL
--no-break-per-input --break-on-existing and similar options
terminates the entire download queue
Expand Down Expand Up @@ -2058,6 +2064,7 @@ While these options are redundant, they are still expected to be used due to the
#### Not recommended
While these options still work, their use is not recommended since there are other alternatives to achieve the same result

--force-generic-extractor --ies generic,default
--exec-before-download CMD --exec "before_dl:CMD"
--no-exec-before-download --no-exec
--all-formats -f all
Expand Down
11 changes: 7 additions & 4 deletions devscripts/lazy_load_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,17 @@

# These bloat the lazy_extractors, so allow them to passthrough silently
ALLOWED_CLASSMETHODS = {'get_testcases', 'extract_from_webpage'}
_WARNED = False


class LazyLoadMetaClass(type):
def __getattr__(cls, name):
if '_real_class' not in cls.__dict__ and name not in ALLOWED_CLASSMETHODS:
write_string(
'WARNING: Falling back to normal extractor since lazy extractor '
f'{cls.__name__} does not have attribute {name}{bug_reports_message()}\n')
global _WARNED
if ('_real_class' not in cls.__dict__
and name not in ALLOWED_CLASSMETHODS and not _WARNED):
_WARNED = True
write_string('WARNING: Falling back to normal extractor since lazy extractor '
f'{cls.__name__} does not have attribute {name}{bug_reports_message()}\n')
return getattr(cls.real_class, name)


Expand Down
4 changes: 3 additions & 1 deletion devscripts/make_lazy_extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@
from devscripts.utils import get_filename_args, read_file, write_file

NO_ATTR = object()
STATIC_CLASS_PROPERTIES = ['IE_NAME', 'IE_DESC', 'SEARCH_KEY', '_VALID_URL', '_WORKING', '_NETRC_MACHINE', 'age_limit']
STATIC_CLASS_PROPERTIES = [
'IE_NAME', 'IE_DESC', 'SEARCH_KEY', '_VALID_URL', '_WORKING', '_ENABLED', '_NETRC_MACHINE', 'age_limit'
]
CLASS_METHODS = [
'ie_key', 'working', 'description', 'suitable', '_match_valid_url', '_match_id', 'get_temp_id', 'is_suitable'
]
Expand Down
41 changes: 23 additions & 18 deletions test/test_execution.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,41 +11,46 @@
import contextlib
import subprocess

from yt_dlp.utils import encodeArgument
from yt_dlp.utils import Popen

rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
LAZY_EXTRACTORS = 'yt_dlp/extractor/lazy_extractors.py'


try:
_DEV_NULL = subprocess.DEVNULL
except AttributeError:
_DEV_NULL = open(os.devnull, 'wb')
class TestExecution(unittest.TestCase):
def run_yt_dlp(self, exe=(sys.executable, 'yt_dlp/__main__.py'), opts=('--version', )):
stdout, stderr, returncode = Popen.run(
[*exe, '--ignore-config', *opts], cwd=rootDir, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
print(stderr, file=sys.stderr)
self.assertEqual(returncode, 0)
return stdout.strip(), stderr.strip()

def test_main_exec(self):
self.run_yt_dlp()

class TestExecution(unittest.TestCase):
def test_import(self):
subprocess.check_call([sys.executable, '-c', 'import yt_dlp'], cwd=rootDir)
self.run_yt_dlp(exe=(sys.executable, '-c', 'import yt_dlp'))

def test_module_exec(self):
subprocess.check_call([sys.executable, '-m', 'yt_dlp', '--ignore-config', '--version'], cwd=rootDir, stdout=_DEV_NULL)

def test_main_exec(self):
subprocess.check_call([sys.executable, 'yt_dlp/__main__.py', '--ignore-config', '--version'], cwd=rootDir, stdout=_DEV_NULL)
self.run_yt_dlp(exe=(sys.executable, '-m', 'yt_dlp'))

def test_cmdline_umlauts(self):
p = subprocess.Popen(
[sys.executable, 'yt_dlp/__main__.py', '--ignore-config', encodeArgument('ä'), '--version'],
cwd=rootDir, stdout=_DEV_NULL, stderr=subprocess.PIPE)
_, stderr = p.communicate()
_, stderr = self.run_yt_dlp(opts=('ä', '--version'))
self.assertFalse(stderr)

def test_lazy_extractors(self):
try:
subprocess.check_call([sys.executable, 'devscripts/make_lazy_extractors.py', 'yt_dlp/extractor/lazy_extractors.py'], cwd=rootDir, stdout=_DEV_NULL)
subprocess.check_call([sys.executable, 'test/test_all_urls.py'], cwd=rootDir, stdout=_DEV_NULL)
subprocess.check_call([sys.executable, 'devscripts/make_lazy_extractors.py', LAZY_EXTRACTORS],
cwd=rootDir, stdout=subprocess.DEVNULL)
self.assertTrue(os.path.exists(LAZY_EXTRACTORS))

_, stderr = self.run_yt_dlp(opts=('-s', 'test:'))
self.assertFalse(stderr)

subprocess.check_call([sys.executable, 'test/test_all_urls.py'], cwd=rootDir, stdout=subprocess.DEVNULL)
finally:
with contextlib.suppress(OSError):
os.remove('yt_dlp/extractor/lazy_extractors.py')
os.remove(LAZY_EXTRACTORS)


if __name__ == '__main__':
Expand Down
8 changes: 8 additions & 0 deletions test/test_youtube_signature.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,14 @@
'https://www.youtube.com/s/player/1f7d5369/player_ias.vflset/en_US/base.js',
'batNX7sYqIJdkJ', 'IhOkL_zxbkOZBw',
),
(
'https://www.youtube.com/s/player/009f1d77/player_ias.vflset/en_US/base.js',
'5dwFHw8aFWQUQtffRq', 'audescmLUzI3jw',
),
(
'https://www.youtube.com/s/player/dc0c6770/player_ias.vflset/en_US/base.js',
'5EHDMgYLV6HPGk_Mu-kk', 'n9lUJLHbxUI0GQ',
),
]


Expand Down
94 changes: 43 additions & 51 deletions yt_dlp/YoutubeDL.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
from .downloader.rtmp import rtmpdump_version
from .extractor import gen_extractor_classes, get_info_extractor
from .extractor.common import UnsupportedURLIE
from .extractor.openload import PhantomJSwrapper
from .minicurses import format_text
from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors
Expand All @@ -47,7 +48,7 @@
get_postprocessor,
)
from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
from .update import detect_variant
from .update import REPOSITORY, current_git_head, detect_variant
from .utils import (
DEFAULT_OUTTMPL,
IDENTITY,
Expand Down Expand Up @@ -115,6 +116,7 @@
network_exceptions,
number_of_digits,
orderedSet,
orderedSet_from_options,
parse_filesize,
preferredencoding,
prepend_extension,
Expand Down Expand Up @@ -236,7 +238,7 @@ class YoutubeDL:
Default is 'only_download' for CLI, but False for API
skip_playlist_after_errors: Number of allowed failures until the rest of
the playlist is skipped
force_generic_extractor: Force downloader to use the generic extractor
allowed_extractors: List of regexes to match against extractor names that are allowed
overwrites: Overwrite all video and metadata files if True,
overwrite only non-video files if None
and don't overwrite any file if False
Expand Down Expand Up @@ -476,6 +478,8 @@ class YoutubeDL:
The following options are deprecated and may be removed in the future:
force_generic_extractor: Force downloader to use the generic extractor
- Use allowed_extractors = ['generic', 'default']
playliststart: - Use playlist_items
Playlist item to start at.
playlistend: - Use playlist_items
Expand Down Expand Up @@ -757,13 +761,6 @@ def add_info_extractor(self, ie):
self._ies_instances[ie_key] = ie
ie.set_downloader(self)

def _get_info_extractor_class(self, ie_key):
ie = self._ies.get(ie_key)
if ie is None:
ie = get_info_extractor(ie_key)
self.add_info_extractor(ie)
return ie

def get_info_extractor(self, ie_key):
"""
Get an instance of an IE with name ie_key, it will try to get one from
Expand All @@ -780,8 +777,19 @@ def add_default_info_extractors(self):
"""
Add the InfoExtractors returned by gen_extractors to the end of the list
"""
for ie in gen_extractor_classes():
self.add_info_extractor(ie)
all_ies = {ie.IE_NAME.lower(): ie for ie in gen_extractor_classes()}
all_ies['end'] = UnsupportedURLIE()
try:
ie_names = orderedSet_from_options(
self.params.get('allowed_extractors', ['default']), {
'all': list(all_ies),
'default': [name for name, ie in all_ies.items() if ie._ENABLED],
}, use_regex=True)
except re.error as e:
raise ValueError(f'Wrong regex for allowed_extractors: {e.pattern}')
for name in ie_names:
self.add_info_extractor(all_ies[name])
self.write_debug(f'Loaded {len(ie_names)} extractors')

def add_post_processor(self, pp, when='post_process'):
"""Add a PostProcessor object to the end of the chain."""
Expand Down Expand Up @@ -1412,11 +1420,11 @@ def extract_info(self, url, download=True, ie_key=None, extra_info=None,
ie_key = 'Generic'

if ie_key:
ies = {ie_key: self._get_info_extractor_class(ie_key)}
ies = {ie_key: self._ies[ie_key]} if ie_key in self._ies else {}
else:
ies = self._ies

for ie_key, ie in ies.items():
for key, ie in ies.items():
if not ie.suitable(url):
continue

Expand All @@ -1425,14 +1433,16 @@ def extract_info(self, url, download=True, ie_key=None, extra_info=None,
'and will probably not work.')

temp_id = ie.get_temp_id(url)
if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
self.to_screen(f'[{key}] {temp_id}: has already been recorded in the archive')
if self.params.get('break_on_existing', False):
raise ExistingVideoReached()
break
return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
else:
self.report_error('no suitable InfoExtractor for URL %s' % url)
extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
self.report_error(f'No suitable extractor{format_field(ie_key, None, " (%s)")} found for URL {url}',
tb=False if extractors_restricted else None)

def _handle_extraction_exceptions(func):
@functools.wraps(func)
Expand Down Expand Up @@ -2737,27 +2747,11 @@ def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
if self.params.get('allsubtitles', False):
requested_langs = all_sub_langs
elif self.params.get('subtitleslangs', False):
# A list is used so that the order of languages will be the same as
# given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
requested_langs = []
for lang_re in self.params.get('subtitleslangs'):
discard = lang_re[0] == '-'
if discard:
lang_re = lang_re[1:]
if lang_re == 'all':
if discard:
requested_langs = []
else:
requested_langs.extend(all_sub_langs)
continue
current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
if discard:
for lang in current_langs:
while lang in requested_langs:
requested_langs.remove(lang)
else:
requested_langs.extend(current_langs)
requested_langs = orderedSet(requested_langs)
try:
requested_langs = orderedSet_from_options(
self.params.get('subtitleslangs'), {'all': all_sub_langs}, use_regex=True)
except re.error as e:
raise ValueError(f'Wrong regex for subtitlelangs: {e.pattern}')
elif normal_sub_langs:
requested_langs = ['en'] if 'en' in normal_sub_langs else normal_sub_langs[:1]
else:
Expand Down Expand Up @@ -3271,6 +3265,7 @@ def wrapper(*args, **kwargs):
self.to_screen(f'[info] {e}')
if not self.params.get('break_per_url'):
raise
self._num_downloads = 0
else:
if self.params.get('dump_single_json', False):
self.post_extract(res)
Expand Down Expand Up @@ -3319,6 +3314,12 @@ def sanitize_info(info_dict, remove_private_keys=False):
return info_dict
info_dict.setdefault('epoch', int(time.time()))
info_dict.setdefault('_type', 'video')
info_dict.setdefault('_version', {
'version': __version__,
'current_git_head': current_git_head(),
'release_git_head': RELEASE_GIT_HEAD,
'repository': REPOSITORY,
})

if remove_private_keys:
reject = lambda k, v: v is None or k.startswith('__') or k in {
Expand Down Expand Up @@ -3683,7 +3684,8 @@ def get_encoding(stream):
if VARIANT not in (None, 'pip'):
source += '*'
write_debug(join_nonempty(
'yt-dlp version', __version__,
f'{"yt-dlp" if REPOSITORY == "yt-dlp/yt-dlp" else REPOSITORY} version',
__version__,
f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
'' if source == 'unknown' else f'({source})',
delim=' '))
Expand All @@ -3699,18 +3701,8 @@ def get_encoding(stream):
if self.params['compat_opts']:
write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))

if source == 'source':
try:
stdout, _, _ = Popen.run(
['git', 'rev-parse', '--short', 'HEAD'],
text=True, cwd=os.path.dirname(os.path.abspath(__file__)),
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
if re.fullmatch('[0-9a-f]+', stdout.strip()):
write_debug(f'Git HEAD: {stdout.strip()}')
except Exception:
with contextlib.suppress(Exception):
sys.exc_clear()

if current_git_head():
write_debug(f'Git HEAD: {current_git_head()}')
write_debug(system_identifier())

exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
Expand Down
1 change: 1 addition & 0 deletions yt_dlp/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -766,6 +766,7 @@ def parse_options(argv=None):
'windowsfilenames': opts.windowsfilenames,
'ignoreerrors': opts.ignoreerrors,
'force_generic_extractor': opts.force_generic_extractor,
'allowed_extractors': opts.allowed_extractors or ['default'],
'ratelimit': opts.ratelimit,
'throttledratelimit': opts.throttledratelimit,
'overwrites': opts.overwrites,
Expand Down
5 changes: 3 additions & 2 deletions yt_dlp/extractor/bilibili.py
Original file line number Diff line number Diff line change
Expand Up @@ -858,14 +858,15 @@ def _search_results(self, query):
'keyword': query,
'page': page_num,
'context': '',
'order': 'pubdate',
'duration': 0,
'tids_2': '',
'__refresh__': 'true',
'search_type': 'video',
'tids': 0,
'highlight': 1,
})['data'].get('result') or []
})['data'].get('result')
if not videos:
break
for video in videos:
yield self.url_result(video['arcurl'], 'BiliBili', str(video['aid']))

Expand Down
4 changes: 3 additions & 1 deletion yt_dlp/extractor/bitchute.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,12 @@ def _real_extract(self, url):
error = self._html_search_regex(r'<h1 class="page-title">([^<]+)</h1>', webpage, 'error', default='Cannot find video')
if error == 'Video Unavailable':
raise GeoRestrictedError(error)
raise ExtractorError(error)
raise ExtractorError(error, expected=True)
formats = entries[0]['formats']

self._check_formats(formats, video_id)
if not formats:
raise self.raise_no_formats('Video is unavailable', expected=True, video_id=video_id)
self._sort_formats(formats)

description = self._html_search_regex(
Expand Down
Loading

0 comments on commit d0b5727

Please sign in to comment.